[Rd] about mantelhaen.test (PR#7779)

dfolkins at temple.edu dfolkins at temple.edu
Tue Oct 31 02:48:46 CET 2006


This is an OpenPGP/MIME signed message (RFC 2440 and 3156)
--------------enig5A700CC15DEABD01445A1B70
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable

Hi,

I have modified the code originally posted to include capability for
arbitrary weightings for the rows and columns (which apply in the
nominal-ordinal and the ordinal-ordinal cases). The code is pasted below
(as well as an example based on job satisfaction data).

However, I cannot figure out how to make the output of class "htest"
make nicely-formatted output. When I use the class htest, with a list of
df's, test statistics, etc, the printed output looks ugly. (See the last
few commented lines at the end of the function for my aborted attempt to
do this. To see what happens when that code is used, comment out the
last four uncommented lines (the ones that mention variable "result"),
and uncomment all the commented lines at the end.)

I am an R newbie by any standard, so if anyone could clue me in on how
to use htest in the case of multiple statistics and dfs, that would be
great.

Once we have that, does that mean that this can serve as a good
replacement for mantelhaen.test() in R?

Any comments appreciated!

code follows:
------
# takes a 3-d array x, scores for rows, and scores for columns
# runs nominal-nominal, ordinal-nominal, and ordinal-ordinal CMH tests
# this function is based on one posted at
# http://bugs.r-project.org/cgi-bin/R/wishlist?id=3D7779;user=3Dguest

mh.test <- function(x, row_scores=3DNULL, col_scores=3DNULL) {
    if (length(dim(x)) !=3D 3){
        stop("x must be a 3 dimensional array")
    }
    if (any(apply(x, 3, sum) < 2)){
        stop("sample size in each stratum must be > 1")
    }
    I <- dim(x)[1];    J <- dim(x)[2];    K <- dim(x)[3]
    if ( (length(row_scores) !=3D I & !is.null(row_scores) ) |
(length(col_scores) !=3D J & !is.null(col_scores)) ){
        stop("scores dimensions must equal to data dimensions")
    }
    if (is.null(row_scores)){
        row_scores =3D 1:I
    }
    if (is.null(col_scores)) {
        col_scores =3D 1:J
    }

    df_GMH <- (I - 1) * (J - 1);    df_SMH <- I - 1;    df_CSMH <- 1
    A_GMH <- cbind(diag(I-1), 0) %x% cbind(diag(J-1), 0)
    #A_SMH <- cbind(diag(I-1), 0) %x% t(1:J)
    A_SMH <- cbind(diag(I-1), 0) %x% t(col_scores)
    #A_CSMH <- t(1:I) %x% t(1:J)
    A_CSMH <- t(row_scores) %x% t(col_scores)
    Y_GMH <- matrix(0, nc =3D 1, nr =3D df_GMH)
    Y_SMH <- matrix(0, nc =3D 1, nr =3D df_SMH)
    Y_CSMH <- matrix(0, nc =3D 1, nr =3D df_CSMH)
    S_GMH <- matrix(0, nc =3D df_GMH, nr =3D df_GMH)
    S_SMH <- matrix(0, nc =3D df_SMH, nr =3D df_SMH)
    S_CSMH <- matrix(0, nc =3D df_CSMH, nr =3D df_CSMH)
    for(k in 1:K) {
        V <- NULL
        f <- x[, , k]
        ntot <- sum(f)
        p_ip <- apply(f, 1, sum) / ntot
        p_pj <- apply(f, 2, sum) / ntot
        m <- p_ip %x% p_pj * ntot
        V <- ntot^2 * ((diag(p_ip) - p_ip %*% t(p_ip)) %x% (diag(p_pj) -
p_pj %*% t(p_pj))) / (ntot-1)
        Y_GMH <- Y_GMH + A_GMH %*% (c(t(f)) - m)
        Y_SMH <- Y_SMH + A_SMH %*% (c(t(f)) - m)
        Y_CSMH <- Y_CSMH + A_CSMH %*% (c(t(f)) - m)
        S_GMH <- S_GMH + A_GMH %*% V %*% t(A_GMH)
        S_SMH <- S_SMH + A_SMH %*% V %*% t(A_SMH)
        S_CSMH <- S_CSMH + A_CSMH %*% V %*% t(A_CSMH)
    }
    Q_GMH <- t(Y_GMH) %*% solve(S_GMH) %*% Y_GMH
    Q_SMH <- t(Y_SMH) %*% solve(S_SMH) %*% Y_SMH
    Q_CSMH <- t(Y_CSMH) %*% solve(S_CSMH) %*% Y_CSMH
    STATISTIC <- c(Q_CSMH, Q_SMH, Q_GMH)
    PARAMETER <- c(df_CSMH, df_SMH, df_GMH)
    PVAL <- pchisq(STATISTIC, PARAMETER, lower =3D FALSE)
    result <- cbind(STATISTIC, PARAMETER, PVAL)
    rownames(result) <- list("Nonzero Correlation", "Row Mean Scores
Differ", "General Association")
    colnames(result) <- list("Statistic", "df", "p-value")
    return (result)
    #DNAME <- deparse(substitute(x))
    #METHOD =3D "Cochran-Mantel-Haenszel Statistics (Based on Table Score=
s)"
    #names(STATISTIC) =3D list("Nonzero Correlation", "Row Mean Scores
Differ", "General Association")
    #names(PARAMETER) =3D list("df", "df", "df")

    #structure(list(statistic =3D STATISTIC, parameter =3D PARAMETER,
p.value =3D PVAL, method =3D METHOD, data.name=3DDNAME), class =3D "power=
=2Ehtest")
        #data.name =3D DNAME, observed =3D x, expected =3D E, residuals =3D=
 (x -
E)/sqrt(E)), class =3D "htest")
}

--------test results:--------
> Satisfaction <-
+     as.table(array(c(1, 2, 0, 0, 3, 3, 1, 2,
+                       11, 17, 8, 4, 2, 3, 5, 2,
+                       1, 0, 0, 0, 1, 3, 0, 1,
+                       2, 5, 7, 9, 1, 1, 3, 6),
+                     dim =3D c(4, 4, 2),
+                     dimnames =3D
+                     list(Income =3D
+                          c("<5000", "5000-15000",
+                            "15000-25000", ">25000"),
+                          "JobSatisfaction" =3D
+                          c("V_D", "L_S", "M_S", "V_S"),
+                          Gender =3D c("Female", "Male"))))
> Satisfaction
, , Gender =3D Female
             JobSatisfaction
Income        V_D L_S M_S V_S
  <5000         1   3 11     2
  5000-15000    2   3 17     3
  15000-25000   0   1    8   5
  >25000        0   2    4   2
, , Gender =3D Male
             JobSatisfaction
Income        V_D L_S M_S V_S
  <5000         1   1    2   1
  5000-15000    0   3    5   1
  15000-25000   0   0    7   3
  >25000        0   1    9   6
> mh.test(Satisfaction, c(3,10,20,35), c(1,3,4,5))
                        Statistic df    p-value
Nonzero Correlation      6.156301 1 0.01309447
Row Mean Scores Differ 9.034222 3 0.02883933
General Association     10.200089 9 0.33453118


--------------enig5A700CC15DEABD01445A1B70
Content-Type: application/pgp-signature; name="signature.asc"
Content-Description: OpenPGP digital signature
Content-Disposition: attachment; filename="signature.asc"

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2.2 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFFRqs35/k4vslVlLIRAg/5AJ93AGEdIakk7T27coDij6jAdo7n6gCeNP/0
YoIcLAawlVvg4yILUXHJZM0=
=+GEH
-----END PGP SIGNATURE-----

--------------enig5A700CC15DEABD01445A1B70--




More information about the R-devel mailing list