[R] xtabs

Thu Feb 14 06:04:43 CET 2002

At 04:29 PM 2/13/02 -0700, Michael Lynn Fugate wrote:
>Hi,
>
>In Splus if I call the function crosstabs() the output is a contigency
>table; in each cell of the table is printed: N, N/RowTotal,
>N/ColTotal, N/Total.  N is the number of observations in each cell.
>
>The same call to xtabs() in R will produce the contigency table but the
>only entry in each cell is N.
>
>How can I get the same relative frequencies that crosstabs() gives?
>
>Thanks,
>mike
>
>--
>
>******************************************************************
>| Michael Fugate                         Phone:  (505) 667-0398  |
>| Los Alamos National Laboratory         Fax:    (505) 665-5220  |
>| Group: CCS-3,  Mail Stop: B265         e-mail: fugate at lanl.gov |
>| Los Alamos, NM 87545                                           |
>******************************************************************
This not an elegant solution. Once you have your matrix or array 
contingency table, the following function will give you the marginal 
totals, marginal distributions, and joint distributions as a list that you 
can index to extract the specific result you want. Here's the function at work:
 > x <- array(1:8,c(2,2,2))
 > x
, , 1

      [,1] [,2]
[1,]    1    3
[2,]    2    4

, , 2

      [,1] [,2]
[1,]    5    7
[2,]    6    8

 > marginals(x)
$margin.totals
, , 1

      [,1] [,2] [,3]
[1,]    1    3    4
[2,]    2    4    6
[3,]    3    7   10

, , 2

      [,1] [,2] [,3]
[1,]    5    7   12
[2,]    6    8   14
[3,]   11   15   26

$col.distrib
, , 1

           [,1]      [,2] [,3]
[1,] 0.3333333 0.4285714  0.4
[2,] 0.6666667 0.5714286  0.6
[3,] 1.0000000 1.0000000  1.0

, , 2

           [,1]      [,2]      [,3]
[1,] 0.4545455 0.4666667 0.4615385
[2,] 0.5454545 0.5333333 0.5384615
[3,] 1.0000000 1.0000000 1.0000000

$row.distrib
, , 1

           [,1]      [,2] [,3]
[1,] 0.2500000 0.7500000    1
[2,] 0.3333333 0.6666667    1
[3,] 0.3000000 0.7000000    1

, , 2

           [,1]      [,2] [,3]
[1,] 0.4166667 0.5833333    1
[2,] 0.4285714 0.5714286    1
[3,] 0.4230769 0.5769231    1

$joint.distrib
, , 1

      [,1] [,2] [,3]
[1,]  0.1  0.3  0.4
[2,]  0.2  0.4  0.6
[3,]  0.3  0.7  1.0

, , 2

           [,1]      [,2]      [,3]
[1,] 0.1923077 0.2692308 0.4615385
[2,] 0.2307692 0.3076923 0.5384615
[3,] 0.4230769 0.5769231 1.0000000

Now here's the function:

marginals <- function(x){
#get marginal counts/distributions of matrix/array
#x=matrix or array
#Note: creates and uses indexing function
#
index <- function(x,MARGIN=1,INDEX=list(1:dim(x)[1]),drop=TRUE){
#Index any array along any dimension
#without need to specify number of dimensions!
#Tomas Aragon, S-Plus, Created 11/04/97, Edited
#x =  array
#MARGIN = dimension to be subscripted
#INDEX = index, default returns x unchanged
#drop = FALSE, TRUE maintains same number of dimensions
         dx <- dim(x)
         ldx <- length(dim(x))
         if(is.list(INDEX) == F){INDEX <- list(INDEX)}
         dlist <- vector("list", ldx)
         dlist[MARGIN] <- INDEX
         for(i in (1:ldx)[ - MARGIN]) {
                 dlist[i] <- list(1:dx[i])
         }
         pp <- paste(dlist, ",", sep = "", collapse = "")
         if(drop == TRUE){txt <- "drop=TRUE"}
         else txt <- "drop=FALSE"
         eval(parse(text = paste("x[", pp, txt, "]")))
}
#for matrices
if(is.null(dim(x))==TRUE) stop("Must be matrix or array")
dims <- dim(x)
dn <- dimnames(x)
nn <- names(dimnames(x))
#for matrices
if(length(dims)==2){
   xx <- cbind(x,apply(x,1,sum))
   result <- rbind(xx,apply(xx,2,sum))
   if(!is.null(dn)==TRUE) {
   dimnames(result) <- list(c(dn[[1]],"Total"),c(dn[[2]],"Total"))
   }
   if(!is.null(nn)==TRUE) {names(dimnames(result)) <- nn}
   result2 <- sweep(result, 2, result[nrow(result),  ], "/")
   result3 <- sweep(result, 1, result[, ncol(result)], "/")
   result4 <- result/result[nrow(result), ncol(result)]

}
#for arrays
if(length(dims)>=3){
   dnlist <- vector("list",length(dims))
   xx <- matrix(x,nrow=dims[1])
   txxs <- t(rbind(xx,apply(xx,2,sum)))
   txxs2 <- sweep(txxs,1,txxs[,ncol(txxs)],"/")
   xxx <- matrix(txxs,dims[2])
   xxxs <- (rbind(xxx,apply(xxx,2,sum)))
   txxx <- t(matrix(xxxs,2*dims[2]+2))
   result <- array(txxx,dim=c(dims[1:2]+1,dims[-c(1:2)]))
   dnlist[[1]] <- c(dn[[1]],"Total")
   dnlist[[2]] <- c(dn[[2]],"Total")
   for(i in 3:length(dims)){dnlist[[i]] <- c(dn[[i]])}
   if(!is.null(dn)==TRUE) {dimnames(result) <- dnlist}
   if(!is.null(nn)==TRUE) {names(dimnames(result)) <- nn}
   sub1 <- index(result,1,dim(result)[1],drop=FALSE)
   sub2 <- index(result,2,dim(result)[2],drop=FALSE)
   sub3 <- index(result,c(1,2),list(dim(result)[1],dim(result)[2]),drop=FALSE)
   result2 <- sweep(result,(1:length(dims))[-1],sub1,"/")
   result3 <- sweep(result,(1:length(dims))[-2],sub2,"/")
   result4 <- sweep(result,(1:length(dims))[- c(1,2)],sub3,"/")

}
list(margin.totals=result,col.distrib=result2,row.distrib=result3,joint.distrib=result4)
}

Good luck,
Tomas

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._