[R] crosstabulation

Victor Moreno v.moreno at ico.scs.es
Thu Sep 6 17:56:41 CEST 2001


Hi,
I find difficult to read crosstabulated data without percentages, so I wrote 
this dirty function that may be useful to others. It only works up to 3 
dimensions since I am not very good at programming in R. I would appreciate 
if someone else has a better similar function or can improve this one.

As an example of use:
> x<-rbinom(100,1,.3)
> y<-rbinom(100,1,.3)
> z<-rbinom(100,1,.3)

> Table(x)
  x     (%)
0    71 ( 71.0)
1    29 ( 29.0)

> Table(x,y)
   y
x   0     (%)     1     (%)
  0    52 ( 73.2)    19 ( 26.8)
  1    17 ( 58.6)    12 ( 41.4)

> Table(x,y,margin=2)  #column percentages
   y
x   0     (%)     1     (%)
  0    52 ( 75.4)    19 ( 61.3)
  1    17 ( 24.6)    12 ( 38.7)

> Table(x,y,z)
 
 z = 0
   y
x   0     (%)     1     (%)
  0    38 ( 71.7)    15 ( 28.3)
  1    14 ( 63.6)     8 ( 36.4)
 
 z = 1
   y
x   0     (%)     1     (%)
  0    14 ( 77.8)     4 ( 22.2)
  1     3 ( 42.9)     4 ( 57.1)
>

#################################################
Table<-
function(..., margin = 1)
{
proportion.table<-
function(data, MARGIN = 1)
{
if(is.null(d <- dim(data)))
stop("data is not an array")
if(any(data < 0) || any(trunc(data) != data))
stop("data is not an array of counts")
if(length(MARGIN))
sweep(data, MARGIN = MARGIN, apply(data, MARGIN = MARGIN, sum), "/")
else data/sum(data)
}
tt <- table(...)
dd <- dim(tt)
dnam <- dimnames(tt)
if(length(dd) > 3)
stop("max 3 dimensions")
if(length(dd) == 1) {
dd <- c(dd, 1)
dim(tt) <- dd
margin <- 2
dnam<-list(dnam[[1]],names(dnam[1]))
}
dd[2] <- dd[2] * 2
rr <- array(dim = dd)
rr.lab <- rep(dnam[[2]], 2)
if(length(dd) == 3) {
for(j in 1:dd[3]) {
pp <- proportion.table(tt[,  , j], margin)
for(i in seq(1, dd[2], 2)) {
rr[, i, j] <- formatC(tt[, i/2 + 0.5, j], 0,5,format="f" )
rr[, i + 1, j] <- paste("(", formatC(100 * pp[, i/2 + 0.5], 1, 5,format="f"), 
")", sep = "")
rr.lab[i] <- dnam[[2]][i/2 + 0.5]
rr.lab[i + 1] <- "(%)"
} } }
else {
# dim<=2
pp <- proportion.table(tt, margin)
for(i in seq(1, dd[2], 2)) {
rr[, i] <- formatC(tt[, i/2 + 0.5], 0, 5,format="f")
rr[, i + 1] <- paste("(", formatC(100 * pp[, i/2 + 0.5], 1, 5,format="f"), 
")", sep = "")
rr.lab[i] <- dnam[[2]][i/2 + 0.5]
rr.lab[i + 1] <- "(%)"
}
}
dnam[[2]] <- rr.lab
dimnames(rr) <- dnam

if(length(dim(rr)) == 2) {
print(rr, quote = F)
}
else {
for(i in 1:dim(rr)[3]) {
cat("\n",names(dnam[3]),"=", dimnames(rr)[[3]][i], "\n")
print(rr[,  , i], quote = F)
}
}

invisible(rr)
}

#################################################

-- 
Victor Moreno                                       V.Moreno at ico.scs.es
Servei d'Epidemiologia i Registre del Cancer        http://lbe.uab.es
Institut Catala d'Oncologia                         
Gran Via km 2.7, 08907 Hospitalet, Barcelona, Spain
Tel: + 34 93260 7434 / 7401 / 7812                  fax +34 93260 7787
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._



More information about the R-help mailing list