[R] Performance enhancement for ave

Hadley Wickham hadley at rice.edu
Tue Jun 29 05:38:59 CEST 2010


library(plyr)

# Simulated data: n observations, each assigned one of 750 levels in each of
# two separately sampled grouping variables, plus two numeric columns to be
# summarised per group. TRUE is spelled out because T is an ordinary variable
# that can be reassigned.
n <- 100000
grp1 <- sample(1:750, n, replace = TRUE)
grp2 <- sample(1:750, n, replace = TRUE)
d <- data.frame(x = rnorm(n), y = rnorm(n), grp1 = grp1, grp2 = grp2)

# Baseline: pass both grouping vectors to ave() as a single list. No FUN is
# given, so ave() applies its default summary function within each group.
# The timing recorded below is the one reported in the original post.
system.time({
  d$avx1 <- ave(d$x, list(d$grp1, d$grp2))
  d$avy1 <- ave(d$y, list(d$grp1, d$grp2))
})
#   user  system elapsed
# 39.300   0.279  40.809
# Alternative: build the grouping factor explicitly with interaction() and
# drop the level combinations that never occur in the data before handing it
# to ave(). TRUE is spelled out because T is an ordinary variable that can be
# reassigned. The timing recorded below is the one reported in the original
# post.
system.time({
  d$avx2 <- ave(d$x, interaction(d$grp1, d$grp2, drop = TRUE))
  d$avy2 <- ave(d$y, interaction(d$grp1, d$grp2, drop = TRUE))
})
#  user  system elapsed
# 6.735   0.209   7.064

# Confirm that the two grouping approaches produce the same per-row results
# (compared with all.equal(), i.e. up to numeric tolerance).
all.equal(d[["avy1"]], d[["avy2"]])
# TRUE
all.equal(d[["avx1"]], d[["avx2"]])
# TRUE

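In other words, ave() should build its grouping factor with
g <- interaction(..., drop = TRUE), so that combinations of levels that never
occur in the data are dropped before splitting.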

Hadley

-- 
Assistant Professor / Dobelman Family Junior Chair
Department of Statistics / Rice University
http://had.co.nz/



More information about the R-help mailing list