[R] multiple paired t-tests without loops

Matthew Finkbeiner matthew.finkbeiner at maccs.mq.edu.au
Tue Apr 27 00:09:27 CEST 2010


Yes, I suspect that I will end up using a sampling approach, but I'd 
like to use an exact test if it's at all feasible.

Here are two samples of data from 3 subjects:
Sample	Subj	C1	C2
44	1	0.0093	0.0077
44	2	0.0089	0.0069
44	3	0.051	0.0432
44	4	0.014	0.0147
44	5	0.0161	0.0117
45	1	0.0103	0.0086
45	2	0.0099	0.0078
45	3	0.0542	0.0458
45	4	0.0154	0.0163
45	5	0.0175	0.0129


and then here is the script I've pieced together from things I've found 
on the web (sorry for not citing the snippets!).  any pointers on how to 
speed it up would be greatly appreciated.

#----------------------------------
# Utility function
# that returns binary representation of 1:(2^n) X SubjN
binary.v <-
function(n)
{
   x <- 1:(2^n)
   mx <- max(x)
   digits <- floor(log2(mx))
   ans <- 0:(digits-1); lx <- length(x)
   x <- matrix(rep(x,rep(digits, lx)),ncol=lx)
   x <- (x %/% 2^ans) %% 2
}

library(plyr)


#first some global variables
TotalSubjects <- 5
TotalSamples <- 2
StartSample <- 44
EndSample <- ((StartSample + TotalSamples)-1)
maxTs <- NULL
obsTs <- NULL




#create index array that drives the permuations for all samples
ind <- binary.v(TotalSubjects)

#transpose ind so that the first 2^N items correspond to S1,
#the second 2^N correspond to S2 and so on...
transind <- t(ind)

#get data file that is organized first by sample then by subj (e.g. 
sample1 subject1
# sample1 subject 2 ... sample 1 subject N)
#sampledatafile <- file.choose()

samples <- read.table(sampledatafile, header=T)

#this is the progress bar
pb <- txtProgressBar(min = StartSample, max = EndSample, style = 3)
setTxtProgressBar(pb, 1)

start.t <- proc.time()

#begin loop that analyzes data sample by sample
for (s in StartSample:EndSample) {

     S <- samples[samples$Sample==s,] #pick up data for current sample

     #reproduce data frame rows once for each permutation to be done
     expanddata <- S[rep(1:nrow(S), each = 2^TotalSubjects),]


     #create new array to hold the flipped (permuted) data
     permdata = expanddata

     #permute the data
     permdata[transind==1,3] <- expanddata[transind==1,4] #Cnd1 <- Cnd2
     permdata[transind==1,4] <- expanddata[transind==1,3] #Cnd2 <- Cnd1

     #create permutation # as a factor in dataframe
     PermN <- rep(rep(1:2^TotalSubjects, TotalSubjects),2)

     #create Sample# as a factor
     Sample <- rep(permdata[,1],2) #Sample# is in the 1st Column

     #create subject IDs as a factor
     Subj <- rep(permdata[,2],2) #Subject ID is in the 2nd Column

     #stack the permutated data
     StackedPermData <- stack(permdata[,3:4])

     #bind all the factors together
     StackedPermData <- as.data.frame(cbind(Sample, Subj, PermN, 
StackedPermData))


     #sort by perm
     sortedstack <- 
as.data.frame(StackedPermData[order(StackedPermData$PermN,
                                 StackedPermData$Sample),])


     #clear up some memory
     rm(expanddata, permdata, StackedPermData)

     #pull out data 1 perm at a time
     res<-ddply(sortedstack, c("Sample", "PermN"), function(.data){

         # Type combinations by Class
         combs<-t(combn(sort(unique(.data[,5])),2))

         # Applying the t-test for them
         aaply(combs,1, function(.r){
         x1<-.data[.data[,5]==.r[1],4] # select first column
         x2<-.data[.data[,5]==.r[2],4] # select first column

         tvalue <- t.test(x1,x2, paired = T)

         res <- c(tvalue$statistic,tvalue$parameter,tvalue$p.value)
         names(res) <- c('stat','df','pvalue')
         res
         }
         )
     }
     )

      # update progress bar
      setTxtProgressBar(pb, s)

      #get max T vals
      maxTs <- c(maxTs, tapply (res$stat, list (res$Sample), max))

      #get observed T vals
      obsTs <- c(obsTs, res$stat[length(res$stat)])

      #here we need to save res to a binary file


}
#close out the progress bar
close(pb)

end.t <- proc.time() - start.t
print(end.t)

#get cutoffs
#these are the 2-tailed t-vals that maintain experimentwise error at the 
0.05 level
lowerT <- quantile(maxTs, .025)
upperT <- quantile(maxTs, .975)











On 4/27/2010 6:53 AM, Greg Snow wrote:
> The usual way to speed up permutation testing is to sample from the
> set of possible permutations rather than looking at all possible
> ones.
>
> If you show some code then we may be able to find some inefficiencies
> for you, but there is not general solution, poorly written uses of
> apply will be slower than well written for loops.  In some cases
> rewriting critical pieces in C or fortran will help quite a bit, but
> we need to see what you are already doing to know if that will help
> or not.
>



More information about the R-help mailing list