[R] How can I make my functions run faster

Mon Aug 19 11:28:32 CEST 2013

Dear R users,

I have written a couple of R functions, some of them with the help of 
R group members. However, running them takes days rather than minutes or 
a few hours. I am wondering whether there is a way to make them run faster.

Here are all my R functions. The last one calls almost all of the 
previous functions. It is the one I am interested in most. It gives me 
the correct output but it takes several days to run only 1000 or 2000 
simulations!
For example, system.time(test1<-finalF(designs=5,swaps=20));test1
takes about 20 minutes to run, 
system.time(test1<-finalF(designs=5,swaps=50));test1 takes about 10 
hours, and system.time(test1<-finalF(designs=25,swaps=2000));test1 takes 
about 3 days.

Here are my functions

#####################################################################

# Session reset: list the global workspace, empty it, and trigger garbage
# collection before the definitions below are loaded.
ls() # list all existing objects
rm(list = ls()) # remove them all
# NOTE(review): the call above has already removed every object that ls()
# reports, so the pattern-based removal below has nothing left to act on.
rm(list = ls()[!grepl("global.var.A", ls())])
# refresh memory
gc()
ls()

### Define a function that requires useful input from the user
#b=4;g=seq(1,20,1);rb=5;cb=4;s2e=1; r=10;c=8

#####################################
####################################
# function to calculate heritability
# Heritability from a genetic variance and a residual variance.
#   varG : genetic (treatment) variance
#   varR : residual variance (defaults to 1)
# Returns 4 * varG / (varG + varR) in a vector named "heritability".
herit <- function(varG, varR = 1) {
  total_var <- varG + varR
  c(heritability = 4 * varG / total_var)
}

###################################
# function to calculate random error
# Residual variance implied by a genetic variance and a heritability.
#   varG : genetic (treatment) variance
#   h2   : heritability
# Returns varG * (4 - h2) / h2 in a vector named "random_error".
varR <- function(varG, h2) {
  c(random_error = varG * (4 - h2) / h2)
}

##########################################
# function to calculate treatment variance
# Genetic variance implied by a residual variance and a heritability.
#   varR : residual variance (defaults to 1)
#   h2   : heritability
# Returns varR * h2 / (4 - h2) in a vector named "treatment_variance".
varG <- function(varR = 1, h2) {
  c(treatment_variance = varR * h2 / (4 - h2))
}

###############################

# calculating R inverse from spatial data
# Inverse of the separable AR(1) x AR(1) residual covariance matrix.
#
# Reads from the enclosing (global) environment:
#   N    : number of plots
#   x, y : plot coordinates; the first N entries of each are used
# Arguments:
#   rhox, rhoy : autocorrelation applied to the x and y distances
# Returns the inverse of the N x N matrix R with
#   R[i, j] = s2e * rhox^|x[j] - x[i]| * rhoy^|y[j] - y[i]|,  s2e = 1.
rspat <- function(rhox = 0.6, rhoy = 0.6) {
  s2e <- 1
  idx <- seq_len(N)
  xs <- x[idx]
  ys <- y[idx]
  # All pairwise coordinate distances at once; this replaces an interpreted
  # double loop over every (i, j) pair.
  dx <- abs(outer(xs, xs, "-"))
  dy <- abs(outer(ys, ys, "-"))
  R <- s2e * (rhox^dx) * (rhoy^dy) # Core AR(1)*AR(1)
  solve(R)
}

# Pedigree table read from disk; the file has no header row.
# Plain `<-` is used because this is a top-level assignment.
ped <- read.table("ped2new.txt", header = FALSE)
# Now work on the pedigree
## A function to return Zinverse from pedigree

# Build the genotype design matrix Z and the inverse genetic covariance
# matrix from a pedigree table.
#
# Arguments:
#   ped : pedigree table. Column V1 lists the genotypes (its number of
#         distinct values sets the size of the relationship matrix);
#         columns 1 and 2 are used as row/column indices into that matrix
#         and column 3 as the value stored there.
# Reads from the enclosing (global) environment:
#   g     : treatments in the trial
#   matdf : current design; its `genotypes` column defines Z
#   s2g   : genetic variance that scales the relationship matrix
# Returns list(IG, Z): IG is the inverse of s2g * M, and Z is the design
# matrix of matdf$genotypes with zero columns prepended for the pedigree
# members that are not in the trial.
ZGped <- function(ped) {
  ped2 <- data.frame(ped)
  lenp2 <- length(unique(ped2$V1)) # genotypes in the pedigree
  ln2 <- length(g)                 # genotypes in the trial
  # calculate the new Z
  Zped <- model.matrix(~ matdf$genotypes - 1)
  dif <- lenp2 - ln2               # pedigree members not in the trial
  zeromatrix <- matrix(0, nrow = nrow(matdf), ncol = dif)
  Z <- cbind(zeromatrix, Zped)     # design matrix for the random effect
  # calculate the new G
  M <- matrix(0, lenp2, lenp2)
  # One (row, column) pair per pedigree record, filled in a single step.
  M[cbind(ped2[, 1], ped2[, 2])] <- ped2[, 3]
  G <- s2g * M                     # genetic variance-covariance matrix
  IG <- solve(G)
  list(IG = IG, Z = Z)
}

##########################
##    Required packages    #
############################
library(gmp)
library(knitr) # load this packages for publishing results
library(matlab)
library(Matrix)
library(psych)
library(foreach)
library(epicalc)
library(ggplot2)
library(xtable)
library(gdata)
library(gplots)

#b=6;g=seq(1,30,1);rb=5;cb=6;r=15;c=12;h2=0.3;rhox=0.6;rhoy=0.6;ped=0

# Generate a randomised block design and the model matrices derived from it.
#
# Arguments:
#   b          : number of blocks (a single whole number)
#   g          : vector of treatments; must have more than 2 entries and
#                exactly rb * cb of them
#   rb, cb     : number of rows / columns per block
#   r, c       : total number of rows / columns of the layout
#   h2         : heritability, passed to varG() to obtain s2g
#   rhox, rhoy : spatial autocorrelations; when either is 0 the residual
#                inverse is the identity, otherwise rspat() is used
#   ped        : "F" for no pedigree; any other value is passed to ZGped()
#
# Side effects: assigns g, x, y, matdf, N, X, h2, rhox, rhoy, s2g, IG, Z, IR
# and K with `<<-`; these are not part of the return value.
#
# Returns list(matdf, summary, description), the last two being the results
# of summ() and des() on the design.
setup <- function(b, g, rb, cb, r, c, h2, rhox = 0.6, rhoy = 0.6, ped = "F") {
  ### Check points
  if (missing(b) || identical(b, " ")) {
    stop(paste(sQuote("block"), " cannot be missing"))
  }
  if (!is.vector(g) || length(g) < 3) {
    stop(paste(sQuote("treatments"), " should be a vector and more than 2"))
  }
  if (!is.numeric(b)) {
    stop(paste(sQuote("block"), "is not of class", sQuote("numeric")))
  }
  if (length(b) > 1) {
    stop(paste(sQuote("block"), "has to be only 1 numeric value"))
  }
  if (!is.whole(b)) {
    stop(paste(sQuote("block"), "has to be an", sQuote("integer")))
  }

  ## Compatibility check
  if (rb * cb != length(g)) {
    stop(paste(sQuote("rb x cb"), " should be equal to number of treatment",
               sQuote("g")))
  }

  ## Generate the design: one fresh random ordering of g per block
  g <<- g
  genotypes <- times(b) %do% sample(g, length(g))
  block <- rep(1:b, each = length(g))
  genotypes <- factor(genotypes)
  block <- factor(block)

  ### generate the base design
  k <- c / cb # number of blocks on the x-axis
  x <<- rep(rep(1:r, each = cb), k) # X-coordinate

  l <- cb
  p <- r / rb
  m <- l + 1
  d <- l * b / p
  # `c` is also a numeric argument here; R still resolves c(...) to the
  # base function because it looks for a function in call position.
  y <<- c(rep(1:l, r), rep(m:d, r)) # Y-coordinate

  ## compact
  matdf <<- data.frame(x, y, block, genotypes)
  N <<- nrow(matdf)
  mm <- summ(matdf)
  ss <- des(matdf)

  ## Fixed-effect design matrix and variance parameters
  X <<- model.matrix(~ block - 1)
  h2 <<- h2
  rhox <<- rhox
  rhoy <<- rhoy
  s2g <<- varG(varR = 1, h2)

  ## calculate G and Z
  if (identical(ped, "F")) {
    IG <<- (1 / s2g) * eye(length(g))
    Z <<- model.matrix(~ matdf$genotypes - 1)
  } else {
    # Build the pedigree matrices once and reuse both components.
    ped_mats <- ZGped(ped)
    IG <<- ped_mats[[1]]
    Z <<- ped_mats[[2]]
  }

  ## calculate R and IR
  s2e <- 1
  if (rhox == 0 || rhoy == 0) {
    IR <<- (1 / s2e) * eye(N)
  } else {
    IR <<- rspat(rhox = rhox, rhoy = rhoy)
  }
  C11 <- t(X) %*% IR %*% X
  C11inv <- solve(C11)
  K <<- IR %*% X %*% C11inv %*% t(X) %*% IR

  list(matdf = matdf, summary = mm, description = ss)
}

#setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
#system.time(out3<-setup(b=6,g=seq(1,30,1),rb=5,cb=6,r=15,c=12,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out3
#system.time(out4<-setup(b=16,g=seq(1,196,1),rb=14,cb=14,r=56,c=56,h2=0.3,rhox=0.6,rhoy=0.6,ped="F"));out4

####################################################
# The function below uses shortcuts from the textbook by Harville (1997):
# it applies the inverse of a partitioned matrix technique
####################################################

# Optimality criteria of the current design.
#
# Reads Z, IR, IG and K from the enclosing (global) environment and inverts
#   t(Z) %*% IR %*% Z + IG - t(Z) %*% K %*% Z.
# Arguments:
#   criteria : "A" returns the trace of that inverse, "D" the log of its
#              determinant, any other value returns both. Only the first
#              element is used, so the default c("A", "D") means "A".
# Side effects: assigns traceI and doptimI with `<<-`.
mainF <- function(criteria = c("A", "D")) {
  # A length-2 condition inside if() is an error in current R.
  criteria <- criteria[1L]

  ### Variance covariance matrices
  Zt <- t(Z)
  temp <- Zt %*% IR %*% Z + IG - Zt %*% K %*% Z
  C22 <- solve(temp)

  ##########################
  ##   Optimality Criteria
  #########################
  trace_val <- sum(diag(C22)) # A-optimality
  # D-optimality: minimise the determinant of the inverse of the
  # information matrix
  logdet_val <- log(det(C22))
  traceI <<- trace_val
  doptimI <<- logdet_val

  if (identical(criteria, "A")) {
    return(trace_val)
  }
  if (identical(criteria, "D")) {
    return(logdet_val)
  }
  c(trace_val, logdet_val)
}

# system.time(res1<-mainF(criteria="A"));res1
# system.time(res2<-mainF(criteria="D"));res2
#system.time(res3<-mainF(criteria="both"));res3

##############################################
### Swap function that takes matdf and assigns
## the global values newmatdf and the design
## matrices Z and IG
##############################################

# Swap two randomly chosen genotypes inside one randomly chosen block.
#
# Arguments:
#   matdf : design with `block` and `genotypes` columns
#   ped   : "F" for no pedigree; any other value is passed to ZGped()
# Reads s2g, g, X and IR from the enclosing (global) environment.
# Side effects: assigns newmatdf, IG, Z and K with `<<-`.
# Returns list(newmatdf, summary, description); newmatdf is matdf with the
# swapped allocation appended as the factor column NewG.
swapsimple <- function(matdf, ped = "F") {
  matdf <- as.data.frame(matdf)

  # Columns are read through matdf$...; attaching the data frame on every
  # call would add one more entry to the search path each time.
  b1 <- sample(matdf$block, 1, replace = TRUE)
  in_block <- matdf$block == b1

  # Work on the integer codes of the genotype factor.
  gen <- matdf$genotypes
  codes <- if (is.factor(gen)) as.integer(gen) else gen
  block_codes <- codes[in_block]
  picked <- block_codes[sample.int(length(block_codes), 2)]

  # Both masks are taken from the original allocation before either
  # replacement is written.
  is_first <- in_block & codes == picked[1]
  is_second <- in_block & codes == picked[2]
  codes[is_first] <- picked[2]
  codes[is_second] <- picked[1]
  NewG <- factor(codes)

  ## now, new design after swapping is
  newmatdf <- cbind(matdf, NewG)
  newmatdf <<- as.data.frame(newmatdf)
  mm <- summ(newmatdf)
  ss <- des(newmatdf)

  ## calculate G and Z
  if (identical(ped, "F")) {
    IG <<- (1 / s2g) * eye(length(g))
    Z <<- model.matrix(~ newmatdf$NewG - 1)
  } else {
    # NOTE(review): ZGped() builds Z from the global matdf$genotypes, so the
    # swap stored in NewG is not reflected on this branch - confirm intent.
    ped_mats <- ZGped(ped)
    IG <<- ped_mats[[1]]
    Z <<- ped_mats[[2]]
  }

  ## K depends only on X and IR, which this function does not change
  C11 <- t(X) %*% IR %*% X
  C11inv <- solve(C11)
  K <<- IR %*% X %*% C11inv %*% t(X) %*% IR

  list(newmatdf = newmatdf, summary = mm, description = ss)
}
#swapsimple(matdf,ped="F")[c(2,3)]
#which(newmatdf$genotypes != newmatdf$NewG)
###########################################
# for one design, swap pairs of treatments
# several times and store the traces
# of the successive swaps
##########################################

# Run `iterations` rounds of setup() + swapsimple() + mainF("A") and keep
# the rounds whose trace does not exceed the last kept trace.
#
# Arguments:
#   iterations : number of rounds
#   verbose    : print the full trace matrix before filtering
# Returns a two-column matrix (trace, iterations) holding the kept rounds.
#
# NOTE(review): setup() is re-run inside the loop with hard-coded arguments,
# so every trace value comes from a freshly randomised design with a single
# swap applied, not from successive swaps of one design - confirm intent.
optmF <- function(iterations = 2, verbose = FALSE) {
  n_iter <- iterations
  trace <- numeric(n_iter)

  for (k in seq_len(n_iter)) {
    setup(b = 6, g = seq(1, 30, 1), rb = 5, cb = 6, r = 15, c = 12,
          h2 = 0.3, rhox = 0.6, rhoy = 0.6, ped = "F")
    swapsimple(matdf, ped = "F")
    trace[k] <- mainF(criteria = "A")
  }
  mat <- cbind(trace, iterations = seq_len(n_iter))

  if (verbose) {
    cat("***starting matrix\n")
    print(mat)
  }

  # Dropping every row whose trace is larger than the previous kept row
  # leaves exactly the rows that equal the running minimum.
  keep <- trace == cummin(trace)
  mat[keep, , drop = FALSE]
}

#system.time(test1<-optmF(iterations=10));test1

################################################
###############################################

# Swap two randomly chosen genotypes inside one randomly chosen block and
# return the optimality criterion of the resulting design.
#
# Arguments:
#   matdf    : design with `block` and `genotypes` columns
#   ped      : "F" for no pedigree; any other value is passed to ZGped()
#   criteria : "A" returns the trace of C22, "D" the log of its determinant,
#              any other value returns both. Only the first element is
#              used, so the default c("A", "D") means "A".
# Reads s2g, g, X and IR from the enclosing (global) environment.
# Side effects: assigns newmatdf, IG and Z with `<<-`.
swap <- function(matdf, ped = "F", criteria = c("A", "D")) {
  # A length-2 condition inside if() is an error in current R.
  criteria <- criteria[1L]
  matdf <- as.data.frame(matdf)

  # Columns are read through matdf$...; attaching the data frame on every
  # call would add one more entry to the search path each time.
  b1 <- sample(matdf$block, 1, replace = TRUE)
  in_block <- matdf$block == b1

  # Work on the integer codes of the genotype factor.
  gen <- matdf$genotypes
  codes <- if (is.factor(gen)) as.integer(gen) else gen
  block_codes <- codes[in_block]
  picked <- block_codes[sample.int(length(block_codes), 2)]

  # Both masks are taken from the original allocation before either
  # replacement is written.
  is_first <- in_block & codes == picked[1]
  is_second <- in_block & codes == picked[2]
  codes[is_first] <- picked[2]
  codes[is_second] <- picked[1]
  NewG <- factor(codes)

  ## now, new design after swapping is
  newmatdf <- cbind(matdf, NewG)
  newmatdf <<- as.data.frame(newmatdf)
  # NOTE(review): these two results are not used in this function; the
  # calls are kept in case summ()/des() are wanted for their output.
  mm <- summ(newmatdf)
  ss <- des(newmatdf)

  ## calculate G and Z
  if (identical(ped, "F")) {
    IG <<- (1 / s2g) * eye(length(g))
    Z <<- model.matrix(~ newmatdf$NewG - 1)
  } else {
    # NOTE(review): ZGped() builds Z from the global matdf$genotypes, so the
    # swap stored in NewG is not reflected on this branch - confirm intent.
    ped_mats <- ZGped(ped)
    IG <<- ped_mats[[1]]
    Z <<- ped_mats[[2]]
  }

  ## K depends only on X and IR
  C11 <- t(X) %*% IR %*% X
  C11inv <- solve(C11)
  K <- IR %*% X %*% C11inv %*% t(X) %*% IR
  Zt <- t(Z)
  temp <- Zt %*% IR %*% Z + IG - Zt %*% K %*% Z
  C22 <- solve(temp)

  ##########################
  ##   Optimality Criteria
  #########################
  traceI <- sum(diag(C22)) # A-optimality
  doptimI <- log(det(C22)) # D-optimality
  if (identical(criteria, "A")) {
    return(traceI)
  }
  if (identical(criteria, "D")) {
    return(doptimI)
  }
  c(traceI, doptimI)
}

#swap(matdf,ped="F",criteria="both")

###########################################
### Generate 25 initial designs
###########################################
#rspatf<-function(design){
#  arr = array(1, dim=c(nrow(matdf),ncol(matdf)+1,design))
#  l<-list(length=dim(arr)[3])
#  for (i in 1:dim(arr)[3]){
#    l[[i]]<-swapsimple(matdf,ped="F")[[1]][,,i]
#  }
#  l
#}
#matd<-rspatf(design=5)
#matd

#which(matd[[1]]$genotypes != matd[[1]]$NewG)
#which(matd[[2]]$genotypes != matd[[2]]$NewG)

###############################################
###############################################

# Run `iterations` rounds of setup() + swap(criteria = "A") and keep the
# rounds whose trace does not exceed the last kept trace.
#
# Arguments:
#   iterations : number of rounds
#   verbose    : print the full trace matrix before filtering
# Returns a two-column matrix (trace, iterations) holding the kept rounds.
#
# NOTE(review): setup() is re-run inside the loop with hard-coded arguments,
# so every trace value comes from a freshly randomised design with a single
# swap applied, not from successive swaps of one design - confirm intent.
optm <- function(iterations = 2, verbose = FALSE) {
  n_iter <- iterations
  trace <- numeric(n_iter)

  for (k in seq_len(n_iter)) {
    setup(b = 6, g = seq(1, 30, 1), rb = 5, cb = 6, r = 15, c = 12,
          h2 = 0.3, rhox = 0.6, rhoy = 0.6, ped = "F")
    trace[k] <- swap(matdf, ped = "F", criteria = "A")
  }
  mat <- cbind(trace, iterations = seq_len(n_iter))

  if (verbose) {
    cat("***starting matrix\n")
    print(mat)
  }

  # Dropping every row whose trace is larger than the previous kept row
  # leaves exactly the rows that equal the running minimum.
  keep <- trace == cummin(trace)
  mat[keep, , drop = FALSE]
}

#system.time(res<-optm(iterations=10));res
#################################################
################################################
# Run the swap search for several designs and collect the results.
#
# Arguments:
#   designs : number of designs to evaluate
#   swaps   : number of rounds passed to optmF() for each design
# Returns list(OP, Miny, Maxy, Minx, Maxx, Nmatdf): per design, the matrix
# returned by optmF(), the min/max of its trace column, the min/max of its
# iterations column, and column 5 of the global newmatdf left by the last
# round. This gives both the optimal conditions and the designs.
finalF <- function(designs, swaps) {
  OP <- vector("list", designs)
  Nmatdf <- vector("list", designs)
  Miny <- numeric(designs)
  Maxy <- numeric(designs)
  Minx <- numeric(designs)
  Maxx <- numeric(designs)

  for (i in seq_len(designs)) {
    # NOTE(review): optmF() calls setup() itself with its own hard-coded
    # arguments, so the design generated here is replaced before any trace
    # is recorded - confirm which set of parameters is intended.
    setup(b = 4, g = seq(1, 20, 1), rb = 5, cb = 4, r = 10, c = 8,
          h2 = 0.3, rhox = 0.6, rhoy = 0.6, ped = "F")
    mainF(criteria = "A")

    # One optmF() run already performs `swaps` rounds. Repeating it `swaps`
    # times and overwriting the same slot only multiplied the run time.
    OP[[i]] <- optmF(iterations = swaps)
    Nmatdf[[i]] <- newmatdf[, 5]
    Miny[i] <- min(OP[[i]][, 1])
    Maxy[i] <- max(OP[[i]][, 1])
    Minx[i] <- min(OP[[i]][, 2])
    Maxx[i] <- max(OP[[i]][, 2])
  }

  list(OP = OP, Miny = Miny, Maxy = Maxy, Minx = Minx, Maxx = Maxx,
       Nmatdf = Nmatdf)
}

#################################################
# Mirror console output into a time-stamped text file, time the full run,
# print the result, then stop mirroring.
sink(
  file = paste(
    format(Sys.time(), "Final_%a_%b_%d_%Y_%H_%M_%S"),
    "txt",
    sep = "."
  ),
  split = TRUE
)
system.time(test1 <- finalF(designs = 25, swaps = 2000))
test1
sink()

I expect results like the ones below

>sink()
>finalF<-function(designs,swaps)
+{
+   Nmatdf<-list()
+   OP<-list()
+   Miny<-NULL
+   Maxy<-NULL
+   Minx<-NULL
+   Maxx<-NULL
+   for (i in 1:designs)
+   {
+     setup(b=4,g=seq(1,20,1),rb=5,cb=4,r=10,c=8,h2=0.3,rhox=0.6,rhoy=0.6,ped="F")[1]
+     mainF(criteria="A")
+     for (j in 1:swaps)
+     {
+       OP[[i]]<- optmF(iterations=swaps)
+       Nmatdf[[i]]<-newmatdf[,5]
+       Miny[i]<-min(OP[[i]][,1])
+       Maxy[i]<-max(OP[[i]][,1])
+       Minx[i]<-min(OP[[i]][,2])
+       Maxx[i]<-max(OP[[i]][,2])
+     }
+   }
+   return(list(OP=OP,Miny=Miny,Maxy=Maxy,Minx=Minx,Maxx=Maxx,Nmatdf=Nmatdf)) # gives us both the Optimal conditions and designs
+}
>sink(file= paste(format(Sys.time(), "Final_%a_%b_%d_%Y_%H_%M_%S"),"txt",sep="."),split=TRUE)
>system.time(test1<-finalF(designs=5,swaps=5));test1
    user  system elapsed
   37.88    0.00   38.04
$OP
$OP[[1]]
          trace iterations
[1,] 0.8961335          1
[2,] 0.8952822          3
[3,] 0.8934649          4

$OP[[2]]
         trace iterations
[1,] 0.893955          1

$OP[[3]]
          trace iterations
[1,] 0.9007225          1
[2,] 0.8971837          4
[3,] 0.8902474          5

$OP[[4]]
          trace iterations
[1,] 0.8964726          1
[2,] 0.8951722          4

$OP[[5]]
          trace iterations
[1,] 0.8973285          1
[2,] 0.8922594          4

$Miny
[1] 0.8934649 0.8939550 0.8902474 0.8951722 0.8922594

$Maxy
[1] 0.8961335 0.8939550 0.9007225 0.8964726 0.8973285

$Minx
[1] 1 1 1 1 1

$Maxx
[1] 4 1 5 4 4

$Nmatdf
$Nmatdf[[1]]
   [1] 30 8  5  28 27 29 1  26 24 22 13 6  17 18 2  19 14 11 3  23 10 15 21 9  25 4  7  20 12 16 14 17 15 5  8  6  19
  [38] 4  1  10 11 3  24 20 13 2  27 12 16 28 21 23 30 25 29 7  26 18 9  22 24 21 26 2  13 30 5  28 20 11 3  7  18 25
  [75] 22 16 4  17 19 27 29 10 23 6  12 15 14 1  9  8  12 11 3  8  5  20 23 22 7  15 19 29 24 27 13 2  6  1  21 26 25
[112] 10 16 14 18 4  30 17 9  28 29 9  7  27 11 2  30 18 8  14 19 20 15 21 4  3  16 24 13 28 26 10 12 6  5  25 1  17
[149] 23 22 21 2  23 16 4  10 9  22 30 24 1  27 3  20 12 5  26 17 28 11 7  14 8  25 19 13 18 29 15 6
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[2]]
   [1] 5  13 30 2  21 23 6  27 16 19 8  26 18 4  20 9  22 28 7  3  15 10 11 17 25 24 29 1  14 12 28 18 23 19 21 16 17
  [38] 29 13 7  15 27 25 22 10 1  2  5  30 9  20 3  14 24 26 4  6  12 11 8  8  18 25 12 5  23 21 4  9  17 20 1  2  6
  [75] 22 7  16 26 30 29 3  15 19 14 13 11 24 28 27 10 16 21 26 23 25 4  9  24 15 14 22 1  20 27 2  7  17 18 13 8  12
[112] 5  6  19 28 3  10 30 11 29 11 30 14 9  26 5  1  10 29 28 4  18 8  24 20 13 3  23 27 6  15 16 21 2  17 7  25 12
[149] 19 22 7  28 8  11 26 24 12 29 9  16 21 27 22 23 18 19 13 6  15 3  1  30 2  17 14 5  25 20 4  10
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[3]]
   [1] 7  25 4  30 12 11 14 13 26 1  10 21 15 22 29 19 27 16 2  24 28 20 3  5  23 8  18 6  17 9  6  21 9  15 11 17 13
  [38] 29 24 4  20 7  23 14 2  16 18 26 19 25 8  1  12 10 28 27 22 30 5  3  20 12 8  2  11 18 24 19 9  22 15 7  30 27
  [75] 17 29 6  3  5  1  21 25 28 14 23 4  16 26 13 10 20 29 26 25 15 22 9  10 28 17 18 21 6  16 7  1  3  24 11 2  4
[112] 14 8  5  13 27 23 30 19 12 6  30 1  2  7  28 18 8  20 10 4  25 14 19 27 11 13 29 12 9  3  26 22 21 16 15 17 24
[149] 5  23 17 6  25 11 21 29 5  26 13 7  15 2  9  4  18 30 3  8  20 24 27 22 19 16 28 12 1  23 14 10
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[4]]
   [1] 24 8  17 30 10 20 4  28 25 16 14 13 7  12 26 29 21 19 1  22 11 6  23 18 15 5  27 2  3  9  1  24 27 15 26 14 28
  [38] 20 8  5  4  29 2  25 9  13 6  21 7  22 30 17 3  10 12 19 11 18 16 23 25 18 3  29 1  4  8  6  9  30 2  14 11 16
  [75] 23 13 10 12 7  19 17 5  21 28 24 20 15 27 26 22 14 5  7  6  17 3  1  29 25 23 19 11 21 18 4  30 20 8  2  12 9
[112] 16 10 15 27 26 13 24 28 22 19 7  17 1  12 8  18 16 14 22 3  28 27 25 10 6  4  15 30 9  11 5  20 26 24 29 21 2
[149] 23 13 2  16 10 25 18 15 26 22 12 19 30 17 23 8  3  7  20 14 13 28 9  21 11 29 6  5  4  24 27 1
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30

$Nmatdf[[5]]
   [1] 12 18 8  22 9  21 2  1  29 13 30 25 17 6  16 5  26 7  3  14 23 15 28 27 10 24 20 11 19 4  20 30 14 27 25 4  6
  [38] 28 23 8  9  29 26 19 24 7  5  1  11 22 21 2  10 18 12 15 3  17 13 16 16 22 6  9  21 5  14 2  30 10 3  25 27 15
  [75] 28 7  17 20 11 8  19 29 12 26 24 13 1  4  18 23 4  16 10 25 5  13 18 19 22 7  28 30 23 21 11 2  14 9  20 24 8
[112] 17 1  15 29 6  12 27 3  26 14 8  26 6  20 9  15 23 3  22 7  30 25 24 1  10 19 21 4  11 2  18 17 13 28 29 27 16
[149] 12 5  19 2  4  5  15 21 17 7  25 8  6  16 20 29 10 18 1  12 26 28 27 11 14 23 22 9  3  13 30 24
Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30