[R] How can I avoid the for and If loops in my function?

Mramba,Lazarus K lmramba at ufl.edu
Wed Jun 18 21:14:17 CEST 2014


Hi R-users,

### reproducible example:

library(gmp)
library(matlab)
library(Matrix)
library(foreach)
library(MASS)
library(mvtnorm)

#####################################
## A function to create a field experiment
## Returns a data frame, called matdf
######################################

setup<-function(b,g,rb,cb,r,c)
{
   # where
   # b   = number of blocks
   # g   = number of treatments per block
   # rb  = number of rows per block
   # cb  = number of columns per block
   # r   = total number of rows for the layout
   # c   = total number of columns for the layout

   ### Check points
   stopifnot(is.numeric(b),is.whole(b),is.numeric(g),g>1)
   ## Compatibility checks
   genot<<-seq(1,g,1)
   stopifnot(rb*cb==length(genot),r/rb * c/cb == b)
   ## Generate the design
   genotypes<-times(b) %do% sample(genot,g)
   block<-rep(1:b,each=length(genot))
   genotypes<-factor(genotypes)
   block<-factor(block)
   ### generate the base design
   k<-c/cb # number of blocks on the x-axis
   y<-rep(rep(1:r,each=cb),k)  # X-coordinate
   #w<-rb
   l<-cb
   p<-r/rb
   m<-l+1
   d<-l*b/p
   x<-c(rep(1:l,r),rep(m:d,r)) # Y-coordinate
   ## compact
   matdf<-data.frame(x,y,block,genotypes)
   matdf[order(matdf$x),]
}

#########################################################################
## a function to calculate trace from the original data frame
## Returns trace of the variance-covariance matrix, named C22
######################################################################

mainF<-function(matdf,h2=h2,rhox=rhox,rhoy=rhoy,ped="F",s2e=1,c=c,r=r)
{
   N<-nrow(matdf)
   ## Identity matrices
   X<<-model.matrix(~matdf$block)
   s2g<-varG(s2e,h2)
   ## calculate G and Z
   IG<<-(1/s2g)*eye(length(genot))
   z<-model.matrix(~matdf$genotypes-1) # changes everytime there is a 
swap
   ## calculate R and IR
   # rhox=rhoy=0.3;s2e=1

   # # calculate R and IR
   sigx <- diag(c)
   sigx <- rhox^ abs(row(sigx) - col(sigx))
   sigy <- diag(r)
   sigy <- rhoy ^ abs(row(sigy) - col(sigy))
   R<- s2e * kronecker(sigx, sigy)  # takes 0.01 second
   ################
   # find inverse of R by choleski decomposition
   IR<<-chol2inv(chol(R)) # takes about 20 seconds

   ####
   #### brute force matrix multiplication
   C11<-t(X)%*%IR%*%X
   C11inv<-chol2inv(chol(C11))
   k1<<-IR%*%X # 0.2 seconds
   k2<-C11inv%*%t(X) # 0 seconds
   k3<-k2%*%IR # 0.2 seconds
   K<<-k1%*%k3 # 0.16 seconds

   ### Variance covariance matrices
   temp<-t(z)%*%IR%*%z+IG - t(z)%*%K%*%z

   C22<-chol2inv(chol(temp))
   ##########################
   ##   Optimality Criteria
   #########################
   traceI=sum(diag(C22)) # A-Optimality
   traceI
}

## ################################################
### My function to randomly swap two elements in the same block of a 
dataframe
### returns a dataframe, called newmatdf
####################################################
swapsimple<-function(matdf)
{
   ## now, new design after swapping is
   attach(matdf,warn.conflict=FALSE)
   b1<-sample(matdf$block,1,replace=TRUE);b1
   gg1<-matdf$genotypes[block==b1];gg1
   g1<-sample(gg1,2);g1
   samp<-Matrix(c(g1=g1,block=b1),nrow=1,ncol=3,
                dimnames=list(NULL,c("gen1","gen2","block")));samp
   newGen<-matdf$genotypes
   newG<-ifelse(matdf$genotypes==samp[,1] & 
block==samp[,3],samp[,2],matdf$genotypes)
   NewG<-ifelse(matdf$genotypes==samp[,2] & 
block==samp[,3],samp[,1],newG)
   NewG<-factor(NewG)
   ## now, new design after swapping is
   newmatdf<-cbind(matdf,NewG)
   newmatdf<-as.data.frame(newmatdf)
   names(newmatdf)[names(newmatdf)=="genotypes"] <- "old_G"
   names(newmatdf)[names(newmatdf)=="NewG"] <- "genotypes"
   #newmatdf <- remove.vars(newmatdf, "old_G")
   newmatdf$old_G <- newmatdf$old_G <- NULL
   newmatdf[order(newmatdf$x),]
}

#############################################################
### My function to re-calculate trace after swaping the pairs of 
elements
################################################

swapmainF<-function(newmatdf)
{
   Z<-model.matrix(~newmatdf$genotypes-1)
   ### Variance covariance matrices
   temp<-t(Z)%*%IR%*%Z+IG - t(Z)%*%K%*%Z
   C22<-chol2inv(chol(temp))
   ##########################
   ##   Optimality Criteria
   #########################
   traceI=sum(diag(C22)) # A-Optimality
   traceI
}

#######################################
#I need help in the function below
## I need to avoid the for loops and if loops here :
########################################################


########################################################
#Take an original matrix, called matrix0, calculate its trace. Generate
#a new matrix, called newmatdf after  swapping two elements of the  old
#one and  calculate the trace. If the trace of the newmatrix is smaller
#than that of the previous matrix, store both the current trace 
together with
#the older trace and their  iteration values. If the newer matrix has a
#trace larger than the previous trace, drop this trace and drop this
#matrix too (but count its iteration).
#Re-swap the old matrix that you stored previously and recalculate the
#trace. Repeat the process many times, say 10,000. The final results 
should be a list
#with the original initial matrix and its trace, the final best
#matrix that had the smallest trace after the 10000 simulations and a
#dataframe  showing the values of the accepted traces that
#were smaller than the previous and their respective iterations.
####################################################################

funF<- function(newmatdf,n,traceI)
{
   matrix0<-newmatdf
   trace<-traceI
   res <- list(mat = NULL, Design_best = newmatdf, Original_design = 
matrix0)
   res$mat <- rbind(res$mat, c(value = trace, iterations = 0))
   Des<-list()
   for(i in seq_len(n)){
     ifelse(i==1, 
newmatdf<-swapsimple(matrix0),newmatdf<-swapsimple(newmatdf))
     Des[[i]]<-newmatdf
     if(swapmainF(newmatdf) < trace){
       newmatdf<-Des[[i]]
       Des[[i]]<-newmatdf
       trace<- swapmainF(newmatdf)
       res$mat <- rbind(res$mat, c(trace = trace, iterations = i))
       res$Design_best <- newmatdf
     }
     if(swapmainF(newmatdf) > trace & nrow(res$mat)<=1){
       newmatdf<-matrix0
       Des[[i]]<-matrix0
       res$Design_best<-matrix0
     }
     if(swapmainF(newmatdf)> trace & nrow(res$mat)>1){
       newmatdf<-Des[[length(Des)-1]]
       Des[[i]]<-newmatdf
       res$Design_best<-newmatdf
     }
   }
   res
}



######################################
## Call the function setup, generate 100 designs,
## calculate their traces using the function mainF,
## choose the dataframe with the smallest trace
## store only that dataframe and its trace for later use
######################################
M2F<-function(D,ped="F",k=1,b,g,rb,cb,r,c,
               h2,rhox,rhoy,s2e=1)
{
   matrix0<-list()
   start0 <- c()
   value0 <- c()
   for (i in 1:D)
   {
     print(sprintf("generating initial design: %d", i, "complete\n", 
sep=""))
     flush.console()
     matrix0[[i]]<-setup(b=b,g=g,rb=rb,cb=cb,r=r,c=c)
     
start0[i]<-mainF(matdf=matrix0[[i]],h2=h2,rhox=rhox,rhoy=rhoy,s2e=s2e,c=c,r=r)
     s<-which.min(start0)
     newmatdf<-matrix0[s][[1]]
     trace0<-start0[s][[1]]
   }
   list(newmatdf=newmatdf,start0=start0,trace0=trace0,index=s)
}

################################################
####  Test my functions ### works perfectly but takes too long
###########################################

b=16;g=196;rb=14;cb=14;r=56;c=56;h2=0.1;rhox=0.3;rhoy=0.3
h2=0.1;rhox=0.3;rhoy=0.3;s2e=1
#

tic() # takes  42.020000 seconds  for D==2. but for D==100 , takes 
about 30 minutes !!!
res1<-M2F(D=2,ped="F",k=1,b=b,g=g,rb=rb,cb=cb,r=r,c=c,
            h2=0.1,rhox=0.3,rhoy=0.3,s2e=1)
toc()

tic() # takes 37.720000 seconds for n==5 but I need for n==4000 or more 
takes >7hours
ans1<-funF(res1$newmatdf,traceI=res1$trace0,n=5)
toc()

ans1$mat

regards,
Laz



More information about the R-help mailing list