[R] foreach {parallel} nested with for loop to update data.frame column

Alexander.Herr at csiro.au Alexander.Herr at csiro.au
Tue Aug 9 07:38:11 CEST 2016


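Actually, for it to work properly, the inner for loop must iterate only over the values of column 2 that occur together with the current foreach value i; otherwise min() is evaluated on empty subsets and yields Inf values...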

# library() fails loudly if a package is missing; require() would only
# return FALSE and let the script break later.
library(doParallel)
library(foreach)

# Example data: 100000 rows with columns x, y, z. Column 4 (`mins`) is filled
# below with the minimum z found within each (x, y) combination.
set.seed(666)
xyz <- data.frame(
  x = rep(rpois(50000, 10), 2) + 1,
  y = rep(rpois(50000, 10), 2) + 1,
  z = round(runif(100000, min = -3, max = 40), 2)
)
xyz$mins <- NA_real_
xyz <- xyz[order(xyz[, 1], xyz[, 2], xyz[, 3]), ]

cl <- makeCluster(4)  # adjust to your cluster number
registerDoParallel(cl)

# One parallel task per distinct x value; the per-task results are row-bound
# into `test`. The cluster is stopped even if the foreach call fails.
test <- tryCatch(
  foreach(i = unique(xyz[, 1]), .combine = rbind, .verbose = TRUE) %dopar% {
    # Subset once per task so the inner loop only touches this task's rows.
    chunk <- xyz[xyz[, 1] == i, ]
    # Loop only over the y values that occur for this x; looping over every
    # y in the data would call min() on empty subsets and produce Inf.
    for (j in unique(chunk[, 2])) {
      rows <- chunk[, 2] == j
      chunk[rows, 4] <- min(chunk[rows, 3])
    }
    # You must return what you are farming out.
    chunk
  },
  finally = stopCluster(cl)
)
test[1:15, ]


XXXXXXXXXXXXXXXXXXXXX Herry wrote XXXXXXXXXXXXXXXXXX

Hiya,

This now works...

# One parallel task per distinct value of column 1; each task fills column 4
# with the minimum of column 3 within every (column 1, column 2) group and
# returns its own rows, which are row-bound into `test`.
test <- foreach(i = unique(xyz[, 1]), .combine = rbind, .verbose = TRUE) %dopar% {
  # Work on this task's rows only.
  chunk <- xyz[xyz[, 1] == i, ]
  # Restrict j to the column-2 values present for this i, so min() is never
  # evaluated on an empty subset (which returns Inf with a warning).
  for (j in unique(chunk[, 2])) {
    rows <- chunk[, 2] == j
    chunk[rows, 4] <- min(chunk[rows, 3])
  }
  # You must return what you are farming out.
  chunk
}
head(test)



More information about the R-help mailing list