[R] Obtaining data from a different row of data frame

arun smartpink111 at yahoo.com
Sun Sep 22 08:27:41 CEST 2013


Hi,

Here is a modified version of the code that avoids sapply():
# Example data: ten dates, four price series (P1..P4) and four offset
# columns (OF1..OF4). OFk[i] is the number of rows to look ahead in Pk
# when computing the new price for row i.
df1 <- structure(
  list(
    Dates = structure(
      c(13151, 13152, 13153, 13154, 13157, 13158, 13159, 13160, 13161, 13164),
      class = "Date"
    ),
    P1 = c(10, 13, 16, 19, 22, 25, 28, 31, 34, 37),
    P2 = c(100, 102, 104, 106, 108, 110, 112, 114, 116, 118),
    P3 = c(90, 94, 98, 102, 106, 110, 114, 118, 122, 126),
    P4 = c(70, 75, 80, 85, 90, 95, 100, 105, 110, 115),
    OF1 = c(3, 3, 4, 5, 2, 2, 2, 1, 1, 5),
    OF2 = c(5, 3, 4, 2, 1, 2, 2, 1, 1, 0),
    OF3 = c(4, 3, 4, 1, 3, 2, 2, 1, 1, 0),
    OF4 = c(3, 5, 4, 2, 3, 1, 2, 1, 1, 0)
  ),
  .Names = c("Dates", "P1", "P2", "P3", "P4", "OF1", "OF2", "OF3", "OF4"),
  row.names = c(NA, -10L),
  class = "data.frame"
)
df1$OF2[9] <- 4

# df2 holds the loop-based reference result in four placeholder columns.
df2 <- df1
df2[, 10:13] <- NA
colnames(df2)[10:13] <- paste0("newPrice", 1:4)

## your code
# For price column j, column j + 4 holds the offsets and column j + 8
# receives the price found at (row number + offset).
for (j in 2:5) {
  df2[j + 8] <- df2[df2[, j + 4] + row(df2)[, j], j]
}

# Vectorised solution: flatten the offset and price columns column-wise, so
# that element k of one vector lines up with element k of the other, then
# look up position k + offset[k] in the flattened prices.
# The patterns are anchored so that the "newPrice*" columns are never picked
# up if this section is re-run after they have been added.
of_cols <- grep("^OF", colnames(df1))
p_cols <- grep("^P", colnames(df1))
stopifnot(length(of_cols) == length(p_cols))
n_rows <- nrow(df1)

indx1 <- unlist(df1[, of_cols], use.names = FALSE)
# A look-ahead that runs past the last row would spill into the next column
# of the flattened vector; blank those offsets out so the lookup yields NA.
# Deriving the mask from row + offset works for any number of rows and any
# maximum offset, with no hand-tuned tail window.
target_row <- rep(seq_len(n_rows), times = length(of_cols)) + indx1
indx1[which(target_row > n_rows)] <- NA

val1 <- unlist(df1[, p_cols], use.names = FALSE)
df1[, 10:13] <- val1[indx1 + seq_along(indx1)]
colnames(df1)[10:13] <- colnames(df2)[10:13]
identical(df1[, 10:13], df2[, 10:13])
# [1] TRUE


###On a bigger dataset:
# Simulated benchmark data: 2000 consecutive days starting 2006-01-03, with
# 300 price columns (P1..P300, values drawn from 10..120) followed by 300
# offset columns (OF1..OF300, values drawn from 0..6).
set.seed(29)
df2 <- local({
  n_days <- 2000
  n_series <- 300
  # Prices are drawn before offsets so the random stream is consumed in the
  # same order as a single cbind(sample(...), sample(...)) call.
  prices <- matrix(
    sample(10:120, n_days * n_series, replace = TRUE),
    ncol = n_series
  )
  offsets <- matrix(
    sample(0:6, n_days * n_series, replace = TRUE),
    ncol = n_series
  )
  days <- seq(as.Date("2006-01-03"), length.out = n_days, by = "1 day")
  sim <- data.frame(Dates = days, cbind(prices, offsets))
  colnames(sim)[-1] <- c(
    paste0("P", seq_len(n_series)),
    paste0("OF", seq_len(n_series))
  )
  sim
})
# df3 is an untouched copy for the vectorised solution.
df3 <- df2


# Loop-based reference solution on the large data set.
# Append 300 placeholder columns (602:901), named newPrice1..newPrice300.
df2[,602:901]<-NA
 colnames(df2)[602:901]<- paste0("newPrice",1:300)
 # Time the loop. For every column j whose name starts with "P", column
 # j+300 supplies the per-row offset and column j+600 receives the result:
 # the value of column j read from row (row number + offset).
 system.time({
 for(j in grep("^P",colnames(df2))) {
  # row(df2)[,j] is the vector of row numbers of df2.
  df2[j+600] = df2[df2[,j+300] + row(df2)[,j], j]
  }
 })
# Timing output recorded in the original post:
#   user  system elapsed
 #  8.508   0.000   8.523 


# Vectorised solution on the large data set.
# Column positions are found with anchored patterns so that "newPrice*"
# columns are never matched, and the column count is taken from the match
# itself instead of copying a 300-column sub-frame just to count it.
of_cols <- grep("^OF", colnames(df3))
p_cols <- grep("^P", colnames(df3))
colN_OF <- length(of_cols)
stopifnot(colN_OF == length(p_cols))

system.time({
  n_rows <- nrow(df3)
  # Flatten offsets and prices column-wise so that element k of indx1 lines
  # up with element k of val1.
  indx1 <- unlist(df3[, of_cols], use.names = FALSE)
  # Blank out every offset whose target row lies past the last row; left
  # alone, the lookup would read from the top of the next price column.
  # Computing row + offset directly replaces the hand-tuned tail window
  # (rows 1995:2000 against thresholds 6:1) and stays correct for any
  # number of rows and any maximum offset.
  target_row <- rep(seq_len(n_rows), times = colN_OF) + indx1
  indx1[which(target_row > n_rows)] <- NA
  val1 <- unlist(df3[, p_cols], use.names = FALSE)
  df3[, 602:901] <- val1[indx1 + seq_along(indx1)]
  colnames(df3)[602:901] <- colnames(df2)[602:901]
})
# Timing reported in the original post (measured with the hard-coded
# tail-window variant of the masking step):
#  user  system elapsed
#  0.568   0.000   0.569

identical(df2, df3)
# [1] TRUE


A.K.





----- Original Message -----
From: arun <smartpink111 at yahoo.com>
To: Ira Sharenow <irasharenow100 at yahoo.com>
Cc: 
Sent: Sunday, September 22, 2013 1:28 AM
Subject: Re: [R] Obtaining data from a different row of data frame

Ira,

I tried with a bigger dataset to look for any errors in the code:
# Simulated test data: 2000 consecutive days starting 2006-01-03, with 300
# price columns (P1..P300, values drawn from 10..120) followed by 300 offset
# columns (OF1..OF300, values drawn from 0..6).
set.seed(29)
df2 <- local({
  n_days <- 2000
  n_series <- 300
  # Prices are drawn before offsets so the random stream is consumed in the
  # same order as a single cbind(sample(...), sample(...)) call.
  prices <- matrix(
    sample(10:120, n_days * n_series, replace = TRUE),
    ncol = n_series
  )
  offsets <- matrix(
    sample(0:6, n_days * n_series, replace = TRUE),
    ncol = n_series
  )
  days <- seq(as.Date("2006-01-03"), length.out = n_days, by = "1 day")
  sim <- data.frame(Dates = days, cbind(prices, offsets))
  colnames(sim)[-1] <- c(
    paste0("P", seq_len(n_series)),
    paste0("OF", seq_len(n_series))
  )
  sim
})
# df3 is an untouched copy for the second solution.
df3 <- df2

# Loop-based reference solution on the large data set.
# Append 300 placeholder columns (602:901), named newPrice1..newPrice300.
df2[,602:901]<-NA
 colnames(df2)[602:901]<- paste0("newPrice",1:300)
 # Time the loop. For every column j whose name starts with "P", column
 # j+300 supplies the per-row offset and column j+600 receives the result:
 # the value of column j read from row (row number + offset).
 system.time({
 for(j in grep("^P",colnames(df2))) {
  # row(df2)[,j] is the vector of row numbers of df2.
  df2[j+600] = df2[df2[,j+300] + row(df2)[,j], j]
  }
 })
# Timing output recorded in the original post:
#   user  system elapsed 
 # 9.584   0.000   9.601 



# Tail-row thresholds for the sapply()-based variant. vec2 lists the last
# rows of the data and vec1[i] is the smallest offset that is blanked out
# for row vec2[i] (6 for row 1995 down to 1 for row 2000).
vec1<- 6:1 ## change these values according to the range of actual offsets in your rows.
 vec2<- 1995:2000 ## change accordingly. If the maximum offset is, say, 100, take 100 rows from the tail end, and change vec1 too so that both have the same length.


system.time({
 # For each tail row vec2[i], set every OF value >= vec1[i] to NA and write
 # the row back. sapply() returns one column per tail row, hence the t().
 # NOTE: this overwrites the OF columns of df3 itself, so they may no longer
 # match df2's OF columns afterwards.
 df3[vec2,grep("OF",colnames(df3))]<- t(sapply(seq_along(vec1),function(i) {x1<-as.matrix(df3[vec2[i],grep("OF",colnames(df3))]); x1[x1>=vec1[i]]<-NA; x1}))
 # Flatten offsets and prices column-wise so element k of indx1 lines up
 # with element k of val1.
 indx1<- unlist(df3[,grep("OF",colnames(df3))],use.names=FALSE)
 val1<- unlist(df3[,grep("P",colnames(df3))],use.names=FALSE)
  # Position k + offset[k] in the flattened prices; an NA offset gives NA.
  df3[,602:901]<- val1[indx1+seq_along(indx1)]
  colnames(df3)[602:901]<- colnames(df2)[602:901]
 })
# Timing output recorded in the original post:
#   user  system elapsed 
 # 0.552   0.000   0.553 

# Only the new price columns are compared here, not the whole data frames.
identical(df2[,602:901],df3[,602:901])
#[1] TRUE


A.K.



More information about the R-help mailing list