[R] R help

arun smartpink111 at yahoo.com
Thu Mar 20 16:11:14 CET 2014


Hi,
It is better to use ?dput() to show the data.dput(dataset)

dat <- 

structure(list(customer_id = c(8L, 33L, 12L), CountryName = c("US", 
"CA", "UK"), RevenueWeekN00 = c(2.28, 0, 30.18), RevenueWeekN01 = c(9.57, 
14.69, 43.9), RevenueWeekN02 = c(7.54, 3.31, 90.4), RevenueWeekN03 = c(8.99, 
5.21, 45), RevenueWeekN04 = c(21.61, 1.95, 2.9), RevenueWeekN05 = c(24.46, 
1.51, 4.12), RevenueWeekN06 = c(19.45, 1.85, 19.72), RevenueWeekN07 = c(120.56, 
1.96, 30.8), RevenueWeekN08 = c(0.02, 4.88, 102.6), RevenueWeekN09 = c(0.15, 
3.55, 55.09), RevenueWeekN10 = c(0, 3.74, 25.3), RevenueWeekN11 = c(0, 
4.5, 4.6)), .Names = c("customer_id", "CountryName", "RevenueWeekN00", 
"RevenueWeekN01", "RevenueWeekN02", "RevenueWeekN03", "RevenueWeekN04", 
"RevenueWeekN05", "RevenueWeekN06", "RevenueWeekN07", "RevenueWeekN08", 
"RevenueWeekN09", "RevenueWeekN10", "RevenueWeekN11"), class = "data.frame", row.names = c(NA, 
-3L))


###Your expected output
res <- structure(list(customer_id = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 33L, 
33L, 33L, 33L), CountryName = c("US", "US", "US", "US", "US", 
"US", "US", "US", "US", "US", "US", "US", "CA", "CA", "CA", "CA", 
"CA", "CA", "CA", "CA", "CA", "CA", "CA", "CA"), weekdatesunday = c(0L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 0L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), RevenueWeekN00 = c(2.28, 9.57, 
7.54, 8.99, 21.61, 24.46, 19.45, 120.56, 0.02, 0.15, 0, 0, 0, 
14.69, 3.31, 5.21, 1.95, 1.51, 1.85, 1.96, 4.88, 3.55, 3.74, 
4.5), RevenueWeekN01 = c(0, 2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 
19.45, 120.56, 0.02, 0.15, 0, 0, 0, 14.69, 3.31, 5.21, 1.95, 
1.51, 1.85, 1.96, 4.88, 3.55, 3.74), RevenueWeekN02 = c(0, 0, 
2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 19.45, 120.56, 0.02, 0.15, 
0, 0, 0, 14.69, 3.31, 5.21, 1.95, 1.51, 1.85, 1.96, 4.88, 3.55
), RevenueWeekN03 = c(0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 
24.46, 19.45, 120.56, 0.02, 0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95, 
1.51, 1.85, 1.96, 4.88), RevenueWeekN04 = c(0, 0, 0, 0, 2.28, 
9.57, 7.54, 8.99, 21.61, 24.46, 19.45, 120.56, 0, 0, 0, 0, 0, 
14.69, 3.31, 5.21, 1.95, 1.51, 1.85, 1.96), RevenueWeekN05 = c(0, 
0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 19.45, 0, 0, 
0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95, 1.51, 1.85), RevenueWeekN06 = c(0, 
0, 0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 24.46, 0, 0, 0, 
0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95, 1.51), RevenueWeekN07 = c(0, 
0, 0, 0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 21.61, 0, 0, 0, 0, 
0, 0, 0, 0, 14.69, 3.31, 5.21, 1.95), RevenueWeekN08 = c(0, 0, 
0, 0, 0, 0, 0, 0, 2.28, 9.57, 7.54, 8.99, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 14.69, 3.31, 5.21), RevenueWeekN09 = c(0, 0, 0, 0, 0, 0, 
0, 0, 0, 2.28, 9.57, 7.54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14.69, 
3.31), RevenueWeekN10 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.28, 
9.57, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14.69), RevenueWeekN11 = c(0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2.28, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0)), .Names = c("customer_id", "CountryName", "weekdatesunday", 
"RevenueWeekN00", "RevenueWeekN01", "RevenueWeekN02", "RevenueWeekN03", 
"RevenueWeekN04", "RevenueWeekN05", "RevenueWeekN06", "RevenueWeekN07", 
"RevenueWeekN08", "RevenueWeekN09", "RevenueWeekN10", "RevenueWeekN11"
), class = "data.frame", row.names = c(NA, -24L))


dat1 <- dat
names(dat1)[-(1:2)] <- gsub("([[:alpha:]]+)(\\d+)","\\1_\\2",names(dat1)[-(1:2)])
 dat2 <- reshape(dat1,idvar=1:2,sep="_",direction="long",varying=names(dat1)[-(1:2)],timevar="weekdatesunday")

dat3 <-  dat2[with(dat2,order(factor(CountryName,levels=dat1$CountryName),customer_id)),]
row.names(dat3) <- 1:nrow(dat3)
colnames(dat3)[4] <- paste0(colnames(dat3)[4], "00") 

#Better would be to use a ?for() loop.  If you only need 12 lags:
library(plyr)

dat4 <- ddply(dat3,.(CountryName),mutate,RevenueWeekN01=c(0,head(RevenueWeekN00,-1)), RevenueWeekN02=c(0,head(RevenueWeekN01,-1)), RevenueWeekN03=c(0,head(RevenueWeekN02,-1)), RevenueWeekN04=c(0,head(RevenueWeekN03,-1)), RevenueWeekN05=c(0,head(RevenueWeekN04,-1)), RevenueWeekN06=c(0,head(RevenueWeekN05,-1)), RevenueWeekN07=c(0,head(RevenueWeekN06,-1)), RevenueWeekN08=c(0,head(RevenueWeekN07,-1)), RevenueWeekN09=c(0,head(RevenueWeekN08,-1)), RevenueWeekN10=c(0,head(RevenueWeekN09,-1)), RevenueWeekN11=c(0,head(RevenueWeekN10,-1)))
dat5 <- dat4[with(dat4,order(factor(CountryName,levels=dat1$CountryName),customer_id)),]
row.names(dat5) <- 1:nrow(dat5)
 all.equal(res, dat5[1:24,])
#[1] TRUE
A.K.





On Thursday, March 20, 2014 6:22 AM, Malyadri Putchakayala <malyadri.putchakayala at nuevora.com> wrote:

Hi,
if u doen't mind plz...help me lagitude Transpose,the data is give below 

customer_id    CountryName    RevenueWeekN00    RevenueWeekN01
RevenueWeekN02    RevenueWeekN03    RevenueWeekN04    RevenueWeekN05
RevenueWeekN06    RevenueWeekN07    RevenueWeekN08    RevenueWeekN09
RevenueWeekN10    RevenueWeekN11
8    US    2.28    9.57    7.54    8.99    21.61    24.46    19.45
120.56    0.02    0.15    0    0
33    CA    0    14.69    3.31    5.21    1.95    1.51    1.85    1.96
4.88    3.55    3.74    4.5
12    UK    30.18    43.9    90.4    45    2.9    4.12    19.72    30.8
102.6    55.09    25.30    4.6    

after transpose output is 

customer_id    CountryName    weekdatesunday    RevenueWeekN00
RevenueWeekN01    RevenueWeekN02    RevenueWeekN03    RevenueWeekN04
RevenueWeekN05    RevenueWeekN06    RevenueWeekN07    RevenueWeekN08
RevenueWeekN09    RevenueWeekN10    RevenueWeekN11
8    US    0    2.28    0    0    0    0    0    0
0    0    0    0    0
8    US    1    9.57    2.28    0    0    0    0    0
0    0    0    0    0
8    US    2    7.54    9.57    2.28    0    0    0    0
0    0    0    0    0
8    US    3    8.99    7.54    9.57    2.28    0    0    0
0    0    0    0    0
8    US    4    21.61    8.99    7.54    9.57    2.28    0    0
0    0    0    0    0
8    US    5    24.46    21.61    8.99    7.54    9.57    2.28    0
0    0    0    0    0
8    US    6    19.45    24.46    21.61    8.99    7.54    9.57    2.28
0    0    0    0    0
8    US    7    120.56    19.45    24.46    21.61    8.99    7.54    9.57
2.28    0    0    0    0
8    US    8    0.02    120.56    19.45    24.46    21.61    8.99    7.54
9.57    2.28    0    0    0
8    US    9    0.15    0.02    120.56    19.45    24.46    21.61    8.99
7.54    9.57    2.28    0    0
8    US    10    0    0.15    0.02    120.56    19.45    24.46
21.61    8.99    7.54    9.57    2.28    0
8    US    11    0    0    0.15    0.02    120.56    19.45
24.46    21.61    8.99    7.54    9.57    2.28
33    CA    0    0    0    0    0    0    0    0
0    0    0    0    0
33    CA    1    14.69    0    0    0    0    0    0
0    0    0    0    0
33    CA    2    3.31    14.69    0    0    0    0    0
0    0    0    0    0
33    CA    3    5.21    3.31    14.69    0    0    0    0
0    0    0    0    0
33    CA    4    1.95    5.21    3.31    14.69    0    0    0
0    0    0    0    0
33    CA    5    1.51    1.95    5.21    3.31    14.69    0    0
0    0    0    0    0
33    CA    6    1.85    1.51    1.95    5.21    3.31    14.69    0
0    0    0    0    0
33    CA    7    1.96    1.85    1.51    1.95    5.21    3.31
14.69    0    0    0    0    0
33    CA    8    4.88    1.96    1.85    1.51    1.95    5.21    3.31
14.69    0    0    0    0
33    CA    9    3.55    4.88    1.96    1.85    1.51    1.95    5.21
3.31    14.69    0    0    0
33    CA    10    3.74    3.55    4.88    1.96    1.85    1.51    1.95
5.21    3.31    14.69    0    0
33    CA    11    4.5    3.74    3.55    4.88    1.96    1.85    1.51
1.95    5.21    3.31    14.69    0

above output add newcolumn weekdatesunday is seq of 0:11 each record




More information about the R-help mailing list