[R] cumulative sum by group and under some criteria

arun smartpink111 at yahoo.com
Tue Mar 12 08:17:19 CET 2013



fun1<- function(maxN,p1L,p1H,p0L,p0H,c11,c12,c1,c2,beta,alpha){
d<-do.call(rbind,lapply(2:(maxN-6),function(m1)
do.call(rbind,lapply(2:(maxN-m1-4),function(n1)
do.call(rbind,lapply(0:m1,function(x1)
do.call(rbind,lapply(0:n1,function(y1)
data.frame(m1,n1,x1,y1)))))))))
colnames(d)<-c("m1","n1","x1","y1")
d1<-within(d,{
term1_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)
term1_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)
}) 
set.seed(8)
d2<-do.call(rbind,lapply(seq_len(nrow(d)),function(i){
Pm<- rbeta(1000,0.2+d[i,"x1"],0.8+d[i,"m1"]-d[i,"x1"]);
Pn<- rbeta(1000,0.2+d[i,"y1"],0.8+d[i,"n1"]-d[i,"y1"]);
Fm<- ecdf(Pm);
Fn<- ecdf(Pn);
#Fmm<- Fm(d[i,"p11"]);
#Fnn<- Fn(d[i,"p12"]);
Fmm<- Fm(p1L);
Fnn<- Fn(p1H);
R<- (Fmm+Fnn)/2;
Fmm_f<- max(R, Fmm);
Fnn_f<- min(R, Fnn);
Qm<- 1-Fmm_f;
Qn<- 1-Fnn_f;
data.frame(Qm,Qn)}))
d2<-cbind(d1,d2)

library(zoo)
lst1<- split(d2,list(d$m1,d$n1))
d2New<-do.call(rbind,lapply(lst1[lapply(lst1,nrow)!=0],function(x){
x[,9:14]<-NA;
x[,9:10][x$Qm<=c11,]<-cumsum(x[,5:6][x$Qm<=c11,]);
x[,11:12][x$Qn<=c12,]<-cumsum(x[,5:6][x$Qn<=c12,]);
x[,13:14]<-cumsum(x[,5:6]);
colnames(x)[9:14]<- c("cterm1_P0L","cterm1_P1L","cterm1_P0H","cterm1_P1H","sumTerm1_p0","sumTerm1_p1");
x1<-na.locf(x);
x1[,9:14][is.na(x1[,9:14])]<-0;
x1}
))
row.names(d2New)<-1:nrow(d2New)

res1<-aggregate(.~m1+n1,data=d2New[,c(1:2,9:12)],max)
d3<-res1[res1[,4]<=beta & res1[,6]<beta,]
f1<-function(dat){
stopifnot(nrow(dat)!=0)
library(plyr)
join(dat,d2New,type="inner")
}
d4<- f1(d3)
res2<-do.call(rbind,lapply(unique(d4$m1),function(m1)
do.call(rbind,lapply(unique(d4$n1),function(n1)
do.call(rbind,lapply(unique(d4$x1),function(x1)
do.call(rbind,lapply(unique(d4$y1),function(y1)
 
#do.call(rbind,lapply(0:m1,function(x1)
#do.call(rbind,lapply(0:n1,function(y1)
do.call(rbind,lapply((m1+2):(maxN-2-n1),function(m)
do.call(rbind,lapply((n1+2):(maxN-m),function(n)
do.call(rbind,lapply(x1:(x1+m-m1), function(x)
do.call(rbind,lapply(y1:(y1+n-n1), function(y)
data.frame(m1,n1,x1,y1,m,n,x,y)) )))))))))))))))
colnames(res2)<- c("m1","n1","x1","y1","m","n","x","y")
res3<- join(res2,d4,by=c("m1","n1","x1","y1"),type="inner")
res3<-res3[,c(1:16)]

res3New<- within(res3,{
term2_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
term2_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})

res4<-do.call(rbind,lapply(seq_len(nrow(res3New)),function(i){
Pm2<-rbeta(1000,0.2+res3[i,"x"],0.8+res3[i,"m"]-res3[i,"x"]);
Pn2<- rbeta(1000,0.2+res3[i,"y"],0.8+res3[i,"n"]-res3[i,"y"]);
Fm2<- ecdf(Pm2);
Fn2<- ecdf(Pn2);
#Fmm2<- Fm2(res3[i,"p1"]);
#Fnn2<- Fn2(res3[i,"p2"]);
Fmm2<- Fm2(p1L);
Fnn2<- Fn2(p1H);
R2<- (Fmm2+Fnn2)/2;
Fmm_f2<- max(R2, Fmm2);
Fnn_f2<- min(R2, Fnn2);
Qm2<- 1-Fmm_f2;
Qn2<- 1-Fnn_f2;
data.frame(Qm2,Qn2)}))

res5<-cbind(res3New,res4)
lst2<- split(res5,list(res5$m1,res5$n1,res5$m,res5$n))
 
res5New<-do.call(rbind,lapply(lst2[lapply(lst2,nrow)!=0],
function(x){
x[,21:26]<-NA;
x[,21:22][x$Qm2<=c1 & x$Qm>c11,]<-cumsum(x[,17:18][x$Qm2<=c1 & x$Qm>c11,]);
x[,23:24][x$Qn2<=c2 & x$Qn>c12,]<-cumsum(x[,17:18][x$Qn2<=c2 & x$Qn>c12,]);
x[,25:26]<-cumsum(x[,17:18]);
colnames(x)[21:26]<- c("cterm2_P1L","cterm2_P0L","cterm2_P1H","cterm2_P0H","sumT2p0","sumT2p1");
x2<-na.locf(x);
x2[,21:26][is.na(x2[,21:26])]<-0; x2}
))
row.names(res5New)<-1:nrow(res5New)
res6<-aggregate(.~m1+n1+m+n,data=res5New[,c(1:6,9:12,21:24)] ,max)
res6New<-within(res6,{AL<-1-(cterm1_P0L + cterm2_P0L);
AH<-1-(cterm1_P0H + cterm2_P0H);
BL<-(cterm1_P1L + cterm2_P1L);
BH<-(cterm1_P1H + cterm2_P1H);
EN<- m1 + (m-m1)*(1-cterm1_P0L) + n1 + (n-n1)*(1-cterm1_P0H);
N<-m+n
})
res7<-res6New[,c(1:4,7,9,15:20)]
res7New<-res7[res7[,9]<=beta & res7[,10]<=beta &res7[,11] <= alpha & res7[,12] <= alpha,]
return(res7New)
}

###Different scenarios
####1
 fun1(10,0.25,0.25,0.05,0.05,0.07,0.07,0.15,0.20,0.6,0.6)
#Joining by: m1, n1, cterm1_P0L, cterm1_P1L, cterm1_P0H, cterm1_P1H
#   m1 n1 m n cterm1_P0L cterm1_P0H  N       EN        BH        BL        AH
#8   2  3 4 5  0.7737809  0.7737809  9 5.904876 0.2373047 0.4202271 0.2262191
#11  2  3 5 5  0.7737809  0.7737809 10 6.131095 0.2748470 0.3744965 0.1631941
#12  3  3 5 5  0.8573750  0.7350919 10 6.815066 0.2342920 0.4218750 0.1703707
#14  2  3 4 6  0.7737809  0.7737809 10 6.131095 0.2373047 0.4202271 0.2262191
#15  2  4 4 6  0.7350919  0.7350919 10 7.059632 0.1779785 0.3942719 0.2649081
#          AL
#8  0.1100501
#11 0.1158586
#12 0.1426250
#14 0.1100501
#15 0.1138223


####2
fun1(10,0.25,0.25,0.05,0.05,0.07,0.07,0.15,0.20,0.1,0.1)
#Error: nrow(dat) != 0 is not TRUE

####3 running again previous case
fun1(10,0.25,0.25,0.05,0.05,0.07,0.07,0.15,0.20,0.6,0.6)
Joining by: m1, n1, cterm1_P0L, cterm1_P1L, cterm1_P0H, cterm1_P1H
#   m1 n1 m n cterm1_P0L cterm1_P0H  N       EN        BH        BL        AH
#8   2  3 4 5  0.7737809  0.7737809  9 5.904876 0.2373047 0.4202271 0.2262191
#11  2  3 5 5  0.7737809  0.7737809 10 6.131095 0.2748470 0.3744965 0.1631941
#12  3  3 5 5  0.8573750  0.7350919 10 6.815066 0.2342920 0.4218750 0.1703707
#14  2  3 4 6  0.7737809  0.7737809 10 6.131095 0.2373047 0.4202271 0.2262191
#15  2  4 4 6  0.7350919  0.7350919 10 7.059632 0.1779785 0.3942719 0.2649081
#          AL
#8  0.1100501
#11 0.1158586
#12 0.1426250
#14 0.1100501
#15 0.1138223

####4 change beta, alpha
fun1(10,0.25,0.25,0.05,0.05,0.07,0.07,0.15,0.20,0.4,0.4)
#Joining by: m1, n1, cterm1_P0L, cterm1_P1L, cterm1_P0H, cterm1_P1H
#   m1 n1 m n cterm1_P0L cterm1_P0H  N       EN        BH        BL        AH
#8   2  3 5 5  0.7737809  0.7737809 10 6.131095 0.2748470 0.3744965 0.1631941
#11  2  4 4 6  0.7350919  0.7350919 10 7.059632 0.1779785 0.3942719 0.2649081
#          AL
#8  0.1158586
#11 0.1138223

###5
 fun1(10,0.21,0.15,0.15,0.15,0.05,0.05,0.11,0.18,0.4,0.4)
#Joining by: m1, n1, cterm1_P0L, cterm1_P1L, cterm1_P0H, cterm1_P1H
# [1] m1         n1         m          n          cterm1_P0L cterm1_P0H
# [7] N          EN         BH         BL         AH         AL        
#<0 rows> (or 0-length row.names)

####6
fun1(10,0.15,0.25,0.05,0.06,0.07,0.07,0.15,0.20,0.5,0.4)
#Joining by: m1, n1, cterm1_P0L, cterm1_P1L, cterm1_P0H, cterm1_P1H
#  m1 n1 m n cterm1_P0L cterm1_P0H N EN        BH        BL        AH        AL
#2  2  2 5 4          0          0 9  9 0.1403911 0.4437053 0.3958713 0.2262191
#3  3  2 5 4          0          0 9  9 0.1403911 0.4437053 0.3958713 0.2262191
A.K.


________________________________
From: Joanna Zhang <zjoanna2013 at gmail.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Monday, March 11, 2013 10:54 PM
Subject: Re: [R] cumulative sum by group and under some criteria


Thanks! it works well if I open up R and run this code first. but if I change alpha<-0.1, beta<-0.1, and run the codes, the res7 not found, which is correct as d3 is empty, however, if I change back to alpha<-0.6,  beta<-0.6, and run the codes, res7 still not found, which is not correct. could you try it?


On Mon, Mar 11, 2013 at 8:14 PM, arun <smartpink111 at yahoo.com> wrote:

HI,
>
>
>maxN<-10
>c11<-0.07
>c12<-0.07
>c1<-0.15
>c2<-0.20
>
>p0L<-0.05
>p0H<-0.05
>p1L<-0.25
>p1H<-0.25
>
>alpha<-0.6
>beta<-0.6
>
>
>bay<-function (c11,c12,c1,c2){
>d<-do.call(rbind,lapply(2:(maxN-6),function(m1)
>do.call(rbind,lapply(2:(maxN-m1-4),function(n1)
>do.call(rbind,lapply(0:m1,function(x1)
>do.call(rbind,lapply(0:n1,function(y1)
>expand.grid(m1,n1,x1,y1)))))))))
>names(d)<-c("m1","n1","x1","y1")
>
>d<-within(d,{
>term1_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)
>term1_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)
>})     
>
>########## add Qm Qn ##################################
>set.seed(8)
>d1<-do.call(rbind,lapply(seq_len(nrow(d)),function(i){
>Pm<- rbeta(1000,0.2+d[i,"x1"],0.8+d[i,"m1"]-d[i,"x1"]);
>Pn<- rbeta(1000,0.2+d[i,"y1"],0.8+d[i,"n1"]-d[i,"y1"]);
>Fm<- ecdf(Pm);
>Fn<- ecdf(Pn);
>#Fmm<- Fm(d[i,"p11"]);
>#Fnn<- Fn(d[i,"p12"]);
>Fmm<- Fm(p1L);
>Fnn<- Fn(p1H);
>R<- (Fmm+Fnn)/2;
>Fmm_f<- max(R, Fmm);
>Fnn_f<- min(R, Fnn);
>Qm<- 1-Fmm_f;
>Qn<- 1-Fnn_f;
>data.frame(Qm,Qn)}))
>d2<-cbind(d,d1)
>
>
>library(zoo)
>lst1<- split(d2,list(d$m1,d$n1))
>d2<-do.call(rbind,lapply(lst1[lapply(lst1,nrow)!=0],function(x){
>x[,9:14]<-NA;
>x[,9:10][x$Qm<=c11,]<-cumsum(x[,5:6][x$Qm<=c11,]);
>x[,11:12][x$Qn<=c12,]<-cumsum(x[,5:6][x$Qn<=c12,]);
>x[,13:14]<-cumsum(x[,5:6]);
>colnames(x)[9:14]<- c("cterm1_P0L","cterm1_P1L","cterm1_P0H","cterm1_P1H","sumTerm1_p0","sumTerm1_p1");
>x1<-na.locf(x);
>x1[,9:14][is.na(x1[,9:14])]<-0;
>x1}
>))
>row.names(d2)<-1:nrow(d2)
>
>
>res1<-aggregate(.~m1+n1,data=d2[,c(1:2,9:12)],max)
>
>d3<-res1[res1[,4]<=beta & res1[,6]<beta,]
>f1<-function(dat){
>stopifnot(nrow(dat)!=0)
>library(plyr)
>
>join(dat,d2,by=c("m1","n1"),type="inner")
>}###not present
>
>d4<-f1(d3)###not run
>
>#########################
>################ 2nd stage ################################
>#### this method not look at the combination of m1 and n1,
>#### so need to merger the orginal data 'd2
>
>res2<-do.call(rbind,lapply(unique(d4$m1),function(m1)
>do.call(rbind,lapply(unique(d4$n1),function(n1)
>do.call(rbind,lapply(unique(d4$x1),function(x1)
>do.call(rbind,lapply(unique(d4$y1),function(y1)
> 
>#do.call(rbind,lapply(0:m1,function(x1)
>#do.call(rbind,lapply(0:n1,function(y1)
>do.call(rbind,lapply((m1+2):(maxN-2-n1),function(m)
>do.call(rbind,lapply((n1+2):(maxN-m),function(n)
>do.call(rbind,lapply(x1:(x1+m-m1), function(x)
>do.call(rbind,lapply(y1:(y1+n-n1), function(y)
>expand.grid(m1,n1,x1,y1,m,n,x,y)) )))))))))))))))
>
>names(res2)<- c("m1","n1","x1","y1","m","n","x","y")
>attr(res2,"out.attrs")<-NULL
>
>res3<- join(res2,d4,by=c("m1","n1","x1","y1"),type="inner")
>res3<-res3[,c(1:16)]
>############################################################# add more variables another method #############################
>res3<- within(res3,{
>term2_p0<- dbinom(x1,m1, p0L, log=FALSE)* dbinom(y1,n1,p0H, log=FALSE)*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
>term2_p1<- dbinom(x1,m1, p1L, log=FALSE)* dbinom(y1,n1,p1H, log=FALSE)*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})
>
>#term2_p0<- term1_p0*dbinom(x-x1,m-m1, p0L, log=FALSE)* dbinom(y-y1,n-n1,p0H, log=FALSE);
>#term2_p1<- term1_p1*dbinom(x-x1,m-m1, p1L, log=FALSE)* dbinom(y-y1,n-n1,p1H, log=FALSE)})
>
>res4<-do.call(rbind,lapply(seq_len(nrow(res3)),function(i){
>Pm2<-rbeta(1000,0.2+res3[i,"x"],0.8+res3[i,"m"]-res3[i,"x"]);
>Pn2<- rbeta(1000,0.2+res3[i,"y"],0.8+res3[i,"n"]-res3[i,"y"]);
>Fm2<- ecdf(Pm2);
>Fn2<- ecdf(Pn2);
>#Fmm2<- Fm2(res3[i,"p1"]);
>#Fnn2<- Fn2(res3[i,"p2"]);
>
>Fmm2<- Fm2(p1L);
>Fnn2<- Fn2(p1H);
>
>R2<- (Fmm2+Fnn2)/2;
>Fmm_f2<- max(R2, Fmm2);
>Fnn_f2<- min(R2, Fnn2);
>Qm2<- 1-Fmm_f2;
>Qn2<- 1-Fnn_f2;
>data.frame(Qm2,Qn2)}))
>
>res5<-cbind(res3,res4)
>################################################# calculate cumulative sum #################################
>
>lst2<- split(res5,list(res5$m1,res5$n1,res5$m,res5$n))
> 
>res5<-do.call(rbind,lapply(lst2[lapply(lst2,nrow)!=0],
>function(x){
>x[,21:26]<-NA;
>x[,21:22][x$Qm2<=c1 & x$Qm>c11,]<-cumsum(x[,17:18][x$Qm2<=c1 & x$Qm>c11,]);
>x[,23:24][x$Qn2<=c2 & x$Qn>c12,]<-cumsum(x[,17:18][x$Qn2<=c2 & x$Qn>c12,]);
>x[,25:26]<-cumsum(x[,17:18]);
>
>colnames(x)[21:26]<- c("cterm2_P1L","cterm2_P0L","cterm2_P1H","cterm2_P0H","sumT2p0","sumT2p1");
>x2<-na.locf(x);
>x2[,21:26][is.na(x2[,21:26])]<-0; x2}
>))
>row.names(res5)<-1:nrow(res5)
>res6<-aggregate(.~m1+n1+m+n,data=res5[,c(1:6,9:12,21:24)] ,max)
>res6<-within(res6,{AL<-1-(cterm1_P0L + cterm2_P0L);
>AH<-1-(cterm1_P0H + cterm2_P0H);
>BL<-(cterm1_P1L + cterm2_P1L);
>BH<-(cterm1_P1H + cterm2_P1H);
>EN<- m1 + (m-m1)*(1-cterm1_P0L) + n1 + (n-n1)*(1-cterm1_P0H);
>N<-m+n
>})
>res7<-res6[,c(1:4,7,9,15:20)]
>res7<-res7[res7[,9]<=beta & res7[,10]<=beta &res7[,11] <= alpha & res7[,12] <= alpha,]
>}
>bay(0.07,0.07,0.15,0.20)
>res7
>
> head(res7)
>#  m1 n1 m n cterm1_P0L cterm1_P0H  N       EN        BH        BL        AH
>#1  2  2 4 4  0.8167625          0  8 6.366475 0.1001129 0.4702148 0.3365796
>#2  2  2 5 4  0.8167625          0  9 6.549713 0.2002258 0.4405518 0.2038955
>#3  3  2 5 4  0.8573750          0  9 7.285250 0.2002258 0.4218750 0.2038955
>#4  2  2 6 4  0.8167625          0 10 6.732950 0.1689405 0.4558468 0.2121882
>#5  3  2 6 4  0.8573750          0 10 7.427875 0.1689405 0.4781885 0.2121882
>#6  4  2 6 4  0.8145062          0 10 8.370988 0.1689405 0.3914909 0.2121882
> #         AL
>#1 0.10585941
>#2 0.10972831
>#3 0.14262500
>#4 0.05037883
>#5 0.04808759
>#6 0.05944387
>
>
>
>
>
>A.K.
>



More information about the R-help mailing list