# [R] cumulative sum by group and under some criteria

```r
# Inner join of res1 and d3 on their shared key columns m1 and n1.
# NOTE(review): join() is not defined in this snippet (presumably
# plyr::join) -- confirm the package is attached before this line runs.
res2 <- join(res1, d3, by = c("m1", "n1"), type = "inner")

# Success probabilities used in the binomial terms below: the "L" values are
# applied to the x/m counts and the "H" values to the y/n counts.
p0L <- 0.05
p0H <- 0.05
p1L <- 0.20
p1H <- 0.20

# Derived columns. They are appended in the order term2_p1, term2_p0, p2, p1
# so that the resulting column layout of res2 stays the same as before.

# Product of four binomial probabilities evaluated at the p1 rates:
# (x1 of m1), (y1 of n1), (x - x1 of m - m1) and (y - y1 of n - n1).
res2$term2_p1 <- with(
  res2,
  dbinom(x1, m1, p1L) * dbinom(y1, n1, p1H) *
    dbinom(x - x1, m - m1, p1L) * dbinom(y - y1, n - n1, p1H)
)

# Same product evaluated at the p0 rates.
res2$term2_p0 <- with(
  res2,
  dbinom(x1, m1, p0L) * dbinom(y1, n1, p0H) *
    dbinom(x - x1, m - m1, p0L) * dbinom(y - y1, n - n1, p0H)
)

# Observed proportions y/n and x/m.
res2$p2 <- with(res2, y / n)
res2$p1 <- with(res2, x / m)

# For every row of res2, draw two samples of 1000 Beta variates and evaluate
# each sample's empirical CDF at the row's observed proportion:
#   Fmm2 = ecdf of Beta(0.2 + x, 0.8 + m - x) draws, evaluated at p1
#   Fnn2 = ecdf of Beta(0.2 + y, 0.8 + n - y) draws, evaluated at p2
# NOTE(review): the shape parameters look like a Beta(0.2, 0.8) prior updated
# with the observed counts -- confirm.
#
# The per-row work is done in a single vapply() pass instead of building one
# data.frame per row and rbind()-ing them, and the derived columns are then
# computed on whole vectors. An empty res2 now yields a zero-row data frame
# with the same columns rather than NULL.
res4 <- local({
  n_draws <- 1000

  # Empirical CDF, evaluated at q, of n_draws draws from
  # Beta(0.2 + k, 0.8 + size - k).
  sim_cdf_at <- function(k, size, q) {
    draws <- rbeta(n_draws, 0.2 + k, 0.8 + size - k)
    ecdf(draws)(q)
  }

  # Within each row the x/m sample is drawn before the y/n sample, and rows
  # are visited in order, so the sequence of rbeta() calls is unchanged.
  cdf <- vapply(
    seq_len(nrow(res2)),
    function(i) {
      Fmm2 <- sim_cdf_at(res2[["x"]][i], res2[["m"]][i], res2[["p1"]][i])
      Fnn2 <- sim_cdf_at(res2[["y"]][i], res2[["n"]][i], res2[["p2"]][i])
      c(Fmm2, Fnn2)
    },
    numeric(2)
  )

  Fmm2 <- cdf[1, ]
  Fnn2 <- cdf[2, ]

  # R2 is the average of the two CDF values. Fmm_f2 is capped at R2 and
  # Fnn_f2 is floored at R2: when Fmm2 > Fnn2 both become R2, otherwise both
  # keep their original values.
  R2 <- (Fmm2 + Fnn2) / 2
  Fmm_f2 <- pmin(R2, Fmm2)
  Fnn_f2 <- pmax(R2, Fnn2)

  data.frame(
    Fmm2, Fnn2, R2, Fmm_f2, Fnn_f2,
    Qm2 = 1 - Fmm_f2,
    Qn2 = 1 - Fnn_f2
  )
})

# Attach the simulated columns to the joined data.
res5 <- cbind(res2, res4)
#  m1 n1 x1 y1 m n x y cterm1_P0L cterm1_P1L cterm1_P0H cterm1_P1H   term2_p1
#1  2  2  0  0 4 4 0 0     0.9025       0.64     0.9025       0.64 0.16777216
#2  2  2  0  0 4 4 0 1     0.9025       0.64     0.9025       0.64 0.08388608
#3  2  2  0  0 4 4 0 2     0.9025       0.64     0.9025       0.64 0.01048576
#4  2  2  0  0 4 4 1 0     0.9025       0.64     0.9025       0.64 0.08388608
#5  2  2  0  0 4 4 1 1     0.9025       0.64     0.9025       0.64 0.04194304
#     term2_p0   p2   p1 Fmm2  Fnn2     R2 Fmm_f2 Fnn_f2   Qm2   Qn2
#1 0.663420431 0.00 0.00 0.00 0.000 0.0000  0.000  0.000 1.000 1.000
#2 0.069833730 0.25 0.00 0.00 0.601 0.3005  0.000  0.601 1.000 0.399
#3 0.001837730 0.50 0.00 0.00 0.612 0.3060  0.000  0.612 1.000 0.388
#4 0.069833730 0.00 0.25 0.59 0.000 0.2950  0.295  0.295 0.705 0.705
#5 0.007350919 0.25 0.25 0.60 0.566 0.5830  0.583  0.583 0.417 0.417

ok.
1) For each row in the data, I want to simulate a sample of 1000 observations (Pm2) for the combination of x and m, and another sample of 1000 observations (Pn2) for the combination of y and n;
2) get the empirical cumulative distribution functions of these two samples (Fm2 and Fn2, respectively);
3) calculate the proportion of observations in Pm2 that are less than or equal to x/m (Fmm2 <- Fm2(x/m)) and the proportion of observations in Pn2 that are less than or equal to y/n
(Fnn2 <- Fn2(y/n));
4) keep just Fmm2 and Fnn2 for each row in the final data.
Thanks very much for your help.

for example, if I simulate a sample of 10 instead of 1000:

> Pm2<-rbeta(10, 0.2+1, 0.8+3)  #x=1, m=4
> Pn2<-rbeta(10, 0.2, 0.8+4)     #y=0, n=4
> Pm2
[1] 0.19567380 0.10242121 0.21295666 0.23824629 0.52519487 0.10825192 0.49724191 0.02098218 0.04740662 0.26410004
> Pn2
[1] 6.857148e-05 1.631983e-01 1.340303e-08 1.309932e-01 2.944966e-03 1.133654e-01 9.623050e-02 4.091554e-01 1.103247e-01 5.657689e-04
>
> Fm2<- ecdf(Pm2)
> Fn2<- ecdf(Pn2)
>
> Fmm2<-Fm2(1/4)
> Fnn2<-Fn2(0)
> Fmm2
[1] 0.7  # this is the proportion of observations that are <= 1/4; I want to keep this value for each row
> Fnn2
[1] 0  # this is the proportion of observations that are <= 0; I want to keep this value for each row
>

