[R] new question

arun smartpink111 at yahoo.com
Thu Mar 28 19:28:03 CET 2013


Hi,
The function below returns the unique rows and also adds chi-squared test p-values computed on the frequencies (row by row).


# Count spectra per (Seq, Mod, z) for every element of `lista` and compare the
# resulting count columns pairwise with a chi-squared test.
#
# Arguments:
#   lista  Named list of data frames. The names become the count-column names.
#          Every data frame must contain the columns "Seq", "Mod", "z", "spec"
#          and "FDR".
#   FDR_k  Only rows whose FDR is strictly below this value are used.
#
# Value:
#   A data frame with one row per distinct (Seq, Mod, z), one count column per
#   element of `lista` (0 where the combination does not occur) and, when there
#   are at least two count columns, one "Count_<a><b>" column per pair of count
#   columns holding the chisq.test() p-value of the two counts (NA when both
#   counts are 0). chisq.test() may warn that the approximation is poor for
#   small counts; those warnings are not suppressed.
#
# Only base R is used; no package is attached as a side effect.
Spec <- function(lista, FDR_k) {
  key_cols <- c("Seq", "Mod", "z")

  if (is.null(names(lista))) {
    stop("'lista' must be a named list of data frames", call. = FALSE)
  }

  # Outer join on the key columns, keeping combinations seen on either side.
  merge_keys <- function(a, b) merge(a, b, by = key_cols, all = TRUE)

  # One data frame -> unique (Seq, Mod, z) rows plus a count column `folder`.
  count_one <- function(df, folder) {
    df <- as.data.frame(df, stringsAsFactors = FALSE)
    absent <- setdiff(c(key_cols, "spec", "FDR"), names(df))
    if (length(absent) > 0) {
      stop("element '", folder, "' lacks column(s): ",
           paste(absent, collapse = ", "), call. = FALSE)
    }

    # which() drops rows whose FDR is NA; plain logical indexing would turn
    # them into all-NA rows that are later counted as a group of their own.
    kept <- df[which(df[["FDR"]] < FDR_k), c(key_cols, "spec"), drop = FALSE]

    # Group id in order of first appearance, so counts line up with the
    # de-duplicated rows selected below.
    key <- do.call(paste, c(unname(as.list(kept[key_cols])), sep = "\r"))
    group_id <- match(key, unique(key))

    # Same counting rule as before: join the spec values of a group with ","
    # and count the comma-separated pieces.
    joined <- vapply(split(as.character(kept[["spec"]]), group_id),
                     paste, character(1), collapse = ",")
    n_spec <- vapply(strsplit(joined, ","), length, integer(1))

    counted <- kept[!duplicated(key), key_cols, drop = FALSE]
    count_pos <- length(key_cols) + 1L
    counted[[count_pos]] <- unname(n_spec)
    names(counted)[count_pos] <- folder
    rownames(counted) <- NULL
    counted
  }

  # Elements sharing a name are processed one by one (by position) and merged
  # first; merge() then disambiguates their count columns with .x/.y suffixes.
  # NOTE(review): confirm this is the wanted result for repeated names.
  per_group <- lapply(split(lista, names(lista)), function(frames) {
    Reduce(merge_keys, Map(count_one, frames, names(frames)))
  })

  # data frame with count of spec
  res <- Reduce(merge_keys, per_group)

  # Absent combinations get a count of 0; the key columns are left untouched.
  count_idx <- seq_along(res)[-seq_along(key_cols)]
  for (j in count_idx) {
    res[[j]][is.na(res[[j]])] <- 0L
  }

  # A pairwise comparison needs at least two count columns.
  if (length(count_idx) < 2L) {
    return(res)
  }

  pairs <- utils::combn(names(res)[count_idx], 2, simplify = FALSE)
  p_values <- lapply(pairs, function(pair) {
    first <- res[[pair[1]]]
    second <- res[[pair[2]]]
    vapply(seq_len(nrow(res)), function(i) {
      y <- c(first[i], second[i])
      # chisq.test() cannot be computed when both counts are 0.
      if (sum(y) == 0) NA_real_ else stats::chisq.test(y)$p.value
    }, numeric(1))
  })
  names(p_values) <- vapply(pairs, function(pair) {
    paste0("Count_", pair[1], pair[2])
  }, character(1))

  cbind(res, data.frame(p_values, check.names = FALSE))
}

# ReadDir() and FacGroup are defined elsewhere; presumably ReadDir() returns
# the named list of data frames that Spec() expects -- verify against its
# definition.
ListFacGroup <- ReadDir(FacGroup)

# Run Spec() with an FDR cutoff of 0.05: first showing the whole table, then
# only its first rows (the head() output is reproduced in the comment below).
Spec(ListFacGroup, 0.05)
head(Spec(ListFacGroup, 0.05))
#                        Seq                 Mod z a2 c2 c3 t2 Count_a2c2
#1    aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1 0.02534732
#2     aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1 0.01430588
#3          aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1 1.00000000
#4               AAAAAAALQAK                     2  1  0  1  1 0.31731051
#5            aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2 1.00000000
#6 aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1 0.31731051
 # Count_a2c3 Count_a2t2 Count_c2c3 Count_c2t2 Count_c3t2
#1 0.02534732 0.10247043         NA  0.3173105  0.3173105
#2 0.01430588 0.05878172         NA  0.3173105  0.3173105
#3 0.31731051 1.00000000  0.3173105  1.0000000  0.3173105
#4 1.00000000 1.00000000  0.3173105  0.3173105  1.0000000
#5 0.56370286 1.00000000  0.5637029  1.0000000  0.5637029
#6 0.31731051 1.00000000         NA  0.3173105  0.3173105

A.K.


________________________________
 From: arun <smartpink111 at yahoo.com>
To: Vera Costa <veracosta.rt at gmail.com> 
Cc: R help <r-help at r-project.org> 
Sent: Thursday, March 28, 2013 10:18 AM
Subject: Re: [R] new question
 
Hi,
Try this:


# Count spectra per (Seq, Mod, z) for every element of `lista`.
#
# Arguments:
#   lista  Named list of data frames. The names become the count-column names.
#          Every data frame must contain the columns "Seq", "Mod", "z", "spec"
#          and "FDR".
#   FDR_k  Only rows whose FDR is strictly below this value are used.
#
# Value:
#   Prints, and invisibly returns, a data frame with one row per distinct
#   (Seq, Mod, z) and one count column per element of `lista` (0 where the
#   combination does not occur).
#
# Only base R is used; no package is attached as a side effect.
Spec <- function(lista, FDR_k) {
  key_cols <- c("Seq", "Mod", "z")

  if (is.null(names(lista))) {
    stop("'lista' must be a named list of data frames", call. = FALSE)
  }

  # Outer join on the key columns, keeping combinations seen on either side.
  merge_keys <- function(a, b) merge(a, b, by = key_cols, all = TRUE)

  # One data frame -> unique (Seq, Mod, z) rows plus a count column `folder`.
  count_one <- function(df, folder) {
    df <- as.data.frame(df, stringsAsFactors = FALSE)
    absent <- setdiff(c(key_cols, "spec", "FDR"), names(df))
    if (length(absent) > 0) {
      stop("element '", folder, "' lacks column(s): ",
           paste(absent, collapse = ", "), call. = FALSE)
    }

    # which() drops rows whose FDR is NA; plain logical indexing would turn
    # them into all-NA rows that are later counted as a group of their own.
    kept <- df[which(df[["FDR"]] < FDR_k), c(key_cols, "spec"), drop = FALSE]

    # Group id in order of first appearance, so counts line up with the
    # de-duplicated rows selected below.
    key <- do.call(paste, c(unname(as.list(kept[key_cols])), sep = "\r"))
    group_id <- match(key, unique(key))

    # Same counting rule as before: join the spec values of a group with ","
    # and count the comma-separated pieces.
    joined <- vapply(split(as.character(kept[["spec"]]), group_id),
                     paste, character(1), collapse = ",")
    n_spec <- vapply(strsplit(joined, ","), length, integer(1))

    counted <- kept[!duplicated(key), key_cols, drop = FALSE]
    count_pos <- length(key_cols) + 1L
    counted[[count_pos]] <- unname(n_spec)
    names(counted)[count_pos] <- folder
    rownames(counted) <- NULL
    counted
  }

  # Elements sharing a name are processed one by one (by position) and merged
  # first; merge() then disambiguates their count columns with .x/.y suffixes.
  # NOTE(review): confirm this is the wanted result for repeated names.
  per_group <- lapply(split(lista, names(lista)), function(frames) {
    Reduce(merge_keys, Map(count_one, frames, names(frames)))
  })

  # data frame with count of spec
  res <- Reduce(merge_keys, per_group)

  # Absent combinations get a count of 0; the key columns are left untouched.
  count_idx <- seq_along(res)[-seq_along(key_cols)]
  for (j in count_idx) {
    res[[j]][is.na(res[[j]])] <- 0L
  }

  # print() shows the table and returns it invisibly to the caller.
  print(res)
}

 Spec(ListFacGroup,0.05)
#                               Seq                 Mod z a2 c2 c3 t2
#1            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
#2             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
#3                  aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1
#4                       AAAAAAALQAK                     2  1  0  1  1
#5                    aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2
#6         aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1
#7         aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3  1  0  0  1
#8         aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 2  0  1  0  0
#9         aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 3  1  2  2  1
#10                      AAAAAPGTAEK                     2  0  1  0  0
#11            aAAAASAPQQLSDEELFSQLR          1-n_acPro/ 2  1  0  0  1
#12                  aAAAAVGNAVPCGAR          1-n_acPro/ 2  1  1  1  1
#13                AAAAAWEEPSSGNGTAR                     2  1  1  1  1
#14                      aAAAELSLLEK          1-n_acPro/ 1  1  0  0  1
#15                      aAAAELSLLEK          1-n_acPro/ 2  1  1  1  1
#16                     AAAAEVLGLILR                     2  1  1  1  1
#17      aAAAGAAAAAAAEGEAPAEMGALLLEK          1-n_acPro/ 3  1  1  1  1
#18  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3  0  0  1  0
#19  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR          1-n_acPro/ 3  1  0  0  1
#20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK          1-n_acPro/ 3  1  0  0  1
#21                      AAAAAAAkAAK             8-K_ac/ 2  0  1  0  0
#22         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 2  0  1  1  0
#23         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 3  0  0  1  0
#24             aAADGDDSLYPIAVLIDELR          1-n_acPro/ 2  0  0  1  0


Regarding the 2nd question, I am a bit busy now.  Will try it later.
A.K.



________________________________
From: Vera Costa <veracosta.rt at gmail.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Thursday, March 28, 2013 9:43 AM
Subject: Re: new question


I don't want to remove the duplicates, just to write each row only once. Without "unique" I get the same row many times, but with "unique" everything is removed. I need this row to be written only once.
 
without "unique" the output is 
 
1            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
2            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
3            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
4            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
5            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
6             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
7             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
8             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
9             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
10            aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
11            aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  6  0  0  1
12                 aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1
13                      AAAAAAALQAK                     2  1  0  1  1
14                   aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2
15        aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1
16        aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3  1  0  0  1
17        aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 2  0  1  0  0
18        aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 3  1  2  2  1
19                      AAAAAPGTAEK                     2  0  1  0  0
20            aAAAASAPQQLSDEELFSQLR          1-n_acPro/ 2  1  0  0  1
21                  aAAAAVGNAVPCGAR          1-n_acPro/ 2  1  1  1  1
22                AAAAAWEEPSSGNGTAR                     2  1  1  1  1
23                      aAAAELSLLEK          1-n_acPro/ 1  1  0  0  1
24                      aAAAELSLLEK          1-n_acPro/ 2  1  1  1  1
25                     AAAAEVLGLILR                     2  1  1  1  1
26      aAAAGAAAAAAAEGEAPAEMGALLLEK          1-n_acPro/ 3  1  1  1  1
27  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3  0  0  1  0
28  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR          1-n_acPro/ 3  1  0  0  1
29 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK          1-n_acPro/ 3  1  0  0  1
30                      AAAAAAAkAAK             8-K_ac/ 2  0  1  0  0
31         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 2  0  1  1  0
32         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 3  0  0  1  0
33             aAADGDDSLYPIAVLIDELR          1-n_acPro/ 2  0  0  1  0

 
with "unique" is
 
                                Seq                 Mod z a2 c2 c3 t2
1            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  1  0  0  1
2             aAAAAAAAAAAASSPVGVGQR          1-n_acPro/ 2  1  0  0  1
3                  aAAAAAAAAAGAAGGR          1-n_acPro/ 2  1  1  0  1
4                       AAAAAAALQAK                     2  1  0  1  1
5                    aAAAAAGAGPEMVR          1-n_acPro/ 2  2  2  1  2
6         aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 2  1  0  0  1
7         aAAAAEQQQFYLLLGNLLSPDNVVR 1-<_Carbamoylation/ 3  1  0  0  1
8         aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 2  0  1  0  0
9         aAAAAEQQQFYLLLGNLLSPDNVVR          1-n_acPro/ 3  1  2  2  1
10                      AAAAAPGTAEK                     2  0  1  0  0
11            aAAAASAPQQLSDEELFSQLR          1-n_acPro/ 2  1  0  0  1
12                  aAAAAVGNAVPCGAR          1-n_acPro/ 2  1  1  1  1
13                AAAAAWEEPSSGNGTAR                     2  1  1  1  1
14                      aAAAELSLLEK          1-n_acPro/ 1  1  0  0  1
15                      aAAAELSLLEK          1-n_acPro/ 2  1  1  1  1
16                     AAAAEVLGLILR                     2  1  1  1  1
17      aAAAGAAAAAAAEGEAPAEMGALLLEK          1-n_acPro/ 3  1  1  1  1
18  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR 1-<_Carbamoylation/ 3  0  0  1  0
19  aAAAGGGGPGTAVGATGSGIAAAAAGLAVYR          1-n_acPro/ 3  1  0  0  1
20 aAAANSGSSLPLFDCPTWAGKPPPGLHLDVVK          1-n_acPro/ 3  1  0  0  1
21                      AAAAAAAkAAK             8-K_ac/ 2  0  1  0  0
22         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 2  0  1  1  0
23         aAAAVGAGHGAGGPGAASSSGGAR          1-n_acPro/ 3  0  0  1  0
24             aAADGDDSLYPIAVLIDELR          1-n_acPro/ 2  0  0  1  0

 
 
But I need the row 
 
1            aAAAAAAAAAAAAAATATAGPR          1-n_acPro/ 2  5  0  0  1
 
written only once

______________________________________________
R-help at r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.



More information about the R-help mailing list