[R] reading data

arun smartpink111 at yahoo.com
Mon Feb 18 20:41:23 CET 2013


Hi,
I am not able to open your graph.  I am using linux.

Also, the codes in the function are not reproducible
 directT <- direct[grepl("^t", direct)]
 directC <- direct[grepl("^c", direct)]

It takes double the time to know what is going on.

dir()
#[1] "a1" "a2" "a3" "b1" "b2" "c1"

direct<- list.files(recursive=TRUE)[grepl("^a|^b",dir())]

 direct
#[1] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt"
#[4] "MSMS_23PepInfo.txt" "MSMS_23PepInfo.txt"
directA<- list.files(recursive=TRUE)[grepl("^a",dir())]
directB<- list.files(recursive=TRUE)[grepl("^b",dir())]
lista<- lapply(direct,function(x) read.table(x,header=TRUE,stringsAsFactors=FALSE,sep="\t",fill=TRUE))

listaA<-lapply(directA, function(x) read.table(x,header=TRUE, sep = "\t",fill=TRUE))
listaB<-lapply(directB, function(x) read.table(x,header=TRUE, sep = "\t",fill=TRUE))

#here I am changing the names listaT, z, etc..

#count different mm values
 cab <- vector()
    for (i in 1:length(lista)) {
         dc<-lista[[i]][ifelse(lista[[i]]$b<0.01, TRUE, FALSE),]
        dc<-table(dc$mm)
        cab <- c(cab, names(dc))
  }

 #Relative freqs to construct the graph
    cab <- unique(cab)
    d <- matrix(ncol=length(cab))
 dci<- d[-1,]
    dcf <- d[-1,]
 dti <- d[-1,]
 dtf <- d[-1,]

    ########################################
 for (i in 1:length(listaA)) {

  #Relative freq of all data
  dcc<-listaA[[i]]
  dcc<-table(factor(dcc$mm, levels=cab))
  dci<- rbind(dci, dcc)
  rownames(dci)<-rownames(1:(nrow(dci)), do.NULL = FALSE, prefix = "a")


  #Relative freq of data with FDR<0.01
  dcc1<-listaA[[i]][ifelse(listaA[[i]]$FDR<0.01, TRUE, FALSE),]
  dcc1<-table(factor(dcc1$mm, levels=cab))
  dcf<- rbind(dcf,dcc1)
  rownames(dcf)<-rownames(1:(nrow(dcf)), do.NULL = FALSE, prefix = "a")
        }

 for (i in 1:length(listaB)) {

  #Relative freq of all data
  dct<-listaB[[i]]
  dct<-table(factor(dct$mm, levels=cab))
  dti<- rbind(dti, dct)
  rownames(dti)<-rownames(1:(nrow(dti)), do.NULL = FALSE, prefix = "b")


  #Relative freq of data with FDR<0.01
  dct1<-listaB[[i]][ifelse(listaB[[i]]$FDR<0.01, TRUE, FALSE),]
  dct1<-table(factor(dct1$mm, levels=cab))
  dtf<- rbind(dtf,dct1)
  rownames(dtf)<-rownames(1:(nrow(dtf)), do.NULL = FALSE, prefix = "b")
        }
  freq.i<-rbind(dci,dti)
  freq.f<-rbind(dcf,dtf)
  freq.rel.i<-freq.i/apply(freq.i,1,sum)
  freq.rel.f<-freq.f/apply(freq.f,1,sum) 


 freq.i
#   2 3
#a1 4 1
#a2 4 1
#a3 4 1
#b1 4 1
#b2 4 1
#b3 4 1
#b4 4 1
#result from my code.  
 files<-paste("MSMS_",23,"PepInfo.txt",sep="")
read.data<-function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,sep = "\t",stringsAsFactors=FALSE,fill=TRUE))}
lista<-do.call("c",lapply(list.files(recursive=T)[grep(files,list.files(recursive=T))],read.data))
names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="")

res2<-split(lista,names(lista))
res3<- lapply(res2,function(x) {names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
res4<-lapply(seq_along(res3),function(i) do.call(rbind,lapply(res3[[i]], function(x) table(x$mm[x[["b"]]<0.01]))))
 names(res4)<- names(res2)


res4
#$group_a
#   2 3
#a1 3 1
#a2 3 1
#a3 3 1

#$group_b
 #  2 3
#b1 3 1
#b2 3 1

#$group_c
 #  2 3
#c1 3 1

There is a difference in output between freq.i and res4.  There were only two files under 'group_b'.  So, check your code.
A.K.






________________________________
From: Vera Costa <veracosta.rt at gmail.com>
To: arun <smartpink111 at yahoo.com> 
Sent: Monday, February 18, 2013 10:27 AM
Subject: Re: reading data


Hi!!!

I'm coming to ask a new question.

I want a function to do my statistics. I started with what you had sent me:

# Read every "MSMS_<number>PepInfo.txt" found below `directory`.
#
# Arguments:
#   directory  folder holding one sub-folder per sample.
#   number     value pasted into the file name "MSMS_<number>PepInfo.txt".
#
# Prints the sub-folder names and the data that was read, and returns
# (invisibly, via print) a list with one data frame per file, each named
# after the folder part of the file's path.
z.plot <- function(directory, number) {
  # Work relative to `directory`, but put the caller's working directory
  # back when the function exits (also on error).
  old_wd <- setwd(directory)
  on.exit(setwd(old_wd), add = TRUE)

  # Folder names with every "." and "/" removed; the entry for the current
  # folder itself becomes "" and is dropped.
  indx <- gsub("[./]", "", list.dirs())
  indx1 <- indx[indx != ""]
  print(indx1)

  files <- paste("MSMS_", number, "PepInfo.txt", sep = "")

  # Reads one file and returns it as a one-element list named after the
  # folder part of its path (everything before the last "/").
  read.data <- function(x) {
    names(x) <- gsub("^(.*)\\/.*", "\\1", x)
    lapply(x, function(y) {
      read.table(y, header = TRUE, sep = "\t", stringsAsFactors = FALSE,
                 fill = TRUE)
    })
  }

  # List the tree once and keep the paths that match the file name.
  paths <- list.files(recursive = TRUE)
  lista <- do.call("c", lapply(paths[grep(files, paths)], read.data))
  print(lista)
  #names(lista)<-paste("group_",gsub("\\d+","",names(lista)),sep="")
}
z.plot("C:/Users/Vera Costa/Desktop/dados.lixo",23)


In my lista I can't merge rows to form the groups, because the idea is to count, for each file, the frequencies of mm when b<0.01. After that I want a graph like the one attached.


When I had 2 groups and knew the names of the groups, I wrote the code below (but now I have more groups and, maybe, I don't know the names of the groups):

# Compare the charge (z) distribution of every sample with the average of
# the samples.
#
# Recursively finds the files named "MSMS_<number>PepInfo.txt" below
# `directory` and reads each one as a tab-separated table with a header
# (columns `z` and `FDR` are used).  A file belongs to a group when its path
# relative to `directory` starts with one of the prefixes in `groups`.
# The function then
#   * draws two grouped barplots of the relative z frequencies per sample
#     (all rows, and rows with FDR < 0.01), and
#   * for every sample except the first of each group, computes a chi-square
#     p-value comparing its FDR-filtered counts with the column-wise mean of
#     those samples.
#
# Arguments:
#   directory  folder holding one sub-folder per sample (e.g. c1, c2, t1).
#   number     value pasted into the file name "MSMS_<number>PepInfo.txt".
#   groups     path prefixes that identify the groups, in plotting order.
#              The default keeps the original two-group ("c", "t") layout.
#
# Prints and returns (invisibly, via print) a data frame with the columns
# "sample name" and "pvalue".
z.plot <- function(directory, number, groups = c("c", "t")) {
  # reading data
  # Work relative to `directory` so that every path starts with its sample
  # folder, and put the caller's working directory back on exit.
  old_wd <- setwd(directory)
  on.exit(setwd(old_wd), add = TRUE)

  direct <- dir(".",
                pattern = paste("MSMS_", number, "PepInfo.txt", sep = ""),
                full.names = FALSE, recursive = TRUE)
  if (length(direct) == 0) {
    stop("no file matching 'MSMS_", number, "PepInfo.txt' found under ",
         directory, call. = FALSE)
  }
  # Every file is read once; the per-group data are subsets of `lista`.
  lista <- lapply(direct, function(x) read.table(x, header = TRUE, sep = "\t"))

  # count different z values
  # Column set shared by all count tables: every z value that occurs in at
  # least one file among rows with FDR < 0.01, in order of first appearance.
  # which() skips rows whose FDR is NA.
  cab <- unique(unlist(lapply(lista, function(d) {
    names(table(d$z[which(d$FDR < 0.01)]))
  })))
  if (length(cab) == 0) {
    stop("no row with FDR < 0.01 in any file", call. = FALSE)
  }

  # Count matrix for the files of one group: one row per file, named
  # <prefix>1, <prefix>2, ..., and one column per value in `cab`.
  # With only_fdr = TRUE only rows with FDR < 0.01 are counted.
  count_group <- function(prefix, only_fdr) {
    rows <- lapply(lista[startsWith(direct, prefix)], function(d) {
      z <- if (only_fdr) d$z[which(d$FDR < 0.01)] else d$z
      as.vector(table(factor(z, levels = cab)))
    })
    if (length(rows) == 0) {
      return(NULL)  # rbind() skips NULL, so an empty group adds no rows
    }
    counts <- do.call(rbind, rows)
    dimnames(counts) <- list(paste0(prefix, seq_along(rows)), cab)
    counts
  }

  # Relative freqs to construct the graph
  all_counts <- lapply(groups, count_group, only_fdr = FALSE)
  fdr_counts <- lapply(groups, count_group, only_fdr = TRUE)
  freq.i <- do.call(rbind, all_counts)
  freq.f <- do.call(rbind, fdr_counts)
  if (is.null(freq.i)) {
    stop("no file belongs to any of the groups: ",
         paste(groups, collapse = ", "), call. = FALSE)
  }
  freq.rel.i <- freq.i / rowSums(freq.i)
  freq.rel.f <- freq.f / rowSums(freq.f)

  # Graph plot
  colour <- sample(rainbow(nrow(freq.rel.i)))
  # Two panels side by side; the previous layout is restored on exit.
  old_par <- par(mfrow = c(1, 2))
  on.exit(par(old_par), add = TRUE)
  barplot(freq.rel.i, beside = TRUE, main = "Sample", xlab = "Charge",
          ylab = "Relative Frequencies", col = colour,
          legend.text = rownames(freq.rel.i))
  barplot(freq.rel.f, beside = TRUE, main = "Sample with FDR<0.01",
          xlab = "Charge", ylab = "Relative Frequencies", col = colour,
          legend.text = rownames(freq.rel.f))

  # average of the samples (except the first of each group, e.g. c1 & t1)
  # drop = FALSE keeps a one-row remainder as a matrix with its row name;
  # without it a group of exactly two samples loses its sample name.
  freqs <- do.call(rbind, lapply(fdr_counts, function(m) {
    if (is.null(m)) NULL else m[-1, , drop = FALSE]
  }))
  if (is.null(freqs) || nrow(freqs) == 0) {
    stop("each group needs at least two samples to be compared",
         call. = FALSE)
  }
  average <- apply(freqs, 2, mean)

  # chisquare test function
  # p-value of a chi-square homogeneity test for two count vectors over the
  # same categories.  Named so that it does not mask stats::chisq.test.
  homogeneity_pvalue <- function(x, y) {
    totals <- x + y
    # A category that is empty in both vectors has expected count 0 and
    # would turn the statistic into NaN (0/0); it is left out of the test.
    keep <- totals > 0
    x <- x[keep]
    y <- y[keep]
    share <- totals[keep] / sum(totals)
    expected_x <- share * sum(x)
    expected_y <- share * sum(y)
    statistic <- sum((x - expected_x)^2 / expected_x) +
      sum((y - expected_y)^2 / expected_y)
    # upper tail directly: avoids the cancellation in 1 - pchisq(...)
    pchisq(statistic, df = length(x) - 1, lower.tail = FALSE)
  }

  # pvalues of the chisquare test between sample and average
  # (H0: two samples has the same distribution)
  pvalues <- vapply(seq_len(nrow(freqs)), function(i) {
    homogeneity_pvalue(freqs[i, ], average)
  }, numeric(1))

  # data frame with final p-values
  dataframe <- data.frame(rownames(freqs), pvalues)
  colnames(dataframe) <- c("sample name", "pvalue")
  print(dataframe)
}
z.plot("C:/Users/Vera/Desktop/data",23)



Thank you again



2013/2/17 arun <smartpink111 at yahoo.com>

HI Vera,
>
>No problem.  I am cc:ing to r-help.
>
>A.K.
>
>
>
>
>
>
>________________________________
>From: Vera Costa <veracosta.rt at gmail.com>
>To: arun <smartpink111 at yahoo.com>
>Sent: Sunday, February 17, 2013 5:44 AM
>Subject: Re: reading data
>
>
>
>Hi. Thank you. It works now:-)
>And yes, I use windows.
>Thank you very much.
>No dia 17 de Fev de 2013 00:44, "arun" <smartpink111 at yahoo.com> escreveu:
>
>Hi Vera,
>>
>>Have you tried the suggestion?
>>
>>Are you using Windows?
>>Thanks,
>>Arun
>>
>>
>>
>>
>>
>>
>>________________________________
>>From: Vera Costa <veracosta.rt at gmail.com>
>>To: arun <smartpink111 at yahoo.com>
>>Sent: Saturday, February 16, 2013 7:10 PM
>>Subject: Re: reading data
>>
>>
>>Thank you.
>>In mine, I have an error " 'what' must be a character string or a function".
>>I need to do equivalent in my system.
>>Thank you and sorry one more time.
>>No dia 16 de Fev de 2013 23:53, "arun" <smartpink111 at yahoo.com> escreveu:
>>
>>Hi,
>>>You didn't mention what the error message is, or whether you are reading file names which are not "mmmmm11kk.txt".
>>>
>>>It is working on my system when I run it again.
>>>?c() combine values into a vector or list.
>>>
>>> sessionInfo()
>>>R version 2.15.1 (2012-06-22)
>>>Platform: x86_64-pc-linux-gnu (64-bit)
>>>
>>>locale:
>>> [1] LC_CTYPE=en_CA.UTF-8       LC_NUMERIC=C             
>>> [3] LC_TIME=en_CA.UTF-8        LC_COLLATE=en_CA.UTF-8   
>>> [5] LC_MONETARY=en_CA.UTF-8    LC_MESSAGES=en_CA.UTF-8  
>>> [7] LC_PAPER=C                 LC_NAME=C                
>>> [9] LC_ADDRESS=C               LC_TELEPHONE=C           
>>>[11] LC_MEASUREMENT=en_CA.UTF-8 LC_IDENTIFICATION=C      
>>>
>>>attached base packages:
>>>[1] stats     graphics  grDevices utils     datasets  methods   base    
>>>
>>>other attached packages:
>>>[1] stringr_0.6.2  reshape2_1.2.2
>>>
>>>loaded via a namespace (and not attached):
>>>[1] plyr_1.8
>>>
>>>
>>>#code
>>>
>>>
>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}))  #it seems like one of the rows of your file doesn't have 6 elements, so added fill=TRUE
>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>res2<-split(res,names(res))
>>>res3<- lapply(res2,function(x) {names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>#result
>>>
>>>res3
>>>#$group_a
>>>#$group_a$a1
>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>
>>>$group_a$a2
>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>
>>>$group_a$a3
>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>
>>>
>>>$group_b
>>>$group_b$b1
>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>
>>>$group_b$b2
>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>
>>>
>>>$group_c
>>>$group_c$c1
>>>     Id  M mm    x         b  u  k  j    y        p    v
>>>1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>
>>>
>>>A.K.
>>>
>>>
>>>
>>>________________________________
>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>To: arun <smartpink111 at yahoo.com>
>>>Sent: Saturday, February 16, 2013 6:32 PM
>>>Subject: Re: reading data
>>>
>>>
>>>Sorry again... In:
>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("...
>>>What is this c? In do.call(c,   When I put this row im R, I have an error.
>>>Thank you
>>>No dia 15 de Fev de 2013 18:11, "arun" <smartpink111 at yahoo.com> escreveu:
>>>
>>>Hi,
>>>>No problem.
>>>>
>>>>BTW, these questions are not stupid..
>>>>Arun
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>
>>>>________________________________
>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>To: arun <smartpink111 at yahoo.com>
>>>>Sent: Friday, February 15, 2013 1:08 PM
>>>>Subject: Re: reading data
>>>>
>>>>
>>>>Thank you very much.
>>>>
>>>>I will try to apply and after I tell you if it is ok :-)
>>>>
>>>>Thank you and sorry about this questions (sometimes stupid questions).
>>>>
>>>>
>>>>
>>>>
>>>>2013/2/15 arun <smartpink111 at yahoo.com>
>>>>
>>>>HI,
>>>>>No problem.
>>>>>?c() for concatenate to vector or list().
>>>>>If I use do.call(cbind,..) or do.call(rbind,...)
>>>>>
>>>>>do.call(cbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) 
>>>>>#   [,1]    [,2]    [,3]    [,4]    [,5]    [,6]  
>>>>>#a1 List,11 List,11 List,11 List,11 List,11 List,11
>>>>>
>>>>>
>>>>> do.call(rbind,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})) 
>>>>>#     a1    
>>>>>#[1,] List,11
>>>>>#[2,] List,11
>>>>>#[3,] List,11
>>>>>#[4,] List,11
>>>>>#[5,] List,11
>>>>>#[6,] List,11
>>>>>ie.
>>>>>list within in a list
>>>>>
>>>>> restrial<-lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))})
>>>>> str(restrial)
>>>>>#List of 6
>>>>># $ :List of 1
>>>>>  #..$ a1:'data.frame':    6 obs. of  11 variables:
>>>>>  #.. ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ...
>>>>>  #.. ..$ M : chr [1:6] "1" "1" "2" "1" ...
>>>>>  #. ..$ mm: int [1:6] 2 2 1 2 3 2
>>>>>  #. ..$ x : int [1:6] 739 2263 1 1965 3660 1972
>>>>>  -----------------------------------------------------------------
>>>>>str(res)
>>>>>#List of 6
>>>>># $ a1:'data.frame':    6 obs. of  11 variables:
>>>>> # ..$ Id: chr [1:6] "aAA" "aAAAA" "aA" "aAA" ...
>>>>>  #..$ M : chr [1:6] "1" "1" "2" "1" ...
>>>>> # ..$ mm: int [1:6] 2 2 1 2 3 2
>>>>> # ..$ x : int [1:6] 739 2263 1 1965 3660 1972
>>>>>-----------------------------------------------------------------
>>>>>
>>>>>You mentioned about naming this to "group_a","group_b". etc..
>>>>>
>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>res2<-split(res,names(res))
>>>>>
>>>>>res3<- lapply(res2,function(x) {names(x)<-paste(gsub(".*_","",names(x)),1:length(x),sep="");x})
>>>>> res3$group_a
>>>>>$a1
>>>>>
>>>>>#     Id  M mm    x         b  u  k  j    y        p    v
>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>
>>>>>#$a2
>>>>>
>>>>>#     Id  M mm    x         b  u  k  j    y        p    v
>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>
>>>>>#$a3
>>>>>
>>>>> #    Id  M mm    x         b  u  k  j    y        p    v
>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>A.K.
>>>>>
>>>>>________________________________
>>>>>From: Vera Costa <veracosta.rt at gmail.com>
>>>>>To: arun <smartpink111 at yahoo.com>
>>>>>Sent: Friday, February 15, 2013 12:39 PM
>>>>>Subject: Re: reading data
>>>>>
>>>>>
>>>>>
>>>>>Thank you very much and sorry my questions.
>>>>>
>>>>>But this code isn't grouping for letters sure? I mean, a1,a2,a3 is the same group, (the first letter give me the name of the group)
>>>>>
>>>>>Another question, in do.call, you did do.call (c,.....) .What is c?
>>>>>
>>>>>Sorry
>>>>>
>>>>>
>>>>>
>>>>>2013/2/15 arun <smartpink111 at yahoo.com>
>>>>>
>>>>>HI,
>>>>>>
>>>>>>Just to add:
>>>>>>
>>>>>>
>>>>>>res<-do.call(c,lapply(list.files(recursive=T)[grep("mmmmm11kk",list.files(recursive=T))],function(x) {names(x)<-gsub("^(.*)\\/.*","\\1",x); lapply(x,function(y) read.table(y,header=TRUE,stringsAsFactors=FALSE,fill=TRUE))}))  #it seems like one of the rows of your file doesn't have 6 elements, so added fill=TRUE
>>>>>>
>>>>>> names(res)<-paste("group_",gsub("\\d+","",names(res)),sep="")
>>>>>>res[grep("group_b",names(res))]
>>>>>>
>>>>>>I am not sure how you want the grouped data to look like.  If you want something like this:
>>>>>>res1<-do.call(rbind,res)
>>>>>>res2<-lapply(split(res1,gsub("[.0-9]","",row.names(res1))),function(x) {row.names(x)<-1:nrow(x);x})
>>>>>>res2
>>>>>>#$group_a
>>>>>>
>>>>>> #     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>#1    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#2  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#3     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#4    aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#5   aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#6     AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>#7    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#8  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#9     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#10   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#11  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#12    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>#13   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#14 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#15    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#16   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#17  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#18    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>
>>>>>>
>>>>>>#$group_b
>>>>>> #     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>#1    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#2  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#3     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#4    aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#5   aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#6     AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>#7    aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#8  aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#9     aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#10   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#11  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#12    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>
>>>>>>#$group_c
>>>>>>
>>>>>> #    Id  M mm    x         b  u  k  j    y        p    v
>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>
>>>>>>
>>>>>>#or if you want it like this:
>>>>>>res2<-split(res,names(res))
>>>>>>
>>>>>>res2[["group_b"]]
>>>>>>
>>>>>>#$group_b
>>>>>>#     Id  M mm    x         b  u  k  j    y        p    v
>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>
>>>>>>#$group_b
>>>>>> #    Id  M mm    x         b  u  k  j    y        p    v
>>>>>>#1   aAA  1  2  739 0.1257000  2  2 AA    2     8867 8926
>>>>>>#2 aAAAA  1  2 2263 0.0004000  2  2 AR    4     7640 8926
>>>>>>#3    aA  2  1    1 0.0845435  2 AA  2 6790 734,1092   NA
>>>>>>#4   aAA  1  2 1965 0.0007000  4  3 AR    2    11616 8926
>>>>>>#5  aAAA  1  3 3660 0.0008600 18  3 AA    2    20392  496
>>>>>>#6    AA na  2 1972 0.0007000 11  3 AR   25      509  734
>>>>>>
>>>>>>Hope this helps.
>>>>>>
>>>>>>A.K.
>>>>>>
>>>>>>
>>>>>>
>>>>>>----- Original Message -----
>>>>>>From: "veracosta.rt at gmail.com" <veracosta.rt at gmail.com>
>>>>>>To: smartpink111 at yahoo.com
>>>>>>Cc:
>>>>>>Sent: Friday, February 15, 2013 9:15 AM
>>>>>>Subject: reading data
>>>>>>
>>>>>>Hi,
>>>>>>I post yesterday and you helped me. I have little problem.
>>>>>>
>>>>>>At first, I never worked with regular expressions...
>>>>>>
>>>>>>The code that you gave me it's ok, but my files are inside the folders a1,a2,a3. I try to explain better.
>>>>>>
>>>>>>I have one folder named "data". Inside this folder I have some other folders named "a1","a2","b1","b2",... and inside each one of them I have some files. I want only the file "mmmmmm.txt" (in every folder I have one file with this name).
>>>>>>The name of the folder gives me the name of the group, but I need to read the file inside. And after that, have "group_a", "group_b"... because I need to work with this data grouped (and know the name of the group).
>>>>>>
>>>>>>Thank you.
>>>>>>   
>>>>>   
>>>>
>>>
>>
>                                



More information about the R-help mailing list