[R] R: re: dataframe

Wed Apr 30 15:22:00 CEST 2014

Hi James,

I guess the problem was that the columns you tried were factors.
Suppose `dat1` is your first dataset.
library(stringr)
# Build one subset of `dat1` per "sample.<label1>.<label2>" summary column
# (e.g. "sample.Au5.C"). Each subset keeps:
#   * the "o..." measurement columns whose values contain both labels,
#   * the matching "sample.<label1>.<label2>" column itself,
#   * every column that is neither a "sample..." nor an "o..." column.
# The result `lst1` is a list of data frames named "<label1>.<label2>".

# Summary columns are identified by the literal "sample." prefix.
indx <- grep("^sample\\.", colnames(dat1))
if (length(indx) == 0) {
    stop("no 'sample.<label1>.<label2>' columns found in dat1")
}
# Strip the literal prefix (a character class such as "[sample.]" would also
# blank those letters inside the labels), then split the rest on "." into one
# column per label. colClasses keeps labels such as "T" or "F" as text.
toGrep <- read.table(text = gsub(".", " ", sub("^sample\\.", "", colnames(dat1)[indx]), 
    fixed = TRUE), header = FALSE, colClasses = "character")
dat1New <- dat1  
# The "o..." columns are factors; convert them to character so that grep()
# searches their labels rather than the integer codes.
indx2 <- grep("^o", colnames(dat1New))
dat1New[indx2] <- lapply(dat1New[indx2], as.character)
# Columns kept in every subset; computed once because it does not depend on i.
indx5 <- setdiff(seq_len(ncol(dat1New)), c(indx, indx2))
lst1 <- lapply(seq_len(nrow(toGrep)), function(i) {
    # Columns containing both labels as whole words, so that a label is not
    # matched inside a longer one.
    indx3 <- intersect(grep(paste0("\\b", toGrep[i, 1], "\\b"), dat1New), 
        grep(paste0("\\b", toGrep[i, 2], "\\b"), dat1New))
    # The summary column for this label pair, matched by exact name instead
    # of a regular expression in which "." would match any character.
    indx4 <- which(colnames(dat1New) == paste0("sample.", paste(unlist(toGrep[i, 
        ]), collapse = ".")))
    dat1New[, sort(c(indx3, indx4, indx5)), drop = FALSE]
})
names(lst1) <- as.character(interaction(toGrep, sep = "."))

Here, I assumed that you wanted only the particular "sample..." column along with the subset.

 lapply(lst1,names)[[1]]
 [1] "X"                            "rt"                          
 [3] "mz"                           "o066_010"                    
 [5] "o066_022"                     "o066_029"                    
 [7] "o066_034"                     "o066_068"                    
 [9] "o066_072"                     "o066_079"                    
[11] "o066_081"                     "o066_086"                    
[13] "pspectrum"                    "isotopes"                    
[15] "adduct"                       "mzmin"                       
[17] "mzmax"                        "rtmin"                       
[19] "rtmax"                        "npeaks"                      
[21] "sample.Au5.C"                 "X2W.p.value.Nanoparticle"    
[23] "X2W.p.value.Treatment"        "X2W.adj.p.value.Nanoparticle"
[25] "X2W.adj.p.value.\nTreatment"  "Metlin"                      
[27] "HMDBtag"                      "HMDBinfo"      
A.K.

On Wednesday, April 30, 2014 8:29 AM, "pascalbells at libero.it" <pascalbells at libero.it> wrote:
Hi, thank you again for your help....

here is the dput:

structure(list(X = structure(c(8L, 7L, 9L, 1L), .Label = c("61/195", 
"69/954", "72/300", "73/300", "74/946", "76/611", "Nanoparticle", 
"SampleName", "Treatment"), class = "factor"), rt = c(NA, NA, 
NA, 195.09555), mz = c(NA, NA, NA, 61.00722661), o066_010 = structure(c(9L, 
7L, 8L, 2L), .Label = c("14043676.02", "14536204.77", "17652481.49", 
"5212485.416", "6731535.564", "9876651.476", "Au5", "C", "sample"
), class = "factor"), o066_019 = structure(c(9L, 7L, 8L, 3L), .Label = c
("10023255.04", 
"15073569.61", "15473173.22", "28708474.98", "5633388.668", "7154698.204", 
"Au5", "H", "sample"), class = "factor"), o066_022 = structure(c(9L, 
7L, 8L, 2L), .Label = c("10087537.1", "13846871.69", "14528006.42", 
"18458758.83", "5383022.265", "6577623.782", "Au5", "C", "sample"
), class = "factor"), o066_023 = structure(c(9L, 7L, 8L, 1L), .Label = c
("15723091.09", 
"17023369.3", "18462299.71", "4996777.375", "6987554.082", "9174926.395", 
"Au5", "L", "sample"), class = "factor"), o066_024 = structure(c(9L, 
7L, 8L, 1L), .Label = c("14367825.89", "16698747.9", "18696653.21", 
"5025690.383", "6930208.708", "9736032.197", "Au32", "M", "sample"
), class = "factor"), o066_025 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14488608.39", 
"17814216.17", "18698120.65", "4950983.925", "7193018.533", "8061360.256", 
"Au32", "M", "sample"), class = "factor"), o066_029 = structure(c(9L, 
7L, 8L, 2L), .Label = c("11915681.99", "13140449.17", "16980818.81", 
"5529195.67", "7045157.096", "9792452.409", "Au5", "C", "sample"
), class = "factor"), o066_032 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14598638.42", 
"17574435.2", "18133608.77", "5440291.241", "6145144.579", "6754867.22", 
"Au32", "C", "sample"), class = "factor"), o066_034 = structure(c(9L, 
7L, 8L, 1L), .Label = c("14979396.48", "18783422.61", "18944230.28", 
"5058848.793", "6240890.066", "7253951.112", "Au5", "C", "sample"
), class = "factor"), o066_039 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14540874.05", 
"17997589.68", "18808525.49", "4923318.003", "4989990.142", "6943084.118", 
"Au32", "C", "sample"), class = "factor"), o066_049 = structure(c(9L, 
7L, 8L, 1L), .Label = c("13904244.76", "17285116.29", "18378290.63", 
"5578459.181", "6817168.77", "6956602.05", "Au32", "H", "sample"
), class = "factor"), o066_052 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14351098.41", 
"16914393.93", "27503386.81", "6337197.417", "7047144.652", "8204323.852", 
"Au5", "H", "sample"), class = "factor"), o066_055 = structure(c(9L, 
7L, 8L, 1L), .Label = c("13906338.32", "15282046.89", "18357761.85", 
"5612493.227", "5990567.16", "6541120.91", "Au32", "L", "sample"
), class = "factor"), o066_056 = structure(c(9L, 7L, 8L, 1L), .Label = c
("13198308.2", 
"16943030.55", "24819426.73", "5794416.907", "6526930.135", "6965902.611", 
"Au5", "M", "sample"), class = "factor"), o066_057 = structure(c(9L, 
7L, 8L, 1L), .Label = c("13553932.63", "16650323.6", "23887991.99", 
"4978191.783", "6498421.044", "7482551.482", "Au5", "M", "sample"
), class = "factor"), o066_060 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12757675.44", 
"14478961.76", "17693417", "5688587.416", "6278360.393", "6778930.512", 
"Au32", "C", "sample"), class = "factor"), o066_062 = structure(c(9L, 
7L, 8L, 1L), .Label = c("12566817.06", "16230428.36", "18310198.43", 
"4929894.983", "6051669.256", "6893438.406", "Au32", "C", "sample"
), class = "factor"), o066_063 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12765436.23", 
"15664549.3", "18880700.09", "5012562.077", "5653516.005", "6891139.714", 
"Au5", "L", "sample"), class = "factor"), o066_068 = structure(c(9L, 
7L, 8L, 2L), .Label = c("10077274.11", "11533763.33", "11914610.67", 
"17481826.33", "5118520.628", "5722064.277", "Au5", "C", "sample"
), class = "factor"), o066_072 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12506876.82", 
"15059553.41", "17963658.12", "5147251.947", "5936120.075", "6757510.158", 
"Au5", "C", "sample"), class = "factor"), o066_075 = structure(c(9L, 
7L, 8L, 1L), .Label = c("12629327.74", "14157915.9", "17676966.26", 
"5507872.857", "5772060.453", "6460845.336", "Au5", "L", "sample"
), class = "factor"), o066_077 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11424841.46", 
"13873092.31", "16264095.9", "5139882.469", "5267420.049", "6093969.775", 
"Au32", "C", "sample"), class = "factor"), o066_079 = structure(c(9L, 
7L, 8L, 1L), .Label = c("11310021.96", "12817107.8", "16850943.65", 
"4790059.15", "5497826.813", "6272350.228", "Au5", "C", "sample"
), class = "factor"), o066_080 = structure(c(9L, 7L, 8L, 1L), .Label = c
("10962613.96", 
"12999912.78", "17414255.92", "5079434.424", "5790847.752", "6285463.247", 
"Au32", "C", "sample"), class = "factor"), o066_081 = structure(c(9L, 
7L, 8L, 1L), .Label = c("10716085.25", "12783977.19", "17139019.04", 
"5316270.963", "5477933.375", "5939018.303", "Au5", "C", "sample"
), class = "factor"), o066_082 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11020219.13", 
"12911640.64", "17298440.71", "5292075.669", "5608730.99", "5950511.372", 
"Au32", "C", "sample"), class = "factor"), o066_084 = structure(c(9L, 
7L, 8L, 1L), .Label = c("11254185.62", "14433261.34", "17602305.5", 
"4220138.319", "4802246.445", "6580756.431", "Au32", "C", "sample"
), class = "factor"), o066_086 = structure(c(9L, 7L, 8L, 6L), .Label = c
("13251623.05", 
"17376126.66", "4975858.829", "5877369.947", "6459614.347", "9956435.681", 
"Au5", "C", "sample"), class = "factor"), o066_087 = structure(c(9L, 
7L, 8L, 1L), .Label = c("11090591.08", "12481560.98", "17685195.53", 
"4976028.139", "5729221.367", "5866020.444", "Au32", "H", "sample"
), class = "factor"), o066_090 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12039292.87", 
"13766587.51", "17661480.62", "4105603.641", "4892418.995", "6364759.427", 
"Au32", "M", "sample"), class = "factor"), o066_093 = structure(c(9L, 
7L, 8L, 1L), .Label = c("11050848.78", "14300477", "17455404.65", 
"4944754.379", "5314318.706", "6132171.794", "Au32", "L", "sample"
), class = "factor"), o066_101 = structure(c(9L, 7L, 8L, 1L), .Label = c
("10569492.27", 
"12736422.09", "16216119.36", "4941484.876", "5716315.813", "5888895.838", 
"Au32", "C", "sample"), class = "factor"), o066_102 = structure(c(9L, 
7L, 8L, 1L), .Label = c("10004456.65", "13327035.46", "16746388.21", 
"4074210.702", "5464246.567", "6303763.552", "Au32", "L", "sample"
), class = "factor"), o066_103 = structure(c(9L, 7L, 8L, 1L), .Label = c
("10248532.42", 
"12683248.9", "25351553.76", "5103432.052", "6204859.229", "6370345.756", 
"Au5", "H", "sample"), class = "factor"), o066_104 = structure(c(9L, 
7L, 8L, 6L), .Label = c("14115821.79", "16849972.37", "4717878.989", 
"6440223.441", "8546766.206", "9957099.225", "Au32", "H", "sample"
), class = "factor"), o066_105 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11382449.23", 
"15148302.74", "22676887.88", "5267156.789", "5970593.031", "6526962.522", 
"Au5", "M", "sample"), class = "factor"), pspectrum = c(NA, NA, 
NA, 24L), isotopes = c(NA, NA, NA, NA), adduct = structure(c(1L, 
1L, 1L, 1L), .Label = c("", "[M+H]+ 72.0781", "[M+Na]+ 51"), class = 
"factor"), 
    mzmin = c(NA, NA, NA, 61.00708832), mzmax = c(NA, NA, NA, 
    61.00730019), rtmin = c(NA, NA, NA, 194.0899), rtmax = c(NA, 
    NA, NA, 196.0904), npeaks = c(NA, NA, NA, 36L), sample.Au5.C = c(NA, 
    NA, NA, 9L), sample.Au5.H = c(NA, NA, NA, 9L), sample.Au5.L = c(NA, 
    NA, NA, 3L), sample.Au32.M = c(NA, NA, NA, 3L), sample.Au32.C = c(NA, 
    NA, NA, 3L), sample.Au32.H = c(NA, NA, NA, 3L), sample.Au32.L = c(NA, 
    NA, NA, 3L), sample.Au5.M = c(NA, NA, NA, 3L), X2W.p.value.Nanoparticle = c
(NA, 
    NA, NA, 0.291527766), X2W.p.value.Treatment = c(NA, NA, NA, 
    0.79589541), X2W.adj.p.value.Nanoparticle = c(NA, NA, NA, 
    0.453622043), X2W.adj.p.value.Treatment = c(NA, NA, NA, 0.870148042
    ), Metlin = structure(c(1L, 1L, 1L, 2L), .Label = c("", "http://metlin.
scripps.edu/metabo_list.php?mass_min=59.95&mass_max=60.05", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=67.936&mass_max=68.
036", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=71.024&mass_max=71.
124", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=72.027&mass_max=72.
127", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=72.932&mass_max=73.
032", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=74.982&mass_max=75.
082"
    ), class = "factor"), HMDBtag = structure(c(1L, 1L, 1L, 1L
    ), .Label = c("", "HMDB00123, HMDB00925, HMDB12136"), class = "factor"), 
    HMDBinfo = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_
    )), .Names = c("X", "rt", "mz", "o066_010", "o066_019", "o066_022", 
"o066_023", "o066_024", "o066_025", "o066_029", "o066_032", "o066_034", 
"o066_039", "o066_049", "o066_052", "o066_055", "o066_056", "o066_057", 
"o066_060", "o066_062", "o066_063", "o066_068", "o066_072", "o066_075", 
"o066_077", "o066_079", "o066_080", "o066_081", "o066_082", "o066_084", 
"o066_086", "o066_087", "o066_090", "o066_093", "o066_101", "o066_102", 
"o066_103", "o066_104", "o066_105", "pspectrum", "isotopes", 
"adduct", "mzmin", "mzmax", "rtmin", "rtmax", "npeaks", "sample.Au5.C", 
"sample.Au5.H", "sample.Au5.L", "sample.Au32.M", "sample.Au32.C", 
"sample.Au32.H", "sample.Au32.L", "sample.Au5.M", "X2W.p.value.Nanoparticle", 
"X2W.p.value.Treatment", "X2W.adj.p.value.Nanoparticle", "X2W.adj.p.value.
Treatment", 
"Metlin", "HMDBtag", "HMDBinfo"), row.names = c(NA, 4L), class = "data.frame")

>

I would like to arrange all the columns that match "Au5" and "C", as you can 
see below:

> dput(head(tabella,4))
structure(list(X = structure(c(8L, 7L, 9L, 1L), .Label = c("61/195", 
"69/954", "72/300", "73/300", "74/946", "76/611", "Nanoparticle", 
"SampleName", "Treatment"), class = "factor"), rt = c(NA, NA, 
NA, 195.09555), mz = c(NA, NA, NA, 61.00722661), o066_010 = structure(c(9L, 
7L, 8L, 2L), .Label = c("14043676.02", "14536204.77", "17652481.49", 
"5212485.416", "6731535.564", "9876651.476", "Au5", "C", "sample"
), class = "factor"), o066_022 = structure(c(9L, 7L, 8L, 2L), .Label = c
("10087537.1", 
"13846871.69", "14528006.42", "18458758.83", "5383022.265", "6577623.782", 
"Au5", "C", "sample"), class = "factor"), o066_029 = structure(c(9L, 
7L, 8L, 2L), .Label = c("11915681.99", "13140449.17", "16980818.81", 
"5529195.67", "7045157.096", "9792452.409", "Au5", "C", "sample"
), class = "factor"), o066_034 = structure(c(9L, 7L, 8L, 1L), .Label = c
("14979396.48", 
"18783422.61", "18944230.28", "5058848.793", "6240890.066", "7253951.112", 
"Au5", "C", "sample"), class = "factor"), o066_068 = structure(c(9L, 
7L, 8L, 2L), .Label = c("10077274.11", "11533763.33", "11914610.67", 
"17481826.33", "5118520.628", "5722064.277", "Au5", "C", "sample"
), class = "factor"), o066_072 = structure(c(9L, 7L, 8L, 1L), .Label = c
("12506876.82", 
"15059553.41", "17963658.12", "5147251.947", "5936120.075", "6757510.158", 
"Au5", "C", "sample"), class = "factor"), o066_077 = structure(c(9L, 
7L, 8L, 1L), .Label = c("11424841.46", "13873092.31", "16264095.9", 
"5139882.469", "5267420.049", "6093969.775", "Au32", "C", "sample"
), class = "factor"), o066_079 = structure(c(9L, 7L, 8L, 1L), .Label = c
("11310021.96", 
"12817107.8", "16850943.65", "4790059.15", "5497826.813", "6272350.228", 
"Au5", "C", "sample"), class = "factor"), o066_080 = structure(c(9L, 
7L, 8L, 6L), .Label = c("13251623.05", "17376126.66", "4975858.829", 
"5877369.947", "6459614.347", "9956435.681", "Au5", "C", "sample"
), class = "factor"), o066_102 = structure(c(8L, 1L, 1L, 3L), .Label = c("", 
"159", "24", "40", "6", "65", "91", "sample"), class = "factor"), 
    o066_103 = structure(c(2L, 1L, 1L, 1L), .Label = c("", "sample"
    ), class = "factor"), o066_104 = structure(c(4L, 1L, 1L, 
    1L), .Label = c("", "[M+H]+ 72.0781", "[M+Na]+ 51", "sample"
    ), class = "factor"), o066_105 = structure(c(8L, 1L, 1L, 
    2L), .Label = c("", "61.00708832", "68.9930209", "72.08063445", 
    "73.08397527", "73.98878773", "76.03904924", "sample"), class = 
"factor"), 
    pspectrum = c(NA, NA, NA, 61.00730019), isotopes = c(NA, 
    NA, NA, 194.0899), adduct = c(NA, NA, NA, 196.0904), mzmin = c(NA, 
    NA, NA, 36L), mzmax = c(NA, NA, NA, 9L), rtmin = c(NA, NA, 
    NA, 9L), rtmax = c(NA, NA, NA, 3L), npeaks = c(NA, NA, NA, 
    3L), sample.Au5.C = c(NA, NA, NA, 3L), sample.Au5.H = c(NA, 
    NA, NA, 3L), sample.Au5.L = c(NA, NA, NA, 3L), sample.Au32.M = c(NA, 
    NA, NA, 3L), sample.Au32.C = c(NA, NA, NA, 0.291527766), 
    sample.Au32.H = c(NA, NA, NA, 0.79589541), sample.Au32.L = c(NA, 
    NA, NA, 0.453622043), sample.Au5.M = c(NA, NA, NA, 0.870148042
    ), X2W.p.value.Nanoparticle = structure(c(1L, 1L, 1L, 2L), .Label = c("", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=59.95&mass_max=60.
05", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=67.936&mass_max=68.
036", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=71.024&mass_max=71.
124", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=72.027&mass_max=72.
127", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=72.932&mass_max=73.
032", 
    "http://metlin.scripps.edu/metabo_list.php?mass_min=74.982&mass_max=75.
082"
    ), class = "factor"), X2W.p.value.Treatment = structure(c(1L, 
    1L, 1L, 1L), .Label = c("", "HMDB00123, HMDB00925, HMDB12136"
    ), class = "factor"), X2W.adj.p.value.Nanoparticle = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_), X2W.adj.p.value.Treatment = c(NA, 
    NA, NA, NA), Metlin = c(NA, NA, NA, NA), HMDBtag = c(NA, 
    NA, NA, NA), HMDBinfo = c(NA, NA, NA, NA)), .Names = c("X", 
"rt", "mz", "o066_010", "o066_022", "o066_029", "o066_034", "o066_068", 
"o066_072", "o066_077", "o066_079", "o066_080", "o066_102", "o066_103", 
"o066_104", "o066_105", "pspectrum", "isotopes", "adduct", "mzmin", 
"mzmax", "rtmin", "rtmax", "npeaks", "sample.Au5.C", "sample.Au5.H", 
"sample.Au5.L", "sample.Au32.M", "sample.Au32.C", "sample.Au32.H", 
"sample.Au32.L", "sample.Au5.M", "X2W.p.value.Nanoparticle", 
"X2W.p.value.Treatment", "X2W.adj.p.value.Nanoparticle", "X2W.adj.p.value.
Treatment", 
"Metlin", "HMDBtag", "HMDBinfo"), row.names = c(NA, 4L), class = "data.frame")

I want to arrange all the columns that match with Au5 and C,  

then Au5 and L

then Au5 and H

then Au5 and M

and so on....

and do the same with Au32....

Just reordering the columns,matching the patterns above.

But I would be happy just with Au5 and C.

Thanks a lot

James

>----Messaggio originale----
>Da: smartpink111 at yahoo.com
>Data: 30/04/2014 11.02
>A: "pascalbells at libero.it"<pascalbells at libero.it>
>Ogg: re: [R] dataframe
>
>Could you dput the dataset?
>Also,the expected result...
>
>----------
>Sent from my Nokia
>
>------Original message------
>From: pascalbells at libero.it <pascalbells at libero.it>
>To: <smartpink111 at yahoo.com>
>Date: Wednesday, April 30, 2014 9:40:06 AM GMT+0200
>Subject: R: Re: [R] dataframe
>
>thank you for your help but it doesn't work....
>
>James
>
>
>
>>----Messaggio originale----
>>Da: smartpink111 at yahoo.com
>>Data: 29/04/2014 17.59
>>A: "r-help at r-project.org"<r-help at r-project.org>
>>Cc: "pascalbells at libero.it"<pascalbells at libero.it>
>>Ogg: Re: [R] dataframe
>>
>>Hi,
>>
>>It is better to show example data using ?dput().
>>
>>dat <- structure(list(row.names = 1:4, XYZ = c("sample", "sample2", 
>>"sample3", "sample4"), `000_001` = c("sample", "Au5", "C", "C"
>>), `000_002` = c("sample", "Au32", "C", "Au4"), `000_003` = c("sample", 
>>"Au5", "A", "AC")), .Names = c("row.names", "XYZ", "000_001", 
>>"000_002", "000_003"), class = "data.frame", row.names = c(NA, 
>>-4L))
>>
>>
>>dat[,intersect(grep("Au5", dat), grep("\\bC\\b",dat)),drop=FALSE]
>>#  000_001
>>#1  sample
>>#2     Au5
>>#3       C
>>#4       C
>>
>>A.K.
>>
>>
>>
>>
>>On Tuesday, April 29, 2014 9:45 AM, "pascalbells at libero.it" 
><pascalbells at libero.it> wrote:
>>Hello, 
>>thank you for accepting me into the list.
>>
>>I have the following dataframe:
>>
>>   row.names  X           Y       Z     000_001  000_002  000_003
>>1                    sample                     sample    sample     sample
>>2                    sample2                   Au5         Au32       Au5
>>3                    sample3                   C              C             C
>>4                    ..........  
>>..
>>..

>n                    ...........                    ....            .......         
>............
>>
>>
>>I would like to select all the columns that have Au5 and C.
>>thank you in advance for your help,
>>james
>>
>>
>>    [[alternative HTML version deleted]]
>>
>>______________________________________________
>>R-help at r-project.org mailing list
>>https://stat.ethz.ch/mailman/listinfo/r-help
>>PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
>>and provide commented, minimal, self-contained, reproducible code.
>>
>>
>
>
>
>