[BioC] Sam always wants more observations

Charles Crane ccrane at purdue.edu
Thu May 4 14:35:16 CEST 2006


Dear Sirs, 
    I am running Bioconductor with Biobase 1.8.0, affy 1.8.1, siggenes
1.4.0, and affyPLM 1.6.0, under R 2.3.1 and MacOS X 10.3.9.  I am trying to
identify SNPs between two barley varieties on the basis of Affy chips, which
ReadAffy reads without evident errors.  Below are two session histories,
each ending in an error message that I do not really understand, unless
some step expects the data to be transposed.  Could someone please tell me
what I am doing wrong or leaving out?

>library(affy)
>library(siggenes)
> sidata <- ReadAffy(filenames = c("000113_5hr_kindered_H20_inoc_2x2.CEL",
"000114_12hr_kindered_H20_inoc_3bx1.CEL",
+  "000157_24hr_Kindered_H2O_Inoc_1.CEL",
"000158_0hr_Kindered_H2O_Inoc_4.CEL",
"000189_0hr_403-rep2_H2O_inoc_21b.CEL",
+  "000190_5hr_403-rep2_H2O_inoc_22b.CEL",
"000191_12hr_403-rep2_H2O_inoc_23b.CEL",
"000192_24hr_403-rep2_H2O_inoc_24b.CEL",
+  "000122_5hr_peruvian_H20_inoc_14bx1.CEL",
"000123_12hr_peruvian_H20_inoc_15bx1.CEL",
"000141_Non-host-resistant-barley_16bx2.CEL",
+  "000161_0hr_Peruvian_H2O_inoc_13.CEL",
"000201_0hr_405-rep2_H2O_inoc_33b.CEL",
"000202_5hr_405-rep2_H2O_inoc_34b.CEL",
+  "000203_12hr_405-rep2_H2O_inoc_35b.CEL",
"000204_24hr_405-rep2_H2O_inoc_36b.CEL"),
+  sampleNames = c("000113_5hr_kindered_H20_inoc_2x2",
"000114_12hr_kindered_H20_inoc_3bx1", "000157_24hr_Kindered_H2O_Inoc_1",
+  "000158_0hr_Kindered_H2O_Inoc_4", "000189_0hr_403-rep2_H2O_inoc_21b",
"000190_5hr_403-rep2_H2O_inoc_22b",
+  "000191_12hr_403-rep2_H2O_inoc_23b", "000192_24hr_403-rep2_H2O_inoc_24b",
"000122_5hr_peruvian_H20_inoc_14bx1",
+  "000123_12hr_peruvian_H20_inoc_15bx1",
"000141_Non-host-resistant-barley_16bx2", "000161_0hr_Peruvian_H2O_inoc_13",
+  "000201_0hr_405-rep2_H2O_inoc_33b", "000202_5hr_405-rep2_H2O_inoc_34b",
"000203_12hr_405-rep2_H2O_inoc_35b",
+  "000204_24hr_405-rep2_H2O_inoc_36b"),
+  phenoData = "Hordeumphenodata.txt")
> sirma <- bg.correct(sidata, method = "rma")
> sinorm <- normalize(sirma)
> siprobeintensities <- as.data.frame(probes(sinorm, "pm"))
> write.table(siprobeintensities, file = "siprobeintensities.txt")
> samout <- sam(siprobeintensities, cl = c(1, 1, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1,
2, 2, 2, 2), method = "cat.stat", B = 1000, ran = 11279)
Error in FUN(data, cl, ...) : There must be at least 10 observations in each
group.

I tried again with a fresh R session and an updated copy of
Hordeumphenodata.txt:
>library(affy)
>library(siggenes)
>sidata <- ReadAffy(filenames = c("000113_5hr_kindered_H20_inoc_2x2.CEL",
"000114_12hr_kindered_H20_inoc_3bx1.CEL",
 "000122_5hr_peruvian_H20_inoc_14bx1.CEL",
"000123_12hr_peruvian_H20_inoc_15bx1.CEL",
"000141_Non-host-resistant-barley_16bx2.CEL",
 "000157_24hr_Kindered_H2O_Inoc_1.CEL",
"000158_0hr_Kindered_H2O_Inoc_4.CEL",
"000159_0hr_Kindered_S_passerini_inoc_5.CEL",
 "000161_0hr_Peruvian_H2O_inoc_13.CEL",
"000162_0hr_Peruvian_S_passerini_17.CEL",
"000189_0hr_403-rep2_H2O_inoc_21b.CEL",
 "000190_5hr_403-rep2_H2O_inoc_22b.CEL",
"000191_12hr_403-rep2_H2O_inoc_23b.CEL",
"000192_24hr_403-rep2_H2O_inoc_24b.CEL",
 "000193_0hr_403-rep2_inoc-s_pass_25b.CEL",
"000201_0hr_405-rep2_H2O_inoc_33b.CEL",
"000202_5hr_405-rep2_H2O_inoc_34b.CEL",
 "000203_12hr_405-rep2_H2O_inoc_35b.CEL",
"000204_24hr_405-rep2_H2O_inoc_36b.CEL",
"000205_0hr_405-rep2_inoc-s_pass_37b.CEL"),
 sampleNames = c("000113_5hr_kindered_H20_inoc_2x2",
"000114_12hr_kindered_H20_inoc_3bx1", "000122_5hr_peruvian_H20_inoc_14bx1",
 "000123_12hr_peruvian_H20_inoc_15bx1",
"000141_Non-host-resistant-barley_16bx2", "000157_24hr_Kindered_H2O_Inoc_1",
 "000158_0hr_Kindered_H2O_Inoc_4", "000159_0hr_Kindered_S_passerini_inoc_5",
"000161_0hr_Peruvian_H2O_inoc_13",
 "000162_0hr_Peruvian_S_passerini_17", "000189_0hr_403-rep2_H2O_inoc_21b",
"000190_5hr_403-rep2_H2O_inoc_22b",
 "000191_12hr_403-rep2_H2O_inoc_23b", "000192_24hr_403-rep2_H2O_inoc_24b",
"000193_0hr_403-rep2_inoc-s_pass_25b",
 "000201_0hr_405-rep2_H2O_inoc_33b", "000202_5hr_405-rep2_H2O_inoc_34b",
"000203_12hr_405-rep2_H2O_inoc_35b",
 "000204_24hr_405-rep2_H2O_inoc_36b",
"000205_0hr_405-rep2_inoc-s_pass_37b"),
 phenoData = "Hordeumphenodata.txt")
> sirma <- bg.correct(sidata, method = "rma")
> sinorm <- normalize(sirma)
> siprobeintensities <- as.data.frame(probes(sinorm, "pm"))
> write.table(siprobeintensities, file = "siprobeintensities.txt")
> sicl <- c(1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2)
> samout <- sam(siprobeintensities, sicl, method = "cat.stat", B = 1000, ran =
11279)
Error in FUN(data, cl, ...) : There should be at least 25 samples.

    Also, how can I verify that the Affy intensities have been read
correctly?  I have a bad feeling that a Perl-style array could be read in
orthogonal to its proper orientation (that is, transposed).  The file
"siprobeintensities.txt" has 251438 rows and 20 numerical columns after the
probe identifier.
    Thank you for your time and attention, and for pointing me to wherever
the usage of sam for SNPs is more fully documented.  It would help if a
vignette gave the entire data flow from CEL file to output of the sam object.

Charles Crane
USDA-ARS, MWA, Crop Production and Pest Control Research Unit
Department of Botany and Plant Pathology
Purdue University



More information about the Bioconductor mailing list