[BioC] edgeR with two different groups (unpaired) at two time points (paired): Need some input

Tue Oct 22 20:03:32 CEST 2013

Hi!
The edgeR manual is quite nice, but I'm not quite sure if I'm on the 
right track..

The question to answer is: "Is there any significantly changed genes in 
diabetics at time point 2, compared to healthy?"

Heres my codes:

#Making the design matrix

status <- factor(c(rep("Healthy",26), rep("Diabetic",22)), 
levels=c("Healthy","Diabetic"))
patients <- 
factor(c(88,87,86,83,82,81,75,72,71,70,13,08,01,88,87,86,83,82,81,75,72,71,70,13,08,01,79,77,76,73,67,62,61,55,53,21,04,79,77,76,73,67,62,61,55,53,21,04))
timepoints = 
as.factor(c(1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2))
design <- model.matrix(~patients + timepoints*status)
> colnames(design)
  [1] "(Intercept)"                "patients4"
  [3] "patients8"                  "patients13"
  [5] "patients21"                 "patients53"
  [7] "patients55"                 "patients61"
  [9] "patients62"                 "patients67"
[11] "patients70"                 "patients71"
[13] "patients72"                 "patients73"
[15] "patients75"                 "patients76"
[17] "patients77"                 "patients79"
[19] "patients81"                 "patients82"
[21] "patients83"                 "patients86"
[23] "patients87"                 "patients88"
[25] "timepoints2"                "statusDiabetic"
[27] "timepoints2:statusDiabetic"

#Reading in HTSeq data

description <- c("test");
fileNames <- 
list.files(path="/Volumes/timemachine/HTseq_DEseq2",pattern="*.txt");
files <- sort(fileNames,decreasing=TRUE);
targets <- data.frame(files=files, group=status, 
description=description);
library(edgeR);
d <- 
readDGE(targets,path="/Volumes/timemachine/HTseq_DEseq2",skip=5,comment.char="!");
colnames(d) 
<-(c("N288","N287","N286","N283","N282","N281","N275","N272","N271","N270","N213","N208","N201","N188","187","N186","N183","N182","N181","N175","N172","N171","N170","N113","N108","N101","D279","D277","D276","D273","D267","D262","D261","D255","D253","D221","D204","D179","D177","D176","D173","D167","D162","D161","D155","D153","D121","D104"));
d$samples;
dim(d)

#Normalization

d <- (calcNormFactors(d,method="RLE"));

#Estimating the dispersion:

d <- estimateCommonDisp(d,verbose=TRUE)
Disp = 0.05823 , BCV = 0.2413
d <- estimateTagwiseDisp(d,trend="none")

#diffeksp

> fit <- glmFit(d,design)
Error in glmFit.default(y = y$counts, design = design, dispersion = 
dispersion,  :
   Design matrix not of full rank.  The following coefficients not 
estimable:
  statusDiabetic

Can someone tell me why I get this error?

Thanks alot!