[BioC] Regarding Illumina's WG-DASL.

abdul rawoof abdul87.edu at gmail.com
Tue Apr 2 12:39:06 CEST 2013


Hello,

I am doing Differential Gene Expression analysis for illumina's WG-DASL
data using R Bioconductor and GenomeStudio.
I am using Quantile normalisation and BH-FDR on both platform for finding
DEgenes. But the result is not comparable from both platform.
Please find the attached file for R Script and final result from both
platform and suggest me for any corrections.

Sincerely thanks,
Abdul Rawoof,
-------------- next part --------------
> BSData  <- readBeadSummaryData(dataFile="SampleProbeProfile.txt",sep="\t", skip=0, ProbeID="ProbeID",
   columns = list(exprs ="AVG_Signal",se.exprs="BEAD_STDERR",nObservations = "Avg_NBEADS", Detection="Detection Pval"))
> slotNames(BSData)
> pData(BSData)
> dim(BSData)
> names(assayData(BSData))
> boxplot(as.data.frame(exprs(BSData)),las=2,outline=FALSE,xlab="Samples",ylab="Average Signals",main="Boxplot for Non-Normalised Avg_Signals")
> boxplot(as.data.frame(log2(exprs(BSData))),las=2,outline=FALSE,xlab="Samples",ylab="Log2Average Signals",main="Boxplot for Non-Normalised Log2Avg_Signals")
> boxplot(as.data.frame(nObservations(BSData)),las=2,outline=FALSE,xlab="Samples",ylab="No of BEADS",main="Boxplot for No.of BEADS per Sample")
> clust <- dist(t(exprs(BSData)))
> hclust <- hclust(clust,method="complete")
> plot(hclust,xlab="SAMPLES",ylab="Heights",main="Cluster Dendogram Before Normalisation")
> g <- rownames(exprs(BSData))[1:10]

> plotMAXY(log2(exprs(BSData)), arrays =c(1,7), genesToLabel = g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(1,7),labelpch=20,foldLine=2.46,log=FALSE)

> plotMAXY(log2(exprs(BSData)), arrays =c(6,8,9), genesToLabel = g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(6,8,9),labelpch=20,foldLine=2.46,log=FALSE)

> plotMAXY(log2(exprs(BSData)), arrays =c(2,4,5), genesToLabel = g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(2,4,5),labelpch=20,foldLine=2.46,log=FALSE)

> plotMAXY(log2(exprs(BSData)), arrays =c(10,11,12), genesToLabel = g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(10,11,12),labelpch=20,foldLine=2.46,log=FALSE)

> plotMAXY(log2(exprs(BSData)), arrays =c(2,3,4), genesToLabel = g,labelCol="red", labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(log2(exprs(BSData)), arrays =c(2,3,4),labelpch=20,foldLine=2.46,log=FALSE)
***************************************************************************************************************
> BSNorm <- normaliseIllumina(BSData, method="quantile",transform="log2")
>  write.table(exprs(BSNorm),file="QNormLog2AvgSingal.txt",sep="\t")
> boxplot(exprs(BSNorm),las=3,outline=F, col=colors,xlab="Samples", ylab="Log2_Avg_Signals", main="Boxplot For Normalised Log2AVG_SIGNALS",cex.axis=0.9)
***************************************************************************************************************
> clust <- dist(t(BSNorm))
> hc <-hclust(clust,method="complete")
> plot(hc,xlab="Samples",main="Cluster Dendogram After Normalisation")
***************************************************************************************************************
> plotMAXY(BSNorm, arrays =c(1,7), labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(2,4,5), labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(10,11,12), labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(6,8,9), labelpch=20,foldLine=2.46,log=FALSE)
> plotMAXY(BSNorm, arrays =c(2,3,4), labelpch=20,foldLine=2.46,log=FALSE)
****************************************************************************************************************
> ###Differential Expression Analysis ###
> design <- matrix(nrow=12,ncol=5,0)
> colnames(design)=c("D1_R","D1_NR","D2_R","D2_NR","Control")
> design[which(strtrim(colnames(exprs(BSNorm)),4)=="D1_R",),1]=1
> design[which(strtrim(colnames(exprs(BSNorm)),5)=="D1_NR",),2]=1
> design[which(strtrim(colnames(exprs(BSNorm)),4)=="D2_R",),3]=1
> design[which(strtrim(colnames(exprs(BSNorm)),5)=="D2_NR",),4]=1
> design[which(strtrim(colnames(exprs(BSNorm)),7)=="Control",),5]=1
*****************************************************************************************************************
> fit <-lmFit(exprs(BSNorm),design)
> cont.matrix <- makeContrasts(D1RvsC= D1_R-Control,D1NRvsC=D1_NR-Control,D2RvsC=D2_R-Control,D2NRvsC=D2_NR-Control,levels=design)
> fit1 <- contrasts.fit(fit,cont.matrix)
> ebfit <- eBayes(fit1)
*****************************************************************************************************************
> dTest <- decideTests(ebfit,p.value=0.05,lfc=0) 
> summary(dTest)
   D1RvsC D1NRvsC D2RvsC D2NRvsC
-1      1       0      3       2
0   29373   29377  29326   29374
1       3       0     48       1
*****************************************************************************************************************
>topD1R=toptable(ebfit[,1],coef="D1RvsC",number=Inf,genelist=fit1$genes,adjust.method="BH",p.value=1,lfc=0)

> topD1NR =toptable(ebfit[,2],coef="D1NRvsC",number=Inf,genelist=fit1$genes,adjust.method="BH",p.value=1,lfc=0)

> topD2R =toptable(ebfit[,3],coef="D2RvsC",number=Inf,genelist=fit1$genes,adjust.method="BH",p.value=1,lfc=0)

> topD2NR =toptable(ebfit[,4],coef="D2NRvsC",number=Inf,genelist=fit1$genes,adjust.method="BH",p.value=1,lfc=0)
*****************************************************************************************************************
> vennDiagram(dTest[,1:3], main= " VennDiagram for Differentially Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
> vennDiagram(dTest[,c(1,3,4)], main= " VennDiagram for Differentially Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
> vennDiagram(dTest[,3:4], main= " VennDiagram for Differentially Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
> vennDiagram(dTest[,1:2], main= " VennDiagram for Differentially Expressed Genes",include=c("up","down"),counts.col=c("red","green"))
*****************************************************************************************************************
> smoothScatter(ebfit[,1]$Amean,ebfit[,1]$coefficients,xlab="Average Intensity",ylab="Log_Ratio",main="SmoothScatter plot for D1R_vs_Control",pch=1.8,cex=.30)

> smoothScatter(ebfit[,2]$Amean,ebfit[,2]$coefficients,xlab="Average Intensity",ylab="Log_Ratio",main="SmoothScatter plot for D1NR_vs_Control",pch=1.8,cex=.30)

> smoothScatter(ebfit[,3]$Amean,ebfit[,3]$coefficients,xlab="Average Intensity",ylab="Log_Ratio",main="SmoothScatter plot for D2R_vs_Control",pch=1.8,cex=.30)

> smoothScatter(ebfit[,4]$Amean,ebfit[,4]$coefficients,xlab="Average Intensity",ylab="Log_Ratio",main="SmoothScatter plot for D2NR_vs_Control",pch=1.8,cex=.30)
*****************************************************************************************************************
> par(mfrow=c(2,2))
> volcanoplot(ebfit[,1],coef=1,highlight=5,names=fit1$genes$TargetID,xlab="Log_Fold_Change",
    ylab="Log_Odds",main="VolcanoPlots of DEgenes for D1_R",pch=1,cex=.25)

> volcanoplot(ebfit[,2],coef=1,highlight=5,names=fit1$genes$TargetID,xlab="Log_Fold_Change",
    ylab="Log_Odds",main="VolcanoPlots of DEgenes for D1_NR",pch=1,cex=.25)

> volcanoplot(ebfit[,3],coef=1,highlight=5,names=fit1$genes$TargetID,xlab="Log_Fold_Change",
    ylab="Log_Odds",main="VolcanoPlots of DEgenes for D2_R",pch=1,cex=.25)

> volcanoplot(ebfit[,4],coef=1,highlight=5,names=fit1$genes$TargetID,xlab="Log_Fold_Change",
    ylab="Log_Odds",main="VolcanoPlots of DEgenes for D2_NR",pch=1,cex=.25)
*****************************************************************************************************************
>library(Heatplus)
> rnD1R <- rownames(topD1R)[topD1R$P.Value<0.05]  
> rnD1R <- as.numeric(rnD1R)
> DEgeneD1R <-BSNorm[rnD1R,c(1,7,10,11,12)]
> regD1R =regHeatmap(exprs(DEgeneD1R))
> plot(regD1R)
*****************************************************************************************************************

> rnD1NR <- rownames(topD1NR)[topD1NR$P.Value<0.05] 
> rnD1NR <- as.numeric(rnD1NR)
> DEgeneD1NR <-BSNorm[rnD1NR,c(6,8,9,10,11,12)]
> regD1NR=regHeatmap(exprs(DEgeneD1NR) )
> plot(regD1NR)
*****************************************************************************************************************
> rnD2R <- rownames(topD2R)[topD2R$P.Value<0.05]   
> rnD2R <- as.numeric(rnD2R)
> DEgeneD2R <-BSNorm[rnD2R,c(3,10,11,12)]
> regD2R=regHeatmap(exprs(DEgeneD2R) )
> plot(regD2R)
*****************************************************************************************************************
> rnD2NR <- rownames(topD2NR)[topD2NR$P.Value<0.05]   
> rnD2NR <- as.numeric(rnD2NR)
> DEgeneD2NR <-BSNorm[rnD2NR,c(2,4,5,10,11,12)]
> regD2NR=regHeatmap(exprs(DEgeneD2NR))
> plot(regD2NR)

#######XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX##############


More information about the Bioconductor mailing list