[BioC] two questions about limma (cont.2)

Thu Mar 8 05:19:51 CET 2007

Dear members,
Thanks to you for your attention to my questions.
Special thanks to Dr. Smyth, the author and maintainer of limma package,
for his detailed answer.
However,questions remain. The two questions first appeared in Bioconductor
Digest, Vol 49, Issue 7 on Mar 7, 2007 .

-------Question 1: About NaNs after backgroundCorrect------------
I checked the data before and after correction. The components
("R","G","Rb" and "Gb") of RGList are all positive. The dispersed spots at
low intensites before backgroundCorrect (plotMA(RG)) shrink to a line or a
cluster after backgroundCorrect and normalizeWithinArrays. NaNs occur in
log(x) right during the backgroundCorrect step using method "normexp".
Therefore I investigated the function backgroundCorrect() and the method
normexp. They are as follows:

>RG.b<-backgroundCorrect(RG,method="normexp",offset=0)
> backgroundCorrect
function (RG, method = "subtract", offset = 0, printer = RG$printer,
    verbose = TRUE)
{
    if (is.null(RG$Rb) != is.null(RG$Gb))
        stop("Background values exist for one channel but not the other")
    method <- match.arg(method, c("none", "subtract", "half",
        "minimum", "movingmin", "edwards", "normexp", "rma"))
    if (is.null(RG$Rb) && is.null(RG$Gb))
        method <- "none"
    switch(method, subtract = {
        RG$R <- RG$R - RG$Rb
        RG$G <- RG$G - RG$Gb
    }, half = {
        RG$R <- pmax(RG$R - RG$Rb, 0.5)
        RG$G <- pmax(RG$G - RG$Gb, 0.5)
    }, minimum = {
        RG$R <- as.matrix(RG$R - RG$Rb)
        RG$G <- as.matrix(RG$G - RG$Gb)
        for (slide in 1:ncol(RG$R)) {
            i <- RG$R[, slide] < 1e-18
            if (any(i, na.rm = TRUE)) {
                m <- min(RG$R[!i, slide], na.rm = TRUE)
                RG$R[i, slide] <- m/2
            }
            i <- RG$G[, slide] < 1e-18
            if (any(i, na.rm = TRUE)) {
                m <- min(RG$G[!i, slide], na.rm = TRUE)
                RG$G[i, slide] <- m/2
            }
        }
    }, movingmin = {
        RG$R <- RG$R - ma3x3.spottedarray(RG$Rb, printer = printer,
            FUN = min, na.rm = TRUE)
        RG$G <- RG$G - ma3x3.spottedarray(RG$Gb, printer = printer,
            FUN = min, na.rm = TRUE)
    }, edwards = {
        one <- matrix(1, NROW(RG$R), 1)
        delta.vec <- function(d, f = 0.1) {
            quantile(d, mean(d < 1e-16, na.rm = TRUE) * (1 +
                f), na.rm = TRUE)
        }
        sub <- as.matrix(RG$R - RG$Rb)
        delta <- one %*% apply(sub, 2, delta.vec)
        RG$R <- ifelse(sub < delta, delta * exp(1 - (RG$Rb +
            delta)/RG$R), sub)
        sub <- as.matrix(RG$G - RG$Gb)
        delta <- one %*% apply(sub, 2, delta.vec)
        RG$G <- ifelse(sub < delta, delta * exp(1 - (RG$Gb +
            delta)/RG$G), sub)
    }, normexp = {
        for (j in 1:ncol(RG$R)) {
            x <- RG$G[, j] - RG$Gb[, j]
            out <- normexp.fit(x)
            RG$G[, j] <- normexp.signal(out$par, x)
            x <- RG$R[, j] - RG$Rb[, j]
            out <- normexp.fit(x)
            RG$R[, j] <- normexp.signal(out$par, x)
            if (verbose)
                cat("Corrected array", j, "\n")
        }
    }, rma = {
        require("affy")
        RG$R <- apply(RG$R - RG$Rb, 2, bg.adjust)
        RG$G <- apply(RG$G - RG$Gb, 2, bg.adjust)
    })
    RG$Rb <- NULL
    RG$Gb <- NULL
    if (offset) {
        RG$R <- RG$R + offset
        RG$G <- RG$G + offset
    }
    new("RGList", unclass(RG))
}
<environment: namespace:limma>

The method normexp is within the function backgroundCorrect. It is
excerpted from the function and pasted here.
normexp = {
        for (j in 1:ncol(RG$R)) {
            x <- RG$G[, j] - RG$Gb[, j]
            out <- normexp.fit(x)
            RG$G[, j] <- normexp.signal(out$par, x)
            x <- RG$R[, j] - RG$Rb[, j]
            out <- normexp.fit(x)
            RG$R[, j] <- normexp.signal(out$par, x)
            if (verbose)
                cat("Corrected array", j, "\n")
        }
    },

Then I modified the excerpted code and ran the code block as follows:
j=1   # j is from 1 to ncol(RG$R). Manually run the loop.
x <- RG$G[, j] - RG$Gb[, j]
out <- normexp.fit(x)
RG$G[, j] <- normexp.signal(out$par, x)
x <- RG$R[, j] - RG$Rb[, j]
out <- normexp.fit(x)
RG$R[, j] <- normexp.signal(out$par, x)

I found that the warnings of NaNs occur following the code "out <-
normexp.fit(x)".The output of this code block follows below.
> j=1
> x <- RG$G[, j] - RG$Gb[, j]
> out <- normexp.fit(x)
> RG$G[, j] <- normexp.signal(out$par, x)
> x <- RG$R[, j] - RG$Rb[, j]
> out <- normexp.fit(x)
Warning message:    <<<<<<<<<<<<<<<<<<<<<<-----Warning!
Produced NaNs in: log(x)
> RG$R[, j] <- normexp.signal(out$par, x)
> out
$par
[1] 105.927694   4.874661   8.145515

$m2loglik
[1] 352762.2

$convergence
[1] 0

Then I tried to trace the origin of the warning by running the function
normexp.fit step by step. I packed the if-else clause into a customized
function myfunq1234().The modified normexp.fit for step-by-step execution
is after the one embedded in the limma package. The code halts in the
middle and the error message points to something beyond my knowledge.
# normexp.fit Embedded in limma
> normexp.fit
function (x, trace = FALSE)
{
    isna <- is.na(x)
    if (any(isna))
        x <- x[!isna]
    if (length(x) < 4)
        stop("Not enough data: need at least 4 non-missing corrected
intensities")
    q <- quantile(x, c(0, 0.05, 0.1, 1), na.rm = TRUE, names = FALSE)
    if (q[1] == q[4])
        return(list(beta = q[1], sigma = 1, alpha = 1, m2loglik = NA,
            convergence = 0))
    if (q[2] > q[1]) {
        beta <- q[2]
    }
    else {
        if (q[3] > q[1]) {
            beta <- q[3]
        }
        else {
            beta <- q[1] + 0.05 * (q[4] - q[1])
        }
    }
    sigma <- sqrt(mean((x[x < beta] - beta)^2, na.rm = TRUE))
    alpha <- mean(x, na.rm = TRUE) - beta
    if (alpha <= 0)
        alpha <- 1e-06
    Results <- optim(par = c(beta, log(sigma), log(alpha)), fn =
normexp.m2loglik,
        control = list(trace = as.integer(trace)), x = x)
    list(par = Results$par, m2loglik = Results$value, convergence =
Results$convergence)
}
<environment: namespace:limma>

# Modified normexp.fit
isna <- is.na(x)
if (any(isna))
        x <- x[!isna]
if (length(x) < 4)
        stop("Not enough data: need at least 4 non-missing corrected
intensities")
q <- quantile(x, c(0, 0.05, 0.1, 1), na.rm = TRUE, names = FALSE)
if (q[1] == q[4])
        return(list(beta = q[1], sigma = 1, alpha = 1, m2loglik = NA,
convergence = 0))

myfunq1234<-function(q1,q2,q3,q4){
    if (q2 > q1) {
        beta <- q2
    }
    else {
        if (q3 > q1) {
            beta <- q3
        }
        else {
            beta <- q1 + 0.05 * (q4 - q1)
        }
    }
}
myfunq1234(q[1],q[2],q[3],q[4])
sigma <- sqrt(mean((x[x < beta] - beta)^2, na.rm = TRUE)) <<<<--Error
ocurs hereafter!
alpha <- mean(x, na.rm = TRUE) - beta
if (alpha <= 0)
        alpha <- 1e-06
Results <- optim(par = c(beta, log(sigma), log(alpha)), fn =
normexp.m2loglik,
        control = list(trace = as.integer(trace)), x = x)
list(par = Results$par, m2loglik = Results$value, convergence =
Results$convergence)

Based on the fact that all RG$R, RG$Rb, RG$G and RG$Gb are positive, I
think the doubt shed upon the microarray data may be removed.
I wonder whether anyone else has reported this warning.

-------Question 2: Output of Results--------
The topTable() can output the average logFC easily, but it cannot select
differentially expressed genes based upon the combination of p value and
logFC.
The decideTests() can easily output the differentially expressed genes
based upon the combination of p value and logFC, but it cannot output
average logFC.
Is there a function that combines the two advantages?

Thanks!