[BioC] Increasing rlm iterations for convergence in limma's normalizeRobustSpline

Paul Boutros Paul.Boutros at utoronto.ca
Wed Aug 11 06:33:54 CEST 2004


Hi again,

I've come across some cDNA array data that doesn't converge when I use
limma's robust-spline normalization.  I get this from:

> library(limma);
> setwd("c:/paul/dev/rat-mouse/limma/");
> targets <- readTargets();
> design <- cbind(c(1,1,1,1,-1,-1,-1,-1), c(0,0,0,0,1,1,1,1));
> colnames(design) <- c("Species", "Dye");
> read.series(targets$FileName,suffix=NULL,skip=29,sep="\t");
[1] "8 slides read"
> layout <- list(ngrid.r=12, ngrid.c=4, nspot.r=25, nspot.c=26);
> RG <- kooperberg(names=targets$FileName,layout=layout);
> RG$genes <- readGAL();
> MA <- normalizeWithinArrays(RG, layout, method="robustspline");
Warning message:
rlm failed to converge in 20 steps in: rlm.default(x, y, weights = w, method
= method, maxit = maxit)

To get around this I modified normalizeRobustSpline and
normalizeWithinArrays to accept a maxit parameter to pass to rlm (the code
is below).  This allows:

> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=20);
Warning message:
rlm failed to converge in 20 steps in: rlm.default(x, y, weights = w, method
= method, maxit = maxit)
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=21);
Warning message:
rlm failed to converge in 21 steps in: rlm.default(x, y, weights = w, method
= method, maxit = maxit)
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=22);
> MA <- normalizeWithinArrays(RG, layout, method="robustspline", maxit=23);

Which indicated that 22 iterations were necessary for convergence.

To test that this didn't break anything else I also:
> MA.old <- normalizeWithinArrays(RG, layout, method="printtiploess");
> # changed functions here: removed for email-display #
> MA.new <- normalizeWithinArrays(RG, layout, method="printtiploess",
maxit=25);
> write.table(MA.old$M, 'old.txt');
> write.table(MA.new$M, 'new.txt');

And then did a file-diff on the files:
	C:\Program Files\R\rw1091>comp
	Name of first file to compare: old.txt
	Name of second file to compare: new.txt
	Option:
	Comparing old.txt and new.txt...
	Files compare OK


The code is below -- hopefully it's useful to other people as well.

Now, my question is: should the fact that my data took more than 20
iterations to converge be sounding alarm-bells in my head?

Paul


### BEGIN CODE ###
normalizeWithinArrays <- function (object, layout = object$printer, method =
"printtiploess",
    weights = object$weights, span = 0.3, iterations = 4, controlspots =
NULL,
    df = 5, robust = "M", maxit=20)
{
    if (!is(object, "MAList"))
        object <- MA.RG(object)
    choices <- c("none", "median", "loess", "printtiploess",
        "composite", "robustspline")
    method <- match.arg(method, choices)
    if (method == "none")
        return(object)
    narrays <- ncol(object$M)
    if (method == "median") {
        for (j in 1:narrays) object$M[, j] <- object$M[, j] -
            median(object$M[, j], na.rm = TRUE)
        return(object)
    }
    switch(method, loess = {
        for (j in 1:narrays) {
            y <- object$M[, j]
            x <- object$A[, j]
            w <- weights[, j]
            object$M[, j] <- loessFit(y, x, w, span = span, iterations =
iterations)$residuals
        }
    }, printtiploess = {
        if (is.null(layout))
            stop("Layout argument not specified")
        ngr <- layout$ngrid.r
        ngc <- layout$ngrid.c
        nspots <- layout$nspot.r * layout$nspot.c
        for (j in 1:narrays) {
            spots <- 1:nspots
            for (gridr in 1:ngr) for (gridc in 1:ngc) {
                y <- object$M[spots, j]
                x <- object$A[spots, j]
                w <- weights[spots, j]
                object$M[spots, j] <- loessFit(y, x, w, span = span,
                  iterations = iterations)$residuals
                spots <- spots + nspots
            }
        }
    }, composite = {
        if (is.null(layout))
            stop("Layout argument not specified")
        if (is.null(controlspots))
            stop("controlspots argument not specified")
        ntips <- layout$ngrid.r * layout$ngrid.c
        nspots <- layout$nspot.r * layout$nspot.c
        for (j in 1:narrays) {
            y <- object$M[, j]
            x <- object$A[, j]
            w <- weights[, j]
            f <- is.finite(y) & is.finite(x) & is.finite(w)
            y[!f] <- NA
            fit <- loess(y ~ x, weights = w, span = span, subset =
controlspots,
                na.action = na.exclude, degree = 0, surface = "direct",
                family = "symmetric", trace.hat = "approximate",
                iterations = iterations)
            alpha <- global <- y
            global[f] <- predict(fit, newdata = x[f])
            alpha[f] <- (rank(x[f]) - 1)/sum(f)
            spots <- 1:nspots
            for (tip in 1:ntips) {
                y <- object$M[spots, j]
                x <- object$A[spots, j]
                w <- weights[spots, j]
                local <- loessFit(y, x, w, span = span, iterations =
iterations)$fitted
                object$M[spots, j] <- object$M[spots, j] - alpha[spots] *
                  global[spots] - (1 - alpha[spots]) * local
                spots <- spots + nspots
            }
        }
    }, robustspline = {
        if (is.null(layout))
            stop("Layout argument not specified")
        for (j in 1:narrays) object$M[, j] <-
normalizeRobustSpline(object$M[,
            j], object$A[, j], layout, df = df, method = robust,
maxit=maxit)
    })
    object
}


normalizeRobustSpline <- function (M, A, layout, df = 5, method = "M",
maxit=20)
{
    require(MASS)
    require(splines)
    ngrids <- layout$ngrid.r * layout$ngrid.c
    nspots <- layout$nspot.r * layout$nspot.c
    O <- is.finite(M) & is.finite(A)
    X <- matrix(NA, ngrids * nspots, df)
    X[O, ] <- ns(A[O], df = df, intercept = TRUE)
    x <- X[O, , drop = FALSE]
    y <- M[O]
    w <- rep(1, length(y))
    s <- summary(rlm(x, y, weights = w, method = method, maxit=maxit),
method = "XtWX",
        correlation = FALSE)
    beta0 <- s$coefficients[, 1]
    covbeta0 <- s$cov * s$stddev^2
    beta <- array(1, c(ngrids, 1)) %*% array(beta0, c(1, df))
    covbeta <- array(0, c(ngrids, df, df))
    spots <- 1:nspots
    for (i in 1:ngrids) {
        o <- O[spots]
        y <- M[spots][o]
        if (length(y)) {
            x <- X[spots, ][o, , drop = FALSE]
            r <- qr(x)$rank
            if (r < df)
                x <- x[, 1:r, drop = FALSE]
            w <- rep(1, length(y))
            s <- summary(rlm(x, y, weights = w, method = method,
maxit=maxit),
                method = "XtWX", correlation = FALSE)
            beta[i, 1:r] <- s$coefficients[, 1]
            covbeta[i, 1:r, 1:r] <- s$cov * s$stddev^2
        }
        spots <- spots + nspots
    }
    res.cov <- cov(beta) - apply(covbeta, c(2, 3), mean)
    Sigma0 <- covbeta0 * max(0, sum(diag(res.cov))/sum(diag(covbeta0)))
    spots <- 1:nspots
    for (i in 1:ngrids) {
        beta[i, ] <- beta0 + Sigma0 %*% solve(Sigma0 + covbeta[i,
            , ], beta[i, ] - beta0)
        o <- O[spots]
        x <- X[spots, ][o, ]
        M[spots][o] <- M[spots][o] - x %*% beta[i, ]
        M[spots][!o] <- NA
        spots <- spots + nspots
    }
    M
}
### END CODE ###



More information about the Bioconductor mailing list