R-alpha: predict.lm -- who ..?

Peter Dalgaard BSA p.dalgaard@kubism.ku.dk
15 Sep 1997 22:50:58 +0200


Ross Ihaka <ihaka@stat.auckland.ac.nz> writes:

> 
> Here is my (incomplete) version. This will work for full-rank models,
> and I suspect also for singular models (the comment is just a reminder
> to myself that I was thinking about this).
> 
> I set out to change the underlying structure so that case names
> would be propagated through to to the fitted values.  This doesn't
> happen at the moment because they are not handled within "model.matrix".
> A quick fix would be to replace dimnames(x) below by row.names(mf).
> 
> There is no computation of standard errors below, but the present
> version has something like the right computation.
> 
> Sorry that this is just a hint, but I'm really under pressure at
> present.  Anyone got any (applied) exam questions on linear models
> or glms handy?
> 	Ross
> 	
> 
> predict.lm <-
> function (fit, newdata) 
> {
> 	mf <- model.frame(formula(fit), data = newdata)
> 	# recreate the design matrix
> 	x <- model.matrix(terms(fit), mf)
> 	# grab the coefficients (order is correct?)
> 	b <- coef(fit)
> 	b[is.na(b)] <- 0
> 	# compute predictions
> 	p <- x %*% b
> 	# attach name information
> 	if(is.matrix(b)) {
> 		dimnames(p) <- list(dimnames(x)[[1]], dimnames(b)[[2]])
> 	}
> 	else {
> 		p <- as.vector(p)
> 		names(p) <- dimnames(x)[[1]]
> 	}
> 	p
> }
> 

Hum, maybe my version was closer after all. Thing was that I tried to
be smart and have it work without having to specify the dataframe
(which gets to be a terrible pain when you need to specify factors
with the same level structure as in the original). On second thought,
I decided to abandon that idea and stick with S(-plus?) compatibility.
However I did get the SE's done and soforth. Here's my effort:


"like.frame" <-
function (frame, ...) 
{
        if (!is.data.frame(frame)) 
                stop("like.frame: first argument must be a data frame")
        f <- list(...)
        class(f) <- "data.frame"
        n <- ncol(f)
        nm.base <- names(frame)
        nm.new <- names(f)
        if (is.null(nm.new)) 
                nm.new <- rep("", n)
        for (i in 1:n) {
                if (nm.new[i] == "") {
                        #positional match 
                        nm.new[i] <- nm.base[i]
                        j <- i
                }
                else j <- match(nm.new[i], nm.base)
                if (!is.na(j)) 
                        if (is.factor(frame[[j]])) {
                                lab <- levels(frame[[j]])
                                lev <- 1:nlevels(frame[[j]])
                                f.i <- f[[i]]
                                if (is.character(f.i)) 
                                 f.i <- match(f.i, lab)
                                f[[i]] <- factor(f.i, levels = lev, 
                                 labels = lab)
                        }
        }
        names(f) <- nm.new
        f
}
"predict.lm" <-
function (object, ...) 
{
        mod.fr <- model.frame(object)
        form <- formula(object)
        # x~a+b becomes ~a+b
        form <- as.call(list(form[[1]], form[[3]]))
        class(form) <- "formula"
        pred.fr <- like.frame(mod.fr, ...)
        # the following is a kludge to work around a bug in data.frame:
        zz <- attr(terms(form), "variables")
        zz[[1]] <- as.name("list")
        pred.fr <- eval(zz, pred.fr)
        class(pred.fr) <- "data.frame"
        X <- model.matrix(form, pred.fr)
        n <- NROW(object$qr$qr)
        p <- object$rank
        p1 <- 1:p
        piv <- object$qr$pivot[p1]
        r <- resid(object)
        f <- fitted(object)
        w <- weights(object)
        if (is.null(w)) 
                rss <- sum(r^2)
        else rss <- sum(r^2 * w)
        R <- chol2inv(object$qr$qr[p1, p1, drop = FALSE])
        est <- object$coefficients[piv]
        predictor <- c(X[, piv] %*% est)
        ip <- real(NROW(X))
        resvar <- rss/(n - p)
        vcov <- resvar * R
        for (i in (1:NROW(X))) {
                xi <- X[i, piv]
                ip[i] <- xi %*% vcov %*% xi
        }
        stderr1 <- sqrt(ip)
        stderr2 <- sqrt(resvar + ip)
        tt <- qt(0.975, n - p)
        conf.l <- predictor - tt * stderr1
        conf.u <- predictor + tt * stderr1
        pred.l <- predictor - tt * stderr2
        pred.u <- predictor + tt * stderr2
        z <- list(predictor = predictor, conf.l = conf.l, conf.u = conf.u, 
                pred.l = pred.l, pred.u = pred.u)
        class(z) <- "data.frame"
        z
}


-- 
   O__  ---- Peter Dalgaard             Blegdamsvej 3  
  c/ /'_ --- Dept. of Biostatistics     2200 Cph. N   
 (*) \(*) -- University of Copenhagen   Denmark      Ph: (+45) 35327918
~~~~~~~~~~ - (p.dalgaard@biostat.ku.dk)             FAX: (+45) 35327907
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-