[R] Pretty-printing multiple regression models

Ajay Narottam Shah ajayshah at mayin.org
Thu Aug 31 20:43:31 CEST 2006


A few days ago, I had asked this question. Consider this situation:
> x1 <- runif(100); x2 <- runif(100); y <- 2 + 3*x1 - 4*x2 + rnorm(100)
> m1 <- summary(lm(y ~ x1))
> m2 <- summary(lm(y ~ x2))
> m3 <- summary(lm(y ~ x1 + x2))

You have estimated 3 different "competing" models, and suppose you
want to present the set of models in one table. xtable(m1) is cool,
but that would give us 3 different tables.

What I want is this table:

-----------------------------------------------------------
                    M1             M2              M3
-----------------------------------------------------------
Intercept         0.0816         3.6292         2.2272
                 (0.5533)       (0.2316)***    (0.2385)***

x1                2.8151                        2.7606
                 (0.5533)***                   (0.3193)***

x2                              -4.2899        -4.2580
                                (0.401)***     (0.3031)***

$\sigma_e$        1.538          1.175          0.8873
$R^2$             0.2089         0.5385         0.7393
-----------------------------------------------------------

I was given feedback from this mailing list that this is a specialised
display and requires custom code. So I wrote some code. I will be very
happy if you could look at this code, and give me ideas on how to do
it better, and how to generalise it. I am most unhappy with the fact
that right now, I'm tied to the fact that summary.lm() gives you
something which has a $coefficients. Ideally this kind of display
should be general for all kinds of models.

My code produces two results:

    > numbers
                           M1      M2       M3
    Intercept          0.0691  3.1110   1.7831
    t                  0.2471 12.1860   6.8888
    x1                 2.6595      NA   2.7772
    t                  5.3837      NA   8.0206
    x2                     NA -3.2788  -3.3683
    t                      NA -7.6922 -10.1331
    Residual std. dev. 1.3567  1.2195   0.9505
    $R^2$              0.2283  0.3765   0.6251
    Adjusted $R^2$     0.2204  0.3701   0.6174

and:

    > specialised.latex.generation(numbers)
    \hline
     &  M1 &  M2 &  M3\\
    \hline
    Intercept &  0.0691 &  3.111 &  1.7831\\
     &  (0.2471) &  (12.186)$^\mbox{***} &  (6.8888)$^\mbox{***}\\[1mm]
    x1 &  2.6595 &  &  2.7772\\
     &  (5.3837)$^\mbox{***} &  &  (8.0206)$^\mbox{***}\\[1mm]
    x2 &  &  -3.2788 &  -3.3683\\
     &  &  (-7.6922)$^\mbox{***} &  (-10.1331)$^\mbox{***}\\[1mm]
    Residual std. dev. &  1.3567 &  1.2195 &  0.9505\\
    $R^2$ &  0.2283 &  0.3765 &  0.6251\\
    Adjusted $R^2$ &  0.2204 &  0.3701 &  0.6174\\
    \hline

mmp <- function(regressors, bottom.matter, models.names, allmodels) {
  numbers <- matrix(NA, nrow=(2*length(regressors))+length(bottom.matter),
                    ncol=length(models.names))
  colnames(numbers) <- models.names
  rownames(numbers) <- rep("t", nrow(numbers))

  baserow <- 1
  for (i in 1:length(regressors)) {
    if (regressors[i] == "Intercept") {
      regex <- "^\\(Intercept\\)$"
    } else {
      regex <- paste("^", regressors[i], "$", sep="")
    }
    rownames(numbers)[baserow] <- regressors[i]
    for (j in 1:length(allmodels)) {
      m <- allmodels[[j]]
      if (any(locations <- grep(regex, rownames(m$coefficients)))) {
        if (length(locations) > 1) {
          cat("Regex ", regex, " has multiple matches at model ", j, "\n")
          return(NULL)
        }
        numbers[baserow,j] <- as.numeric(sprintf("%.4f",
                                                 m$coefficients[locations,1]))
        numbers[baserow+1,j] <- as.numeric(sprintf("%.4f",
                                                   m$coefficients[locations,3]))
      }
    }
    baserow <- baserow + 2
  }

                                        # Now process the bottom.matter
  for (i in 1:length(bottom.matter)) {
    if (bottom.matter[i] == "sigma") {
      for (j in 1:length(allmodels)) {
        m <- allmodels[[j]]
        numbers[baserow,j] <- as.numeric(sprintf("%.4f", m$sigma))
      }
      rownames(numbers)[baserow] <- "Residual std. dev."
      baserow <- baserow + 1
    }
    
    if (bottom.matter[i] == "r.squared") {
      for (j in 1:length(allmodels)) {
        m <- allmodels[[j]]
        numbers[baserow,j] <- as.numeric(sprintf("%.4f", m$r.squared))
      }
      rownames(numbers)[baserow] <- "$R^2$"
      baserow <- baserow + 1
    }

    if (bottom.matter[i] == "adj.r.squared") {
      for (j in 1:length(allmodels)) {
        m <- allmodels[[j]]
        numbers[baserow,j] <- as.numeric(sprintf("%.4f", m$adj.r.squared))
      }
      rownames(numbers)[baserow] <- "Adjusted $R^2$"
      baserow <- baserow + 1
    }
  }
  numbers
}

# Given a 't' statistic, give me a string with
# '*' or '**' or '***' based on the 90%, 95% and 99% cutoffs on N(0,1)
stars <- function(t) {
  t <- abs(t)
  n <- -1 + as.numeric(cut(t,breaks=c(-0.01,-qnorm(c(0.05, 0.025, 0.005)),Inf)))
  if (n == 0) {
    return("")
  } else {
    return(paste("$^\\mbox{",
                 paste(rep("*", n), sep="", collapse=""),
                 "}", sep=""))
  }
}

specialised.latex.generation <- function(numbers) {
  cat("\\hline\n")
  for (j in 1:ncol(numbers)) {
    cat(" & ", colnames(numbers)[j])
  }
  cat("\\\\\n\\hline\n")
  for (i in 1:nrow(numbers)) {
    if (rownames(numbers)[i] == "t") {
      for (j in 1:ncol(numbers)) {
        if (is.na(numbers[i,j])) {
          cat(" & ")
        } else {
          cat(" & ", sprintf("(%s)%s", numbers[i,j], stars(numbers[i,j])))
        }
      }
      cat("\\\\[1mm]\n")
    } else {
      cat(rownames(numbers)[i])
      for (j in 1:ncol(numbers)) {
        if (is.na(numbers[i,j])) {
          cat(" & ")
        } else {
          cat(" & ", numbers[i, j])
        }
      }
      cat("\\\\\n")
    }
  }
  cat("\\hline\n")
}

x1 <- runif(100); x2 <- runif(100); y <- 2 + 3*x1 - 4*x2 + rnorm(100)
m1 <- summary(lm(y ~ x1))
m2 <- summary(lm(y ~ x2))
m3 <- summary(lm(y ~ x1 + x2))
numbers <- mmp(regressors=c("Intercept", "x1", "x2"),
               bottom.matter=c("sigma", "r.squared", "adj.r.squared"),
               models.names=c("M1", "M2", "M3"),
               allmodels=list(m1, m2, m3))
numbers
specialised.latex.generation(numbers)

-- 
Ajay Shah                                      http://www.mayin.org/ajayshah  
ajayshah at mayin.org                             http://ajayshahblog.blogspot.com
<*(:-? - wizard who doesn't know the answer.



More information about the R-help mailing list