[R] Rewriting the Biplot Function

Boris Steipe boris.steipe at utoronto.ca
Tue Jan 20 21:21:35 CET 2015


Scott -

A few months ago I posted on this list a modified version of biplot that takes a colour argument (and preserves the axes information, so you can use points() ). I don't have time right now to experiment and look at your code, but perhaps this does "out of the box" what you need.

Cheers,
Boris

====  Previous post ==========================================================
Since I've wanted this capability for some time, I modified the original biplot() to accept a type parameter type={"t" (Default) | "p" | "n"}. For "t", the function behaves almost exactly as before. For "p" it plots points, and should accept all the usual arguments for that. For "n" it plots nothing except the axes. You can then add the points as desired.

I also added two parameters col.arrows = "red", and col.text = "black" to have extra control.

Here is an example. (Note, you have to load the function, below, first.





library(MASS)
data(crabs)
PRC <- prcomp(crabs[, 4:8])

myBiplot(PRC)
myBiplot(PRC, choices=2:3, cex = 0.7, col.text="#445599") # much as before


# use filled points, color by the value found in column 4 of the data
r <- range(crabs[,4]) 
n <- 15
cv <- cm.colors(n)  
c <- cv[cut(crabs[,4],n)]  
myBiplot(PRC, choices=2:3, type = "p", pch=20, col=c, col.arrows = "#FF6600")


# finally: plot nothing then use points() for detailed control 
myBiplot(PRC, choices=2:3, type = "n")  # no points

# blue/orange crab males/females - as given by rows in the data
points(PRC$x[  1: 50, 2:3], pch=21, bg="#0066FF")
points(PRC$x[ 51:100, 2:3], pch=24, bg="#0066FF")
points(PRC$x[101:150, 2:3], pch=21, bg="#FF6600")
points(PRC$x[151:200, 2:3], pch=24, bg="#FF6600")


==============================================================
myBiplot <- function (x, choices = 1L:2L, scale = 1,
                     pc.biplot = FALSE, var.axes = TRUE,
                     type = "t",
                     col,
                     col.arrows = "#FF0000",
                     col.text = "#000000",
                     cex = rep(par("cex"), 2),
                     expand = 1, 
                     xlabs = NULL, ylabs = NULL,
                     xlim = NULL, ylim = NULL, 
                     main = NULL, sub = NULL,
                     xlab = NULL, ylab = NULL, 
                     arrow.len = 0.1,
                     ...
                     )

{
	if (length(choices) != 2L) 
       stop("length of choices must be 2")
   if (!length(scores <- x$x)) 
       stop(gettextf("object '%s' has no scores", deparse(substitute(x))), 
           domain = NA)
   if (is.complex(scores)) 
       stop("biplots are not defined for complex PCA")

   lam <- x$sdev[choices]
   n <- NROW(scores)
   lam <- lam * sqrt(n)

   if (scale < 0 || scale > 1) 
       warning("'scale' is outside [0, 1]")
   if (scale != 0) 
       lam <- lam^scale
   else lam <- 1
   if (pc.biplot) 
       lam <- lam/sqrt(n)

   y <- t(t(x$rotation[, choices]) * lam)
   x <- t(t(scores[, choices])/lam)  # note that from here on
                                     # x is no longer the PC object
                                     # originally pased into the function
   n <- nrow(x)
   p <- nrow(y)

   if (missing(xlabs)) {
       xlabs <- dimnames(x)[[1L]]
       if (is.null(xlabs)) 
           xlabs <- 1L:n
   }
   xlabs <- as.character(xlabs)
   dimnames(x) <- list(xlabs, dimnames(x)[[2L]])

   if (missing(ylabs)) {
       ylabs <- dimnames(y)[[1L]]
       if (is.null(ylabs)) 
           ylabs <- paste("Var", 1L:p)
   }
   ylabs <- as.character(ylabs)
   dimnames(y) <- list(ylabs, dimnames(y)[[2L]])

   if (length(cex) == 1L) 
       cex <- c(cex, cex)

   unsigned.range <- function(x) c(-abs(min(x, na.rm = TRUE)), 
       abs(max(x, na.rm = TRUE)))
   rangx1 <- unsigned.range(x[, 1L])
   rangx2 <- unsigned.range(x[, 2L])
   rangy1 <- unsigned.range(y[, 1L])
   rangy2 <- unsigned.range(y[, 2L])

   if (missing(xlim) && missing(ylim)) 
       xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
   else if (missing(xlim)) 
       xlim <- rangx1
   else if (missing(ylim)) 
       ylim <- rangx2

   ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
   on.exit(par(op))
   op <- par(pty = "s")
   if (!is.null(main)) 
       op <- c(op, par(mar = par("mar") + c(0, 0, 1, 0)))

   # first, plot scores - either normally, or as row labels
   if (type == "p") {
	    plot(x, type = type, xlim = xlim, ylim = ylim, col = col, 
       xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
   }
   else if (type == "t") {
       plot(x, type = "n", xlim = xlim, ylim = ylim, 
            xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
       text(x, xlabs, cex = cex[1L], col = col.text, ...)
   	
   }
   else if (type == "n") {  # plot an empty frame
	    plot(x, type = type, xlim = xlim, ylim = ylim, 
       xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
   }

   par(new = TRUE)
   dev.hold()
   on.exit(dev.flush(), add = TRUE)
   plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim * 
       ratio, xlab = "", ylab = "", col = col.arrows, ...)
   axis(3, col = col.arrows, ...)
   axis(4, col = col.arrows, ...)
   box(col = "#000000")
   text(y, labels = ylabs, cex = cex[2L], col = col.arrows, ...)
   if (var.axes) 
       arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col.arrows, 
           length = arrow.len)
   # now replot into xlim, ylim scaled by lam, to reset par("usr")
   # to the  correct values needed for subsequent application of points(),
   # text() etc.
   par(new = TRUE)
   dev.hold()
   on.exit(dev.flush(), add = TRUE)
	plot(0, type = "n", xlim = xlim * lam[1], ylim = ylim * lam[2], 
   xlab = '', ylab = '', sub = '', main = '', xaxt='n', yaxt='n', axes=FALSE)

   invisible()

}

==============================================================



On Jan 20, 2015, at 2:14 PM, Scott Robinson <Scott.Robinson at glasgow.ac.uk> wrote:

> Dear R-Help,
> 
> I have been trying to rewrite the base biplot.prcomp function but am coming across some errors I don't understand. It seems like R is still 'expecting' the same values despite me rewriting and renaming the methods. My aim is simply to have an additional biplot function which could use a vector of colours, where each position of the vector gives the colour for a variable name and its arrow.
> 
> Another issue I have with the default function is that when I save a very large image (to get good seperation and readability when looking at hundreds of variables) the names are very displaced from the arrows, but I haven't even started looking into that yet...
> 
> I ran the code on my actual data and got the error:
>> colouredBiplot(prc, yCol=rep("#FFFF00", 962))
> Error in if (yCol == NULL) { : argument is of length zero
>> traceback()
> 2: colouredBiplot.internal(t(t(scores[, choices])/lam), t(t(x$rotation[,
>       choices]) * lam), yCol, ...) at colouredBiplot.R#103
> 1: colouredBiplot(prc, yCol = rep("#FFFF00", 962))
> 
> However when I tried creating a small example I got a different error:
> 
>> options(stringsAsFactors=F)
>> source("C:/Work/InGenious/InGen/colouredBiplot.R")
>> 
>> random <- matrix(rexp(200, rate=.1), ncol=20)
>> prc <- prcomp(random, center=T, scale=T)
>> colouredBiplot(random, rep("#FFFF00", 20))
> Error in colouredBiplot(random, rep("#FFFF00", 20)) :
>  length of choices must be 2
>> sessionInfo()
> R version 3.0.2 (2013-09-25)
> Platform: x86_64-w64-mingw32/x64 (64-bit)
> 
> locale:
> [1] LC_COLLATE=English_United Kingdom.1252
> [2] LC_CTYPE=English_United Kingdom.1252
> [3] LC_MONETARY=English_United Kingdom.1252
> [4] LC_NUMERIC=C
> [5] LC_TIME=English_United Kingdom.1252
> 
> attached base packages:
> [1] stats     graphics  grDevices utils     datasets  methods   base
> 
> 
> 
> 
> and then immediately after the above I thought to try a traceback() and got another different error again, which I don't even understand how is possible:
> 
>> colouredBiplot(random, yCol=rep("#FFFF00", 20))
> Error in x$x : $ operator is invalid for atomic vectors
>> traceback()
> 1: colouredBiplot(random, yCol = rep("#FFFF00", 20))
> 
> 
> 
> 
> The only things I have changed are to pass in a vector "yCol" and use it inside the else blocks of some conditionals testing 'if(yCol==NULL)':
> 
> colouredBiplot.internal <- function (x, y, var.axes = TRUE, col, cex = rep(par("cex"), 2),
>    xlabs = NULL, ylabs = NULL, expand = 1, xlim = NULL, ylim = NULL,
>    arrow.len = 0.1, main = NULL, sub = NULL, xlab = NULL, ylab = NULL, yCol=NULL,
>    ...)
> {
>    n <- nrow(x)
>    p <- nrow(y)
>    if (missing(xlabs)) {
>        xlabs <- dimnames(x)[[1L]]
>        if (is.null(xlabs))
>            xlabs <- 1L:n
>    }
>    xlabs <- as.character(xlabs)
>    dimnames(x) <- list(xlabs, dimnames(x)[[2L]])
>    if (missing(ylabs)) {
>        ylabs <- dimnames(y)[[1L]]
>        if (is.null(ylabs))
>            ylabs <- paste("Var", 1L:p)
>    }
>    ylabs <- as.character(ylabs)
>    dimnames(y) <- list(ylabs, dimnames(y)[[2L]])
>    if (length(cex) == 1L)
>        cex <- c(cex, cex)
>    if (missing(col)) {
>        col <- par("col")
>        if (!is.numeric(col))
>            col <- match(col, palette(), nomatch = 1L)
>        col <- c(col, col + 1L)
>    }
>    else if (length(col) == 1L)
>        col <- c(col, col)
>    unsigned.range <- function(x) c(-abs(min(x, na.rm = TRUE)),
>        abs(max(x, na.rm = TRUE)))
>    rangx1 <- unsigned.range(x[, 1L])
>    rangx2 <- unsigned.range(x[, 2L])
>    rangy1 <- unsigned.range(y[, 1L])
>    rangy2 <- unsigned.range(y[, 2L])
>    if (missing(xlim) && missing(ylim))
>        xlim <- ylim <- rangx1 <- rangx2 <- range(rangx1, rangx2)
>    else if (missing(xlim))
>        xlim <- rangx1
>    else if (missing(ylim))
>        ylim <- rangx2
>    ratio <- max(rangy1/rangx1, rangy2/rangx2)/expand
>    on.exit(par(op))
>    op <- par(pty = "s")
>    if (!is.null(main))
>        op <- c(op, par(mar = par("mar") + c(0, 0, 1, 0)))
>    plot(x, type = "n", xlim = xlim, ylim = ylim, col = col[1L],
>        xlab = xlab, ylab = ylab, sub = sub, main = main, ...)
>    text(x, xlabs, cex = cex[1L], col = col[1L], ...)
>    par(new = TRUE)
>    dev.hold()
>    on.exit(dev.flush(), add = TRUE)
>    if(yCol==NULL){
>    plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
>        ratio, xlab = "", ylab = "", col = col[1L], ...)
>        }
>        else{
>        plot(y, axes = FALSE, type = "n", xlim = xlim * ratio, ylim = ylim *
>        ratio, xlab = "", ylab = "", col = yCol, ...)
>        }
>    axis(3, col = col[2L], ...)
>    axis(4, col = col[2L], ...)
>    box(col = col[1L])
>    if(yCol==NULL){
>    text(y, labels = ylabs, cex = cex[2L], col = col[2L], ...)
>    } else{
>    text(y, labels = ylabs, cex = cex[2L], col = yCol, ...)
>    }
>    if (var.axes){
>    if(yCol==NULL){
>        arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L],
>            length = arrow.len)
>            }else{
>        arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = yCol,
>            length = arrow.len)            }
>            }
>    invisible()
> }
> 
> 
> colouredBiplot <- function (x, choices = 1L:2L, scale = 1, pc.biplot = FALSE, yCol=NULL, ...)
> {
>    if (length(choices) != 2L)
>        stop("length of choices must be 2")
>    if (!length(scores <- x$x))
>        stop(gettextf("object '%s' has no scores", deparse(substitute(x))),
>            domain = NA)
>    if (is.complex(scores))
>        stop("biplots are not defined for complex PCA")
>    lam <- x$sdev[choices]
>    n <- NROW(scores)
>    lam <- lam * sqrt(n)
>    if (scale < 0 || scale > 1)
>        warning("'scale' is outside [0, 1]")
>    if (scale != 0)
>        lam <- lam^scale
>    else lam <- 1
>    if (pc.biplot)
>        lam <- lam/sqrt(n)
>    colouredBiplot.internal(t(t(scores[, choices])/lam), t(t(x$rotation[,
>        choices]) * lam), yCol, ...)
>    invisible()
> }
> 
> 
> I have looked into a few alternatives but most either don't allow that type of different colouring, or else aren't compatable with my various versions of R.
> 
> Help me R-Help, you're my only hope,
> 
> Scott
> 
> 
> 
> 
> 	[[alternative HTML version deleted]]
> 
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.



More information about the R-help mailing list