[R] ggplot / reshape: basic usage

Tue Feb 5 10:55:41 CET 2008

Sorry i realize my example was silly. I've played a bit more and i  
now have a working example using base graphics and the plotCI  
function from the plotrix package (reproduced for self-consistency).

I start with some scatterplot, and I want to group the data in say 4  
arbitrary intervals along x. For each of these subgroups I now  
compute the mean x and y values, and the associated deviations.  
Finally, I plot these 4 points with errorbars and shaded rectangles  
displaying where the cuts occurred.

It looked to me this whole process would be straight forward with  
reshape and ggplot2, but my previous attempts at understanding "cast"  
failed.

Thanks for any tips,

baptiste

Here is the code,

>
>
> #require(plotrix)
> plotCI<-function (x, y = NULL, uiw, liw = uiw, ui = NULL, li = NULL,
>     err = "y", sfrac = 0.01, gap = 0, slty = par("lty"), add = FALSE,
>     scol = NULL, pt.bg = par("bg"), ...)
> {
>     arglist <- list(...)
>     if (is.list(x)) {
>         y <- x$y
>         x <- x$x
>     }
>     if (is.null(y)) {
>         if (is.null(x))
>             stop("both x and y NULL")
>         y <- as.numeric(x)
>         x <- seq(along = x)
>     }
>     if (missing(uiw) && (is.null(ui) || is.null(li)))
>         stop("must specify either relative limits or both lower and  
> upper limits")
>     if (!missing(uiw)) {
>         if (err == "y")
>             z <- y
>         else z <- x
>         ui <- z + uiw
>         li <- z - liw
>     }
>     if (is.null(arglist$xlab))
>         arglist$xlab <- deparse(substitute(x))
>     if (is.null(arglist$ylab))
>         arglist$ylab <- deparse(substitute(y))
>     if (err == "y" && is.null(arglist$ylim))
>         arglist$ylim <- range(c(y, ui, li), na.rm = TRUE)
>     if (err == "x" && is.null(arglist$xlim))
>         arglist$xlim <- range(c(x, ui, li), na.rm = TRUE)
>     if (missing(scol)) {
>         if (!is.null(arglist$col))
>             scol <- arglist$col
>         else scol <- par("col")
>     }
>     plotpoints <- TRUE
>     if (!is.null(arglist$pch) && is.na(arglist$pch)) {
>         arglist$pch <- 1
>         plotpoints <- FALSE
>     }
>     if (!add)
>         do.call("plot", c(list(x, y, type = "n"), clean.args(arglist,
>             plot)))
>     if (gap == TRUE)
>         gap <- 0.01
>     ul <- c(li, ui)
>     if (err == "y") {
>         gap <- rep(gap, length(x)) * diff(par("usr")[3:4])
>         smidge <- par("fin")[1] * sfrac
>         arrow.args <- c(list(lty = slty, angle = 90, length = smidge,
>             code = 1, col = scol), clean.args(arglist, arrows,
>             exclude.other = c("col", "lty")))
>         do.call("arrows", c(list(x, li, x, pmax(y - gap, li)),
>             arrow.args))
>         do.call("arrows", c(list(x, ui, x, pmin(y + gap, ui)),
>             arrow.args))
>     }
>     else if (err == "x") {
>         gap <- rep(gap, length(x)) * diff(par("usr")[1:2])
>         smidge <- par("fin")[2] * sfrac
>         arrow.args <- c(list(lty = slty, angle = 90, length = smidge,
>             code = 1), clean.args(arglist, arrows, exclude.other = c 
> ("col",
>             "lty")))
>         do.call("arrows", c(list(li, y, pmax(x - gap, li), y),
>             arrow.args))
>         do.call("arrows", c(list(ui, y, pmin(x + gap, ui), y),
>             arrow.args))
>     }
>     if (plotpoints)
>         do.call("points", c(list(x, y, bg = pt.bg), clean.args 
> (arglist,
>             points, exclude.other = c("xlab", "ylab", "xlim",
>                 "ylim", "axes"))))
>     invisible(list(x = x, y = y))
> }
> xx<-seq(0,10,length=500)
> yy<-jitter(cos(10*xx)+cos(10*(xx*1.1)),amount=0.5) # some beating +  
> random noise
>
> data<-as.data.frame(list(xx=xx,yy=yy))
>
> lev<-seq(range(data$xx)[1],range(data$xx)[2],length=5) # levels for  
> binning
> g <- cut(data$xx,lev ) # factors for binning
>
> se <- function(x) sd(x)/sqrt(length(x)) # standard error
> test<-sapply(split(data,g),function(d) list(meanx=mean(d 
> $xx),meany=mean(d$yy),stdx=se(d$xx),stdy=se(d$yy)))
> as.data.frame(t(test))->test
>
> plot(xx,yy)
> plotCI(as.numeric(test$meanx),as.numeric(test$meany),2*as.numeric 
> (test$stdy),lwd=2,col=1,scol=2,add=TRUE) # y errorbars
> plotCI(as.numeric(test$meanx),as.numeric(test$meany),2*as.numeric 
> (test$stdx),lwd=2,col=3,err="x",scol=5,add=TRUE) # x errorbars
>
> rect(lev[seq(1,length(lev)-1,by=1)], min(yy),lev[seq(2,length 
> (lev),by=1)], max(yy),col =c(rgb(.8,.8,.8,.1),rgb(.6,.6,.6,.1)),  
> border = NA) # show bins

On 24 Jan 2008, at 01:34, hadley wickham wrote:

> On Jan 23, 2008 10:44 AM, baptiste Auguié <ba208 at exeter.ac.uk> wrote:
>> Hi,
>>
>> I've been trying to do the following simple thing: given a  
>> data.frame,
>>
>>> library(reshape)
>>> library(ggplot2)
>>>
>>> df <- data.frame(x=c(1:10),y=sin(1:10),z=cos(1:10))
>>> dfm<-melt(df, id=c("x"), measured=c("y","z"))
>>
>>
>> i want to plot  y and z against x, and add vertical errorbars to the
>> points corresponding to the standard deviation of y and z  
>> respectively.
>>
>> I tried the following, inspired by some previous post in the list,
>>
>>> se <- function(x) sd(x)/sqrt(length(x))
>>> means <- cast(dfm, variable~., function(x) c(se = se(x)))
>>>
>>> qplot(value,x, data=means, colour=variable, min = value - se, max =
>>>  value + se, geom=c("point","errorbar"))
>>
>>
>> but this fails, as I obviously don't get the philosophy behind the
>> "cast" function.
>
> Could you explain a little more what you are trying to do?  There is
> one s.e. for y and one z, but what are you trying to plot them
> against?
>
> Hadley
>
>
> -- 
> http://had.co.nz/

_____________________________

Baptiste Auguié

Physics Department
University of Exeter
Stocker Road,
Exeter, Devon,
EX4 4QL, UK

Phone: +44 1392 264187

http://newton.ex.ac.uk/research/emag
http://projects.ex.ac.uk/atto