[R] Grouped boxplots using ggplot() from ggplot2.

Richard M. Heiberger rmh @end|ng |rom temp|e@edu
Mon Jul 30 05:32:36 CEST 2018


## I recommend using lattice for this task.
## First I show the example from my book and package (HH).
## Then I use this on your example.

library(HH)       ## Package supporting Heiberger and Holland,
                  ## Statistical Analysis and Data Display (Second
edition, 2015)
HHscriptnames(4)  ## Filename on your computer for script for all
Chapter 4 examples

## this is Chunk 23

###################################################
### code chunk number 23: grap.tex:1953-1981
###################################################
bwdata <- data.frame(Y=(rt(80, df=5)*5 + rep(c(20,25,15,22, 22,28,16,14), 10)),
                     week=ordered(rep(c(1:4, 1:4), each=10)),
                     treatment= rep(c("A", "B"), each=40))

position(bwdata$week) <- c(1, 2, 4, 8)
levels(bwdata$week) <- c(1, 2, 4, 8)

bwdata$week.treatment <- with(bwdata, interaction(treatment, week))
position(bwdata$week.treatment) <-
   as.vector(t(outer(c(1, 2, 4, 8), c(-.18,.18), "+")))

BR <- likertColor(2, colorFunctionOption="default")[2:1]

## uses panel.bwplot.intermediate.hh to control position and colors
## hhpdf("bwplotposition.pdf", width=7, height=5)
bwplot(Y ~ week.treatment, data=bwdata,
       panel=panel.bwplot.intermediate.hh, xlim=c(0, 9),
       box.width=.25,
       pch=c(17, 16), col=BR,
       xlab="Week", ylab=list(rot=0),
       scales=list(x=list(at=position(bwdata$week), tck=1)),
       key=list(
          text=list(c("A","B"), col=BR),
          points=list(pch=c(17, 16), col=BR),
          space="top", columns=2, between=1, border=TRUE,
          title="Treatment", cex.title=.9)) +
   layer(panel.abline(h=37, col="gray60", lty=3, lwd=2))
## hhdev.off()


## The placement features provided by panel.bwplot.intermediate.hh are
## 1. The two boxes at each time position are clearly distinguished
##    from boxes at other time positions.
##
## 2. Times do not need to be evenly spaced.


## Now your sample data and lattice code for your desired graph


## Script to demonstrate what I am trying to do.
##

## Simulate some data:
Year <- factor(rep(4:8,each=50,times=2))
Type <- rep(c("National","Local"),each=250)
M0   <- 1300+50*(0:4)
set.seed(42)
M1   <- M0 + runif(5,-100,-50)
X0   <- rnorm(250,rep(M0,each=50),150)
X1   <- rnorm(250,rep(M1,each=50),100)
DemoDat <- data.frame(Year=Year,Score=c(X0,X1),Type=Type)

if (FALSE) { ## Rolf Turners original code
## Grouped boxplots:
library(ggplot2)
print(ggplot(data=DemoDat) +
    geom_boxplot(aes(x=Year, y=Score, color=Type,fill=Type),
                 position=position_dodge(1),alpha=0) +
    theme_minimal() +
    scale_fill_discrete(name="National v. Local") +
    ylim(700,2100))
}


DemoDat$Year.Type <- with(DemoDat, interaction(Year, Type))
position(DemoDat$Year.Type) <-
   as.vector(t(outer(c(4, 5, 6, 7, 8), c(-.18, .18), "+")))
RB <- c("red", "black")

SYT <-
bwplot(Score ~ Year.Type, data=DemoDat,
       panel=panel.bwplot.intermediate.hh,
       xlim=c(-.1, 9.1),
       ylim=c(690, 2110),
       box.width=.22,
       col=RB,
       xlab="Year", ylab=list(rot=0),
       scales=list(x=list(at=0:9, tck=1),
                   y=list(at=seq(700, 2100, 100), tick=1)),
       par.settings=list(box.dot=list(pch="|"),
                         plot.symbol=list(pch="-", col=RB, cex=1.5)),
       key=list(
         text=list(levels(DemoDat$Type), col=RB, cex=.8),
         lines=list(col=RB), size=1.5,
         columns=2, between=.5, between.columns=.6,
         space="right", border=FALSE,
         title="\nNational v. Local", cex.title=.9),
       main="Matches specifications"
       )
SYT

update(SYT, main="Outliers made invisible, not recommended",
       par.settings=list(plot.symbol=list(cex=0)))

## Rich


On Sat, Jul 28, 2018 at 7:04 PM, Rolf Turner <r.turner using auckland.ac.nz> wrote:
>
> On 29/07/18 02:54, Jeff Newmiller wrote:
>
>> 1) I don't know... it looks to me like you did not run my code.
>
>
> Aaaarrrgghhh.  I *thought* I had, but instead left "fill=Type" inside the
> aes() call and neglected to add fill=NULL outside this call. Duhhhh!!! It's
> tough being mentally challenged, let me assure you.
>
>> I have included a complete reprex below... try it out in a fresh session.
>> If you still get the problem, check your sessionInfo package versions
>> against mine.
>
>
> Yep.  Works like a charm.
>
>> 2) This still smells like your fill parameter is inside the aes function
>> with Type as value. This causes a legend to be created, and since that
>> legend has a different name ("Type") than the colour scale, they are
>> separated. Confirm that you are using fill outside the aes function (because
>> you don't want fill to depend on the data) and have the constant NULL as
>> value (so it won't generate any fill graphical representation).
>
>
> Yeah.  Well.  Duhhh.  I'm a retread.
>>
>>
>> 3) I missed that... the ylim()/scales_y_continuous(breaks=) limits
>> constrain which data are included as input into the graph. The
>> coord_cartesian function forces the limits as desired.
>
>
> Bewdy, ta.
>
>>
>> 4) While showing outliers is a standard semantic feature of boxplots
>> whether produced by ggplot or lattice or base or non-R solution,
>
>
> Indeed.  But the client is always right! :-)
>
>> you can please the client by making the outliers transparent.
>
>
> And your code shows me how!  Which I need.  Bewdy, ta.
>
>
>> There is a link to the generated image below.
>>
>> ################
>> # Simulate some data:
>> Type <- rep( c( "National", "Local" ), each = 250 )
>> M0   <- 1300+50*(0:4)
>> set.seed( 42 )
>> M1   <- M0 + runif( 5, -100, -50 )
>> X0   <- rnorm( 250, rep( M0, each = 50 ), 150 )
>> X1   <- rnorm( 250, rep( M1, each = 50 ), 100 )
>>
>> library(ggplot2)
>> Year <- factor( rep( 4:8, each = 50, times = 2)
>>                , levels = 0:8 )
>> DemoDat <- data.frame( Year = Year
>>                       , Score = c( X0, X1 )
>>                       , Type = Type
>>                       )
>>
>> ggplot( data = DemoDat
>>        , aes( x = Year
>>             , y = Score
>>             , color = Type
>>             )
>>        , fill = NULL
>>        ) +
>>      geom_boxplot( position = position_dodge( 1 )
>>                  , outlier.alpha = 0
>>                  ) +
>>      theme_minimal() +
>>      scale_colour_manual( name = "National v. Local"
>>                         , values = c( "red", "black" ) ) +
>>      scale_x_discrete( drop = FALSE ) +
>>      scale_y_continuous( breaks=seq( 700, 2100, 100 ) ) +
>>      coord_cartesian( ylim = c( 700, 2100 ) )
>>
>> # ![](https://i.imgur.com/wUVYU5H.png)
>
>
> Looks perfect.  Thanks *HUGELY* for your patience with my stupidity.
>
> <SNIP>
>
> cheers,
>
> Rolf
>
> --
> Technical Editor ANZJS
> Department of Statistics
> University of Auckland
> Phone: +64-9-373-7599 ext. 88276
>
> ______________________________________________
> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.




More information about the R-help mailing list