[R] bwplot change whiskers position to percentile 5 and P95

Christophe Bouffioux christophe.b00 at gmail.com
Fri Oct 15 10:54:28 CEST 2010


Hi David,
More info

Thanks a lot
Christophe

##################
library(Hmisc)
library(lattice)
library(fields)
library(gregmisc)
library(quantreg)

> str(sasdata03_a)
'data.frame':   109971 obs. of  6 variables:
 $ jaar      : Factor w/ 3 levels "2006","2007",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ Cat_F     : Factor w/ 6 levels "a","d","g","h",..: 1 1 1 1 1 1 1 1 1 1
...
 $ montant_oa: num  8087 1960 1573 11502 3862 ...
 $ nombre_cas: int  519 140 111 780 262 125 79 334 97 503 ...
 $ montant_i : num  221 221 221 221 221 ...
 $ nombre_i  : int  12 12 12 12 12 12 12 12 12 12 ...
> aggregate.table(sasdata03_a$nombre_cas, sasdata03_a$jaar,
sasdata03_a$Cat_F, nobs)
        a       d      g      h      i       k
2006 7568 1911 5351 7430 7377 7217
2007 7499 1914 5378 7334 7275 7138
2008 7524 1953 5488 7292 7192 7130
> d2008 <- sasdata03_a[sasdata03_a$Cat_F=="d" & sasdata03_a$jaar==2008, ]
> quantile(d2008$nombre_cas, probs = c(0.95))
   95%
3630.2

#bbbbbbbbbbbbbbbbbbbbbb#
#                                       #
#  graph on real data           #
#                                       #
#bbbbbbbbbbbbbbbbbbbbbb#

bwplot(jaar ~ nombre_cas | Cat_F, xlab="", ylab="",
   data=sasdata03_a  , xlim=c(-100,3800), layout=c(2,3), X =
sasdata03_a$nombre_i,
   pch = "|",
   stats=boxplot2.stats,
   main="Fig. 1: P95 d 2008=3630",

   par.settings = list(
   plot.symbol = list(alpha = 1, col = "transparent",cex = 1,pch = 20)),
   panel = function(x, y, ..., X, subscripts){
          panel.grid(v = -1, h = 0)
          panel.bwplot(x, y, ..., subscripts = subscripts)
          X <- X[subscripts]
          X <- tapply(X, y, unique)
          Y <- tapply(y, y, unique)
          tg <- table(y)
          panel.points(X, Y, cex=3, pch ="|" , col = "red")
          panel.text((3700*0.8), (Y-0.15), labels = paste("N=", tg))

  })

########################
#    Simulated data 1          #
########################

ex <- data.frame(v1 = log(abs(rt(180, 3)) + 1),
                 v2 = rep(c("2007", "2006", "2005"), 60),
                 z  = rep(c("a", "b", "c", "d", "e", "f"), e = 30))

ex2 <- data.frame(v1b = log(abs(rt(18, 3)) + 1),
                 v2 = rep(c("2007", "2006", "2005"), 6),
                 z  = rep(c("a", "b", "c", "d", "e", "f"), e = 3))
ex3 <- merge(ex, ex2, by=c("v2","z"))

#################################
#                                                      #
#    graph on simulated data               #
#                                                      #
#################################

bwplot(as.factor(v2) ~ v1 | as.factor(z), data = ex3, layout=c(3,2), X =
ex3$v1b,
  pch = "|", main="Boxplot.stats",
  stats=boxplot2.stats,
  par.settings = list(
  plot.symbol = list(alpha = 1, col = "transparent",cex = 1,pch = 20)),
 panel = function(x, y, ..., X, subscripts){
   panel.grid(v = -1, h = 0)
   panel.bwplot(x, y, ..., subscripts = subscripts)
    X <- X[subscripts]
    xmax =max(x)
    X <- tapply(X, y, unique)
    Y <- tapply(y, y, unique)
    tg <- table(y)
    panel.points(X, Y, cex=3, pch ="|" , col = "red")
    panel.text((xmax-xmax*0.1), (Y-0.15), labels = paste("N=", tg))
 })
##########################



On Thu, Oct 14, 2010 at 5:22 PM, David Winsemius <dwinsemius at comcast.net>wrote:

>
> On Oct 14, 2010, at 11:07 AM, Christophe Bouffioux wrote:
>
> Hi,
>>
>> I have tried your proposition, and it works properly on the simulated
>> data, but not on my real data, and I do not see any explanations, this is
>> weird, i have no more ideas to explore the problem
>>
>
> You should:
> a) provide the code that you used to call boxplot
> b) offer str(sasdata03_a) , .... since summary will often obscure
> class-related problems
> c) consider running:
> sasdata03_a$Cat_F <- factor(sasdata03_a$Cat_F) # to get rid of the empty
> factor level
>
>
>
>>
>> so here i give some information on my data, nothing special actually,
>>
>> Christophe
>>
>> > summary(sasdata03_a)
>>   jaar           Cat_F              montant_oa            nombre_cas
>>  montant_i
>>  2006:36854   a      :22591    Min.   :  -112.8        Min.   :   -5.0
>>   Min.   :   33.22
>>  2007:36538   h      :22056    1st Qu.:  1465.5     1st Qu.:  104.0    1st
>> Qu.:   37.80
>>  2008:36579   i      :21844     Median :  4251.5     Median :  307.0
>> Median :   50.00
>>                    k      :21485     Mean   : 13400.0    Mean   :  557.5
>>  Mean   : 1172.17
>>                    g      :16217     3rd Qu.: 13648.5    3rd Qu.:  748.0
>>  3rd Qu.:  458.67
>>                    d      : 5778      Max.   : 534655.6    Max.   :13492.0
>>   Max.   :17306.73
>>                    (Other):    0
>>    nombre_i
>>  Min.   :  1.00
>>  1st Qu.:  1.00
>>  Median :  5.00
>>  Mean   : 44.87
>>  3rd Qu.: 29.00
>>  Max.   :689.00
>>
>> > is.data.frame(sasdata03_a)
>> [1] TRUE
>>
>> The code of the function:
>>
>> boxplot2.stats <-  function (x, coef = 1.5, do.conf = TRUE, do.out = TRUE)
>> {
>>    if (coef < 0)
>>        stop("'coef' must not be negative")
>>    nna <- !is.na(x)
>>    n <- sum(nna)
>>    stats <- stats::fivenum(x, na.rm = TRUE)
>>    stats[c(1,5)]<- quantile(x, probs=c(0.05, 0.95))
>>    iqr <- diff(stats[c(2, 4)])
>>    if (coef == 0)
>>        do.out <- FALSE
>>    else {
>>        out <- if (!is.na(iqr)) {
>>            x < (stats[2L] - coef * iqr) | x > (stats[4L] + coef *
>>                iqr)
>>        }
>>        else !is.finite(x)
>>        if (any(out[nna], na.rm = TRUE))
>>            stats[c(1, 5)] <- range(x[!out], na.rm = TRUE)
>>    }
>>    conf <- if (do.conf)
>>        stats[3L] + c(-1.58, 1.58) * iqr/sqrt(n)
>>    list(stats = stats, n = n, conf = conf, out = if (do.out) x[out &
>>        nna] else numeric(0L))
>> }
>>
>>
>> On Wed, Oct 13, 2010 at 5:13 PM, David Winsemius <dwinsemius at comcast.net>
>> wrote:
>>
>> On Oct 13, 2010, at 10:05 AM, Christophe Bouffioux wrote:
>>
>> Dear R-community,
>>
>> Using bwplot, how can I put the whiskers at percentile 5 and percentile
>> 95,
>> in place of the default position coef=1.5??
>>
>> Using panel=panel.bwstrip, whiskerpos=0.05, from the package agsemisc
>> gives
>> satisfaction, but changes the appearance of my boxplot and works with an
>> old
>> version of R, what I don’t want, and I didn’t find the option in
>> box.umbrella parameters
>>
>> Nope, you won't find it even if you search harder, but you do have a
>> lattice path forward. Just as base function boxplot() does the calculations
>> and then plots with bxp(), by default panel.bwplot sends the data to
>> boxplot.stats, but panel.bwplot also allows you to specify an alternate
>> function that returns plotting parameters differently as long as those
>> conforms to the requirements for structure. You can look at boxplot.stats
>> (it's not that big)  and then construct an alternative. The line you would
>> need to alter would be the one starting with:  stats<-stats::fivenum(...),
>> since you are changing the values returned by fivenum(). You might get away
>> with just changing stats[1] and stats[5] to your revised specifications,
>> although it has occurred to me that you might get some of those "out" dots
>> inside your whiskers. (Fixing that would not be too hard once you are inside
>> boxplot.stats().
>>
>> Seemed to work for me with your data (at least the extent of plotting a
>> nice 3 x 2 panel display. All I did was redefine an nboxplot.stats by
>> inserting this line after the line cited above:
>>
>> stats[c(1,5)]<- quantile(x, probs=c(0.05, 0.95))
>>
>> and then added an argument  ..., stats=nboxplot.stats)  inside your
>> panel.bwplot.
>>
>> --
>> David.
>>
>>
>> Many thanks
>> Christophe
>>
>> Here is the code:
>>
>> library(lattice)
>> ex <- data.frame(v1 = log(abs(rt(180, 3)) + 1),
>>               v2 = rep(c("2007", "2006", "2005"), 60),
>>               z  = rep(c("a", "b", "c", "d", "e", "f"), e = 30))
>>
>> ex2 <- data.frame(v1b = log(abs(rt(18, 3)) + 1),
>>               v2 = rep(c("2007", "2006", "2005"), 6),
>>               z  = rep(c("a", "b", "c", "d", "e", "f"), e = 3))
>> ex3 <- merge(ex, ex2, by=c("v2","z"))
>> D2007 <- ex3[ex3$z=="d" & ex3$v2==2007, ]
>> D2006 <- ex3[ex3$z=="d" & ex3$v2==2006, ]
>> C2007 <- ex3[ex3$z=="c" & ex3$v2==2007, ]
>>
>>
>> quantile(D2007$v1, probs = c(0.05, 0.95))
>> quantile(D2006$v1, probs = c(0.05, 0.95))
>> quantile(C2007$v1, probs = c(0.05, 0.95))
>>
>> bwplot(v2 ~ v1 | z, data = ex3, layout=c(3,2), X = ex3$v1b,
>> pch = "|",
>> par.settings = list(
>> plot.symbol = list(alpha = 1, col = "transparent",cex = 1,pch = 20)),
>> panel = function(x, y, ..., X, subscripts){
>> panel.grid(v = -1, h = 0)
>> panel.bwplot(x, y, ..., subscripts = subscripts)
>> X <- X[subscripts]
>> xmax =max(x)
>> X <- tapply(X, y, unique)
>> Y <- tapply(y, y, unique)
>> tg <- table(y)
>> panel.points(X, Y, cex=3, pch ="|" , col = "red")
>> #vcount <- tapply(v1, v2, length)
>> panel.text((xmax-0.2), (Y-0.15), labels = paste("N=", tg))
>> })
>>
>>       [[alternative HTML version deleted]]
>>
>> ______________________________________________
>> R-help at r-project.org mailing list
>> https://stat.ethz.ch/mailman/listinfo/r-help
>> PLEASE do read the posting guide
>> http://www.R-project.org/posting-guide.html<http://www.r-project.org/posting-guide.html>
>> and provide commented, minimal, self-contained, reproducible code.
>>
>> David Winsemius, MD
>> West Hartford, CT
>>
>>
>>
> David Winsemius, MD
> West Hartford, CT
>
>


More information about the R-help mailing list