[R] factor interaction boxplot ordering by median

Sergio Fonda sergio.fonda99 at gmail.com
Sun Sep 6 17:08:29 CEST 2015


Thanks to Marc Schwartz I was able to order a "two factors"
interaction boxplot by the median associated with one factor alone.
I then tried to generate a faceted plot (3x2 boxplots in ggplot2) for the
dataframe reported at the bottom, but I have not been able to get a correct plot.
The dataframe is a simulation of gene expression values in five patient
conditions.
I would like each of the 5 boxplots to be ordered by the median of the values
associated with the combination "gene*pat.cond".
Any help is much appreciated,
Sergio
 ____________________________
Example Dataframe:
    pat.cond     gene        value
1          N  ADAMTS1 -5.557194292
2          N  ADAMTS1 -2.576700157
3          E  ADAMTS1 -6.629356620
4          E  ADAMTS1 -6.629356620
5          E  ADAMTS1 -6.629356620
6          E  ADAMTS1 -6.629356620
7          M  ADAMTS1 -6.629356620
8          M  ADAMTS1 -6.629356620
9          M  ADAMTS1 -6.629356620
10         M  ADAMTS1 -2.742163709
11         N ADAMTS12 -1.084104611
12         E ADAMTS12  1.710871953
13         E ADAMTS12  6.629356620
14         M ADAMTS12  6.629356620
15         M ADAMTS12  1.711422682
16         E      APC -3.665169008
17         E      APC -1.192183578
18         M      APC -6.629356620
19         M      APC  3.696542194
20         M      APC -3.533991641
21         R      APC -4.267887134
22         N    BORIS  6.629356620
23         E    BORIS  6.629356620
24         M    BORIS  6.629356620
25         M    BORIS  6.629356620
26         M    BORIS  6.629356620
27         S    BORIS  6.629356620
28         E    BRCA1 -4.260347862
29         E    BRCA1 -6.629356620
30         E    BRCA1 -6.629356620
31         M    BRCA1 -6.443174967
32         M    BRCA1 -6.629356620
33         M    BRCA1 -6.629356620
34         M    BRCA1 -2.525972281
35         N      P16 -1.698706675
36         M      P16 -6.629356620
37         M      P16 -6.629356620
38         M      P16 -6.629356620
39         S      P16 -6.629356620
40         S      P16 -6.629356620
41         S      P16  0.518269571
42         N     DAX1  0.708010228
43         E     DAX1 -0.337455350
44         E     DAX1  2.803374605
45         E     DAX1 -1.407226228
46         M     DAX1  6.629356620
47         S     DAX1  2.022408499
48         S     DAX1  6.629356620
49         S     DAX1  6.629356620
50         S     DAX1  6.629356620
51         N     DKK1 -0.876905559
52         N     DKK1  0.008176565
53         E     DKK1 -3.057081024
54         M     DKK1  0.804046915
55         N     DKK2 -4.677880676
56         N     DKK2 -3.264182143
57         M     DKK2  0.219772061
58         R     DKK2 -3.205760419
59         R     DKK2 -3.567799537
60         R     DKK2 -1.687113091
61         S     DKK2 -6.629356620
62         S     DKK2 -6.629356620
63         N     ESR1  1.479810020
64         M     ESR1  1.861011014
65         N     FBP1 -0.110196473
66         N     FBP1  0.721286184
67         E     FBP1 -5.107943868
68         E     FBP1 -4.593812366
69         M     FBP1 -1.622176688
70         S     FBP1 -3.097791525
71         E    FOXL2 -4.564123239
72         E    FOXL2  3.111180437
73         M    FOXL2  0.323703764
74         M    FOXL2 -3.851350485
75         R    FOXL2 -3.324087523
76         S    FOXL2  0.739408989
77         M    GATA5 -1.171932246
78         M    GATA5 -3.466054731
79         M    GATA5 -1.283038699
80         S    GATA5 -2.778390690
81         S    GATA5 -2.014479273
82         S    GATA5 -3.015234172
83         E     GPX3 -6.629356620
84         E     GPX3 -1.942190735
85         M     GPX3  6.629356620
86         M     GPX3 -6.629356620
87         M     GPX3 -2.615982450
88         M     GPX3 -6.629356620
89         M     GPX3  0.128743354
90         S     GPX3 -6.629356620
91         E      MAL -6.629356620
92         E      MAL -4.825545452
93         E      MAL -6.629356620
94         E      MAL -6.629356620
95         E      MAL -6.629356620
96         M      MAL -0.419005364
97         M      MAL -0.923667455
98         M      MAL  6.629356620
99         M      MAL  3.371740196
100        S      MAL -6.629356620
101        N     MGMT -6.629356620
102        E     MGMT  1.115112556
103        M     MGMT  4.030893797
104        S     MGMT -6.629356620
105        N     MLH1 -0.519875304
106        N     MLH1 -1.352872084
107        E     MLH1 -0.777864442
108        E     MLH1  1.105073029
109        E     MLH1  5.758699199
110        E     MLH1 -1.498072236
111        S     MLH1 -1.630362301
112        M    MYOD1 -6.629356620
113        M    MYOD1  6.629356620
114        M    MYOD1 -6.629356620
115        M    MYOD1 -6.629356620
116        R    MYOD1 -6.629356620
117        R    MYOD1 -6.629356620
118        S    MYOD1 -6.629356620
119        S    MYOD1 -6.629356620
120        S    MYOD1 -4.645053781
121        N    NELL1 -6.629356620
122        N    NELL1 -5.536591557
123        N    NELL1 -5.903856552
124        E    NELL1 -6.629356620
125        E    NELL1 -6.629356620
126        M    NELL1 -6.629356620
127        S    NELL1 -6.629356620
128        N     OSMR -6.629356620
129        M     OSMR  4.407821839
130        M     OSMR  0.364604851
131        M     OSMR -6.629356620
132        S     OSMR -6.629356620
133        N     PAX6  6.629356620
134        E     PAX6 -3.401030959
135        M     PAX6 -0.946878855
136        S     PAX6  0.964721065
137        E    PTGS2 -3.749795240
138        M    PTGS2 -6.629356620
139        S    PTGS2 -2.076356656
140        S    PTGS2 -3.961164916
141        N     RARB -4.611429173
142        E     RARB -1.706166432
143        S     RARB -3.697493231
144        N  RASSF1A -1.505128642
145        E  RASSF1A  0.808179628
146        E  RASSF1A -6.629356620
147        E  RASSF1A -0.694997778
148        M  RASSF1A -0.332307577
149        S  RASSF1A -6.629356620
150        S  RASSF1A  6.629356620
151        N     RPRM -4.023523466
152        E     RPRM  0.443243995
153        S     RPRM -6.629356620
154        S     RPRM -4.019270038
155        E    RSPOI -1.735307607
156        E    RSPOI -1.296083205
157        M    RSPOI -3.003752756
158        S    RSPOI -3.156564936
159        S    RSPOI -2.928871731
160        N  SEPTIN9 -6.629356620
161        E  SEPTIN9 -2.585469731
162        E  SEPTIN9 -2.525798264
163        M  SEPTIN9  6.629356620
164        R  SEPTIN9 -3.071222253
165        S  SEPTIN9 -6.629356620
166        M    SFRP1 -1.426465815
167        R    SFRP1 -6.629356620
168        S    SFRP1 -2.348648751
169        S    SFRP1 -6.629356620
170        S    SFRP1 -2.304295273
171        N    SFRP4 -2.315044901
172        E    SFRP4 -2.940158139
173        M    SFRP4  6.629356620
174        S    SFRP4 -6.629356620
175        S    SFRP4  6.629356620
176        N    SFRP5 -6.629356620
177        E    SFRP5 -5.833523393
178        E    SFRP5  2.765666156
179        E    SFRP5  2.707734922
180        E    SFRP5  6.629356620
181        M    SMAD4 -6.629356620
182        M    SMAD4 -6.629356620
183        M    SMAD4 -6.629356620
184        R    SMAD4 -6.629356620
185        S    SMAD4 -6.629356620
186        S    SMAD4 -6.629356620
187        S    SMAD4 -6.629356620
188        N    SOCS3 -0.604614698
189        N    SOCS3  0.097268646
190        E    SOCS3 -1.505450218
191        E    SOCS3  6.629356620
192        N    SPARC -3.865639988
193        N    SPARC -1.868782928
194        E    SPARC  2.821046068
195        E    SPARC  6.629356620
196        S    SPARC -6.629356620
197        S    SPARC  0.202017887
198        S    SPARC  6.629356620
199        S    SPARC  6.629356620
200        N     TAC1 -1.528221578
201        M     TAC1  6.629356620
202        M     TAC1  6.629356620
203        R     TAC1 -2.294267250
204        S     TAC1  2.624594634
205        N     TERT -2.985641232
206        E     TERT  2.809747729
207        E     TERT  6.629356620
208        E     TERT -3.164404633
209        M     TERT  6.629356620
210        M     TERT -2.766881108
211        S     TERT  0.875337372
212        S     TERT  3.175095461
213        N    TIMP3 -2.046729298
214        E    TIMP3 -3.461408230
215        E    TIMP3 -2.996720557
216        M    TIMP3 -6.629356620
217        M    TIMP3 -1.527333149
218        R    TIMP3 -3.933657283
219        S    TIMP3 -1.020144976
220        S    TIMP3 -2.357112874
221        S    TIMP3  0.806736362
222        M   TMEFF2 -1.160275850
223        M   TMEFF2 -2.712025806
224        R   TMEFF2 -3.961478237
225        R   TMEFF2 -6.510953714
226        S   TMEFF2 -0.619555286
227        S   TMEFF2  2.344341057
228        E     WIF1 -6.629356620
229        E     WIF1 -3.806114522
230        E     WIF1 -2.318313158
231        M     WIF1  1.102899897
232        M     WIF1  6.629356620
233        M     WIF1 -6.629356620
234        N     WNT4 -6.629356620
235        E     WNT4 -3.474027185
236        E     WNT4 -6.629356620
237        S     WNT4 -6.629356620
238        S     WNT4 -6.629356620
239        E      WRN -6.629356620
240        M      WRN -4.148942985
241        S      WRN -1.855994142
242        R      WT1 -0.824982393
243        S      WT1  1.236129501
244        S      WT1  1.088540877


2015-09-05 15:08 GMT+02:00 Marc Schwartz <marc_schwartz at me.com>:

>
> > On Sep 5, 2015, at 7:29 AM, Sergio Fonda <sergio.fonda99 at gmail.com>
> wrote:
> >
> > I would like to visualize in a boxplot a data frame with two factors,
> > ordering one factor by the median.
> > As an example, suppose we have the InsectSprays dataframe, where an
> > "operator" factor with two levels, op1 and op2, has been added as shown
> > at the bottom here.
> > How can a boxplot be generated showing boxes for the interaction
> > spray*op, ordered according to the operators' count median for every
> > spray?
> > Thanks in advance for any help!
> > Sergio
> > ________________________________
> > Modified InsectSprays dataframe:
>
> <snip>
>
> Hi,
>
> There is actually an example of reordering factor levels by a calculated
> numeric value using the InsectSprays data frame in ?boxplot using ?reorder.
> An interaction can be created by using ?interaction.
>
> Given your data above, in a data frame “DF”:
>
> DF <- structure(list(count = c(10L, 7L, 20L, 14L, 14L, 12L, 10L, 23L,
> 17L, 20L, 14L, 13L, 11L, 17L, 21L, 11L, 16L, 14L, 17L, 17L, 19L,
> 21L, 7L, 13L, 0L, 1L, 7L, 2L, 3L, 1L, 2L, 1L, 3L, 0L, 1L, 4L,
> 3L, 5L, 12L, 6L, 4L, 3L, 5L, 5L, 5L, 5L, 2L, 4L, 3L, 5L, 3L,
> 5L, 3L, 6L, 1L, 1L, 3L, 2L, 6L, 4L, 11L, 9L, 15L, 22L, 15L, 16L,
> 13L, 10L, 26L, 26L, 24L, 13L), spray = structure(c(1L, 1L, 1L,
> 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
> 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
> 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L,
> 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
> 6L, 6L, 6L, 6L, 6L), .Label = c("A", "B", "C", "D", "E", "F"), class =
> "factor"),
>     op = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
>     2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
>     1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L,
>     1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
>     2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
>     2L, 2L), .Label = c("op1", "op2"), class = "factor")), .Names =
> c("count",
> "spray", "op"), class = "data.frame", row.names = c("1", "2",
> "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14",
> "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
> "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36",
> "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47",
> "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58",
> "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69",
> "70", "71", "72"))
>
>
> # Modify the ?boxplot example
> bymedian <- with(DF, reorder(interaction(spray, op), count, median))
>
> > bymedian
>  [1] A.op1 A.op1 A.op1 A.op1 A.op1 A.op1 A.op2 A.op2 A.op2 A.op2 A.op2
> [12] A.op2 B.op1 B.op1 B.op1 B.op1 B.op1 B.op1 B.op2 B.op2 B.op2 B.op2
> [23] B.op2 B.op2 C.op1 C.op1 C.op1 C.op1 C.op1 C.op1 C.op2 C.op2 C.op2
> [34] C.op2 C.op2 C.op2 D.op1 D.op1 D.op1 D.op1 D.op1 D.op1 D.op2 D.op2
> [45] D.op2 D.op2 D.op2 D.op2 E.op1 E.op1 E.op1 E.op1 E.op1 E.op1 E.op2
> [56] E.op2 E.op2 E.op2 E.op2 E.op2 F.op1 F.op1 F.op1 F.op1 F.op1 F.op1
> [67] F.op2 F.op2 F.op2 F.op2 F.op2 F.op2
> attr(,"scores")
> A.op1 B.op1 C.op1 D.op1 E.op1 F.op1 A.op2 B.op2 C.op2 D.op2 E.op2 F.op2
>  13.0  15.0   1.5   4.5   4.0  15.0  15.5  17.0   1.5   5.0   2.5  18.5
> 12 Levels: C.op1 C.op2 E.op2 E.op1 D.op1 D.op2 A.op1 B.op1 ... F.op2
>
>
> boxplot(count ~ bymedian, data = DF,
>         xlab = "Interaction of spray and op", ylab = "Insect count",
>         main = "Modified InsectSprays Data", varwidth = TRUE,
>         col = "lightgray")
>
>
> Regards,
>
> Marc Schwartz
>
>

	[[alternative HTML version deleted]]



More information about the R-help mailing list