# [R] Bland Altman summary stats for all column combinations

arun smartpink111 at yahoo.com
Thu Jul 18 16:07:44 CEST 2013

```HI,
# dat1 is the data frame posted in the question
# (columns: Study, G1, G2, S1, S2, Method).
# All unordered pairs of the measurement columns (columns 2 to 5 of dat1);
# each column of Combn1 holds one pair of column names.
Combn1 <- combn(colnames(dat1)[2:5], 2)
# For the first part, maybe this helps:
library(plyr)
# For every pair, summarise the differences (first column minus second column)
# within each Method. The result is a list with one data frame per pair.
res <- lapply(
  split(Combn1, col(Combn1)),
  function(pair) {
    pair_cols <- dat1[, pair]
    pair_df <- cbind(
      Var1 = colnames(pair_cols)[1],
      Var2 = colnames(pair_cols)[2],
      pair_cols,
      Method = dat1[, 6]
    )
    # Give the two measurement columns fixed names so that the ddply() call
    # below can refer to them regardless of which pair is being processed.
    colnames(pair_df)[3:4] <- c("V1", "V2")
    ddply(
      pair_df,
      .(Method, Var1, Var2),
      summarize,
      mean1 = mean(V1 - V2, na.rm = TRUE),
      sd1 = sd(V1 - V2, na.rm = TRUE)
    )
  }
)
res[[1]]
#        Method Var1 Var2      mean1      sd1
#1 Simple_2_ROI   G1   G2 -0.6684211 5.223882
#2   Single_ROI   G1   G2  1.1263158 2.313929
#3 WIG_drawn_bg   G1   G2 -1.0894737 4.876576
#4   WIG_Method   G1   G2 -1.1684211 4.894447

Also, the
G1 G1 0 0 Simple_2_ROI
# part is not clear, because your ddply() code didn't produce that row.

A.K.

Hello,

I have the following data.frame

# Example data frame literal (apparently dput() output): 76 rows with the
# columns Study (factor, 19 levels), G1, G2, S1, S2 (numeric) and
# Method (factor, 4 levels). Note that it is not assigned to a name here.
structure(list(Study = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L), .Label = c("WCBP12236", "WCBP12241", "WCBP12242", "WCBP12243",
"WCBP12245", "WCBP13001", "WCBP13002", "WCBP13003", "WCBP13004",
"WCBP13005", "WCBP13006", "WCBP13007", "WCBP13008", "WCBP13009",
"WCBP13010", "WCBP13011", "WCBP13012", "WCBP13013", "WCBP13014"
), class = "factor"), G1 = c(68, 68.6, 66.6, 73.1, 51.6, 50.1,
64.1, 73, 63.7, 43.2, 62.3, 59.2, 67.5, 68.2, 54.6, 67.9, 56.5,
54.2, 67.3, 68, 68.4, 67.9, 73.3, 51.7, 50.3, 63.9, 73.9, 64,
42.9, 62.5, 59.3, 66.7, 68.4, 54, 68.2, 56.8, 54.5, 67, 53.2,
41.4, 53, 52.3, 41, 37.4, 56.9, 65.3, 36.2, 35.3, 36.1, 32.5,
56.5, 47.7, 39.4, 59.6, 38.1, 24.2, 30.2, 68.5, 68.9, 70.7, 74.9,
53.4, 51.6, 65.9, 75.7, 64.7, 42.8, 61.4, 60.8, 69.5, 68.7, 55.9,
70.7, 59.5, 51.1, 69.5), G2 = c(79.8, 72.2, 73.5, 74.4, 50.4,
54.8, 63.1, 70.4, 63.6, 45.1, 65.3, 49.4, 65.3, 76.2, 51, 63.9,
58.7, 57.8, 67, 79.6, 72.1, 73.9, 74.7, 50.5, 55.1, 62.8, 70.5,
63.3, 44.6, 65.5, 48.9, 64.9, 76.3, 50.6, 64.8, 58.6, 58.3, 67.4,
51.2, 37.7, 49.1, 53.7, 44.6, 37.3, 54.9, 64.1, 33.8, 31.9, 34.2,
30.3, 56.2, 44.6, 38.2, 63.2, 35.8, 26.5, 27.6, 80.6, 71.6, 75.4,
77.1, 52.4, 56.3, 66, 72.3, 64.5, 38.2, 64.3, 49.2, 66.9, 77.1,
52.4, 67.5, 59.6, 55.6, 69.9), S1 = c(75.1, 65.9, 72.7, 68.8,
49, 57.5, 66.5, 74.1, 60.9, 51.8, 58, 64.3, 71.1, 71.4, 58.9,
62.2, 58, 57.7, 58.6, 75.2, 66, 73.2, 69.7, 48.9, 57.7, 66.5,
74.7, 60.8, 51.4, 58.9, 65.5, 70.5, 71.4, 58.9, 65.1, 60.8, 57.7,
58.4, 54.3, 40.2, 52.6, 60.5, 42.6, 34.1, 55, 64.7, 36.3, 32.5,
39, 38.8, 58.1, 48, 40.5, 61, 40, 26.4, 28.8, 76.4, 66.5, 73.9,
72, 50.7, 59.2, 69.9, 76.3, 62.4, 50, 58.5, 66.6, 73.7, 72.3,
62.6, 69.6, 62.7, 57.9, 61.1), S2 = c(76.6, 71.6, 71.2, 72.7,
51.6, 56.7, 65.9, 73.5, 63.6, 55.2, 62.6, 62.2, 69.1, 71.1, 56.8,
61, 61.7, 60, 55.7, 76.9, 71.6, 72.3, 73.2, 51.7, 56.8, 64.5,
74.9, 63.6, 51.3, 63, 62.8, 68.7, 71.3, 56.8, 64.2, 62.8, 60.4,
55.8, 53.6, 42.5, 50, 54.4, 42.2, 36.4, 57.7, 64.1, 35.1, 30.8,
39.1, 37.4, 58.7, 47.8, 42, 58.8, 39.4, 24.2, 28.2, 78.2, 73.3,
72.3, 75.6, 53.4, 57.8, 68.3, 76.6, 63.7, 51.7, 63.4, 63.3, 71.5,
72.3, 60.2, 67.1, 65.5, 58.2, 59.1), Method = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Simple_2_ROI",
"Single_ROI", "WIG_drawn_bg", "WIG_Method"), class = "factor")), .Names = c("Study",
"G1", "G2", "S1", "S2", "Method"), row.names = c(NA, -76L), class = "data.frame")

This shows the measurement results of 2 operators using 4
slightly different analysis methods. Each operator analyses every data
file (WCBPXXXXX) twice, giving the columns G1, G2, S1 and S2, and there are
4 methods (Single_ROI, Simple_2_ROI, etc.).

What I would like to do is get the summary statistics from a Bland Altman (Mean vs Difference) plot for all 16 combinations.

For example, for G1 and G2, I can do the following:

# Per-Method mean and standard deviation of the G1 - G2 differences.
ddply(
  df,
  .(Method),
  summarise,
  mean = mean(G1 - G2, na.rm = TRUE),
  sd = sd(G1 - G2, na.rm = TRUE)
)

Eventually I want a data.frame with the format
Var1 Var2 Mean  Sd Method
G1   G2   -.67  5.2 Simple_2_ROI
G1   G2   1.12  2.3 Single_ROI
G1   G2   -1.08 4.8 WIG_drawn_bg
G1   G2   -1.17 4.9 WIG_Method
G1 G1 0 0 Simple_2_ROI

Is there a simple way to achieve this structure?

```