[R] average and median values for each of the class

David Winsemius dwinsemius at comcast.net
Sun Apr 27 06:54:44 CEST 2014


On Apr 26, 2014, at 5:37 PM, Nico Met wrote:

> Dear all,
> 
> 
> 
> I have a matrix (dimension 16 x 12) where the 2nd column represents class
> (1,1,1,1,1,2,2,2, etc.) information. I want to estimate the average and median
> values for each class and add this information as rows at the end of
> each class.
> 
Well, it does have a dimension attribute, but it is a data.frame, NOT a matrix. The term "class" is a reserved word in R. What is it that you mean by that word? If it is for each column, then:

sapply( dat, function(x) c( mean(x), median(x)) )



> sapply( dat, function(x) c( mean_x = mean(x), median_x = median(x)) )
          class    name1    name2     name3    name4    name5     name6
mean_x   2.4375 2.350258 1.102291 0.5358036 2.343448 1.895963 0.6242466
median_x 2.0000 2.436813 1.094910 0.5478146 2.421528 1.942289 0.6497279
           name7     name8    name9    name10    name11
mean_x   1.67054 0.2742449 2.094122 0.6388536 0.3736069
median_x 1.72933 0.2770331 2.106486 0.6322816 0.3623650

-- 
David.


> 
> for example:
> 
> dput(dat)
> 
> structure(list(class = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
> 
> 3L, 3L, 3L, 4L, 4L, 4L, 5L), name1 = c(2.554923977, 2.371586762,
> 
> 2.497293431, 2.464827875, 2.981934845, 2.228995664, 2.099640729,
> 
> 1.900314302, 2.630005966, 2.632590262, 2.581887814, 2.408797563,
> 
> 2.098761103, 3.070460716, 1.436980716, 1.645121806), name2 = c(1.297412278,
> 
> 1.104804244, 1.30621114, 1.126009533, 1.466740841, 1.012041118,
> 
> 0.923466541, 0.840575023, 1.285530176, 1.041909333, 1.194917856,
> 
> 1.085015826, 1.047492703, 1.587558217, 0.593340012, 0.723630088
> 
> ), name3 = c(0.587160798, 0.596127884, 0.623760721, 0.549016135,
> 
> 0.686642084, 0.487523394, 0.458620467, 0.397974913, 0.615928976,
> 
> 0.546005649, 0.657383069, 0.546613129, 0.476503461, 0.749062102,
> 
> 0.304160587, 0.29037358), name4 = c(2.833441759, 2.713374426,
> 
> 2.532626548, 2.409093102, 3.014912721, 2.113507947, 2.017291324,
> 
> 1.667744912, 2.602560666, 2.31649643, 2.761204809, 2.433963493,
> 
> 2.229911767, 3.191646399, 1.269919241, 1.387479858), name5 = c(2.172365295,
> 
> 1.955695471, 2.141072829, 1.975743278, 2.377018372, 1.791300389,
> 
> 1.669079382, 1.500209628, 2.164401874, 1.830038378, 2.106750025,
> 
> 1.92888294, 1.707217549, 2.585082653, 1.114841754, 1.315712452
> 
> ), name6 = c(0.715129844, 0.688186262, 0.70133748, 0.709362008,
> 
> 0.712145174, 0.563593885, 0.532109761, 0.472197304, 0.690165016,
> 
> 0.65635473, 0.615835066, 0.64310098, 0.562974891, 0.900622255,
> 
> 0.408546784, 0.416284408), name7 = c(1.995505133, 1.860095899,
> 
> 1.843151597, 1.709861774, 2.155993511, 1.506409746, 1.315405587,
> 
> 1.234544153, 1.96629927, 1.74879757, 1.93994009, 1.660173854,
> 
> 1.556735295, 2.355723318, 0.866634243, 1.013367677), name8 = c(0.275484997,
> 
> 0.233856392, 0.294021245, 0.315504347, 0.251906585, 0.250263636,
> 
> 0.348599173, 0.273806933, 0.32067937, 0.278581115, 0.293726291,
> 
> 0.308350808, 0.201297444, 0.351927886, 0.204230625, 0.185681471
> 
> ), name9 = c(2.461066627, 2.210756164, 2.289047888, 2.253988252,
> 
> 2.668184733, 1.911697836, 1.793443775, 1.560027186, 2.36941155,
> 
> 1.961911111, 2.391501376, 2.002215107, 1.932144233, 2.73705052,
> 
> 1.15580754, 1.807697999), name10 = c(0.723025351, 0.613147422,
> 
> 0.805399925, 0.65651577, 0.779389048, 0.54260459, 0.492283542,
> 
> 0.507969501, 0.749700016, 0.644231327, 0.810319215, 0.620331891,
> 
> 0.600240557, 0.884775748, 0.40006142, 0.391661912), name11 = c(0.308565619,
> 
> 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.361013073,
> 
> 0.430744786, 0.468818055, 0.166072668, 0.369262627, 0.297666411,
> 
> 0.256091173, 0.123021464, 0.308188684, 0.646436241, 0.722972632
> 
> )), .Names = c("class", "name1", "name2", "name3", "name4", "name5",
> 
> "name6", "name7", "name8", "name9", "name10", "name11"), class = "data.frame",
> row.names = c("ara1",
> 
> "ara2", "ara3", "ara4", "ara5", "ara6", "ara7", "ara8", "ara9",
> 
> "ara10", "ara11", "ara12", "ara13", "ara14", "ara15", "ara16"
> 
> ))
> 
> 
> I wrote this:
> 
> 
> 
> avg<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"],
> function(x) mean(x,na.rm=T)) )
> 
> 
> med<-as.data.frame(aggregate(dat[,2:dim(dat)[2]], dat["class"], function(x)
> median(x,na.rm=T)) )
> 
> 
> # avg
> 
> #  class    name1     name2     name3    name4    name5     name6    name7
> #   name8    name9    name10    name11
> 
> #1     1 2.574113 1.2602356 0.6085415 2.700690 2.124379 0.7052322 1.912922
> #0.2741547 2.376609 0.7154955 0.3654845
> 
> #2     2 2.214739 1.0154032 0.4900119 2.100276 1.781248 0.5645165 1.505665
> #0.2983373 1.908645 0.5731394 0.3566621
> 
> #3     3 2.541092 1.1072810 0.5833339 2.503888 1.955224 0.6384303 1.782971
> #0.2935527 2.118543 0.6916275 0.3076734
> 
> #4     4 2.202068 1.0761303 0.5099087 2.230492 1.802381 0.6240480 1.593031
> #0.2524853 1.941667 0.6283592 0.3592155
> 
> #5     5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
> #0.1856815 1.807698 0.3916619 0.7229726
> 
> #> med
> 
> #  class    name1     name2     name3    name4    name5     name6    name7
> #   name8    name9    name10    name11
> 
> #1     1 2.497293 1.2974123 0.5961279 2.713374 2.141073 0.7093620 1.860096
> #0.2754850 2.289048 0.7230254 0.3637169
> 
> #2     2 2.164318 0.9677538 0.4730719 2.065400 1.730190 0.5478518 1.410908
> #0.2972432 1.852571 0.5252870 0.3958789
> 
> #3     3 2.581888 1.0850158 0.5466131 2.433963 1.928883 0.6431010 1.748798
> #0.2937263 2.002215 0.6442313 0.2976664
> 
> #4     4 2.098761 1.0474927 0.4765035 2.229912 1.707218 0.5629749 1.556735
> #0.2042306 1.932144 0.6002406 0.3081887
> 
> #5     5 1.645122 0.7236301 0.2903736 1.387480 1.315712 0.4162844 1.013368
> #0.1856815 1.807698 0.3916619 0.7229726
> 
> 
> 
> 
> But I do not know how I can add this information to the original data.
> 
> 
> For example, for class 1, the output will look like this:
> 
> dput(res1)
> 
> structure(list(class = c(1L, 1L, 1L, 1L, 1L, 1L, 1L), name1 =
> c(2.554923977,
> 
> 2.371586762, 2.497293431, 2.464827875, 2.981934845, 2.574113378,
> 
> 2.497293431), name2 = c(1.297412278, 1.104804244, 1.30621114,
> 
> 1.126009533, 1.466740841, 1.260235607, 1.297412278), name3 = c(0.587160798,
> 
> 0.596127884, 0.623760721, 0.549016135, 0.686642084, 0.608541525,
> 
> 0.596127884), name4 = c(2.833441759, 2.713374426, 2.532626548,
> 
> 2.409093102, 3.014912721, 2.700689711, 2.713374426), name5 = c(2.172365295,
> 
> 1.955695471, 2.141072829, 1.975743278, 2.377018372, 2.124379049,
> 
> 2.141072829), name6 = c(0.715129844, 0.688186262, 0.70133748,
> 
> 0.709362008, 0.712145174, 0.705232154, 0.709362008), name7 = c(1.995505133,
> 
> 1.860095899, 1.843151597, 1.709861774, 2.155993511, 1.912921583,
> 
> 1.860095899), name8 = c(0.275484997, 0.233856392, 0.294021245,
> 
> 0.315504347, 0.251906585, 0.274154713, 0.275484997), name9 = c(2.461066627,
> 
> 2.210756164, 2.289047888, 2.253988252, 2.668184733, 2.376608733,
> 
> 2.289047888), name10 = c(0.723025351, 0.613147422, 0.805399925,
> 
> 0.65651577, 0.779389048, 0.715495503, 0.723025351), name11 = c(0.308565619,
> 
> 0.453808281, 0.363716904, 0.376332596, 0.324998876, 0.365484455,
> 
> 0.363716904)), .Names = c("class", "name1", "name2", "name3",
> 
> "name4", "name5", "name6", "name7", "name8", "name9", "name10",
> 
> "name11"), class = "data.frame", row.names = c("ara1", "ara2",
> 
> "ara3", "ara4", "ara5", "Avg", "Med"))
> 
> 
> 
> And the same will apply to the other classes.
> 
> 
> Thanks a lot !!!!
> 
> 
> Nico
> 
> 	[[alternative HTML version deleted]]
> 
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.

David Winsemius
Alameda, CA, USA




More information about the R-help mailing list