[R] Problem with reshaping data from wide to long format

Gabor Grothendieck ggrothendieck at gmail.com
Thu Mar 25 19:31:21 CET 2010


Try new.row.names = 1:150 as an arg to reshape.

On Thu, Mar 25, 2010 at 2:04 PM, Lauri Nikkinen <lauri.nikkinen at iki.fi> wrote:
> Hi,
>
> I have a data.frame in wide format which I would like to reshape into
> a long format:
>
> example (nonsense) data:
>
>> dput(perus2)
> structure(list(id = c(30L, 38L, 21L, 12L, 22L, 28L, 31L, 44L,
> 8L, 47L, 23L, 20L, 41L, 42L, 29L, 50L, 5L, 33L, 4L, 17L, 11L,
> 1L, 18L, 6L, 9L, 32L, 16L, 14L, 39L, 48L, 37L, 43L, 25L, 27L,
> 35L, 40L, 45L, 49L, 2L, 34L, 10L, 15L, 24L, 26L, 46L, 7L, 13L,
> 3L, 19L, 36L), height1 = c(157.10664436, 162.77883643, 162.57095167,
> 155.82784611, 158.17430233, 156.21247668, 159.33063095, 160.47626229,
> 157.54657592, 155.01414518, 158.5172723, 162.89026674, 156.08622305,
> 159.32250375, 162.23610226, 160.21190346, NA, 155.54527115, 155.94912976,
> 160.49239576, 159.29210577, 158.93378272, 159.56724207, 162.36425594,
> 161.60135907, 161.75075865, 162.17477449, 157.24285415, NA, NA,
> 160.51351242, 159.91685827, NA, 161.20032847, 163.04696627, 161.12942938,
> NA, 158.80416016, 155.23140708, 156.31264334, 162.37744195, 160.90847312,
> NA, NA, 161.25101061, 158.11268844, 157.49338051, 156.92212718,
> 155.53316098, 157.68964234), height2 = c(168.21926725, 161.56637515,
> 167.67702789, NA, 162.30948163, 167.82148015, NA, 167.22602729,
> 163.12053538, NA, 165.14942295, 167.87354665, 160.38044351, 164.10900509,
> 168.07467582, NA, NA, 163.03725294, 167.76514949, 161.69014727,
> 161.5809546, 161.14341254, 164.80047183, 162.05997825, 164.11475164,
> 166.40195203, NA, 166.0826708, 165.7232283, 164.05199508, 167.3839719,
> 166.66838331, 162.50198958, 167.25560972, 167.67882525, 160.43653059,
> 163.93406069, 160.84337152, 163.74551267, 163.51428188, 163.29711762,
> NA, 168.345659, NA, 168.00421653, NA, 167.63059459, NA, NA, 168.30745505
> ), height3 = c(170.90466092, NA, 167.38512765, 172.00014248,
> 171.84958505, NA, 169.97286094, NA, NA, 169.53023204, 167.64452841,
> 170.94253421, NA, NA, 169.11010524, 172.06679578, NA, 168.09847664,
> 169.42974919, 171.17927523, 166.7736197, 169.12926314, 166.34368104,
> 173.18168378, NA, 170.345414, NA, 172.09205926, NA, NA, NA, NA,
> 170.59135788, 167.48001013, 170.83121727, 167.90871568, 169.7154018,
> NA, 171.11203348, 165.25337916, NA, NA, NA, 167.2210484, 166.26445432,
> 167.3911325, NA, NA, 169.20495164, 171.19481305), weight1 = c(NA,
> 42.12117886, 35.006739718, 30.104617789, 37.596524918, 32.681661769,
> 43.273125397, 40.919884581, 41.417712325, 32.363735308, 33.492860521,
> 40.679869782, 41.051721029, 39.156918258, 35.594038489, 44.788378759,
> 31.706213547, 37.600275868, 30.562918803, NA, 37.842347921, 34.049919571,
> 35.390284683, 41.169443504, 36.314510306, NA, 32.315887067, 34.815182022,
> 43.753041021, 42.631667022, 43.964726468, 38.665918845, 35.758478283,
> 31.642950399, 33.31580307, 41.171896621, 34.11696779, 33.872158362,
> 42.70435576, 41.153390371, 43.830697643, 35.223807744, NA, 37.210599185,
> 44.775833501, 30.659131352, 40.399049686, 44.368921711, 41.850601771,
> 37.850971621), weight2 = c(46.614122964, 49.810266131, NA, 51.600870342,
> 49.009289571, 44.041339142, 52.877022993, NA, 49.99494035, 40.401355811,
> 43.25422231, 53.470358871, 52.762958304, 52.586173237, 50.286215216,
> NA, 44.273412353, 40.473174437, 51.956636192, 46.237083243, 40.679582358,
> NA, NA, 53.008553857, 47.919812716, 48.191047925, 54.024111477,
> NA, 45.548940909, NA, 53.838601463, NA, 50.50111976, 53.918903127,
> 45.481404898, 54.465265395, 50.656535116, 53.037833591, 53.346378286,
> NA, 40.582047152, 44.724081659, 50.858338403, NA, NA, 40.679801628,
> 44.8645364, 44.011082672, 51.955119361, NA), weight3 = c(55.198326807,
> 54.174241705, NA, 54.069526393, 52.746444632, NA, NA, 61.469501304,
> 51.508762087, 58.266821929, 58.647671581, 59.09318286, 52.884083243,
> 63.804598792, 58.559146181, NA, 56.556534826, 51.071810157, NA,
> 62.371304581, NA, 55.025181044, 55.16444531, 59.571659821, 52.516719702,
> NA, 57.547876457, 61.692374636, 57.952322011, 64.959944764, 61.631626094,
> 50.46350485, 51.336554259, 60.036043059, NA, NA, 52.306778299,
> 50.312191646, 57.281096013, 52.174733394, 51.751740252, NA, 61.142851937,
> 64.93667359, 57.041472991, 56.285936174, 63.046773681, NA, NA,
> 55.571892474), sp = c(0L, 0L, 0L, 0L, 1L, 0L, NA, 0L, 0L, 0L,
> 1L, 1L, 1L, 0L, 0L, 0L, 0L, NA, 0L, 1L, 0L, 1L, NA, 0L, 0L, 0L,
> 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
> 0L, NA, 1L, 0L, 0L, 0L, 1L, 1L), bmi1 = c(NA, 15.8966143320452,
> 13.245421082447, 12.3977530712043, 15.027123283702, 13.3928404493200,
> 17.0458911682568, 15.8895939465852, 16.6866120773622, 13.4683992248259,
> 13.3290457842967, 15.3316637234161, 16.8500900221585, 15.4260331046336,
> 13.5232867850418, 17.4492104953802, NA, 15.540944398053, 12.5669219098308,
> NA, 14.9138430142300, 13.4798062642435, 13.8994168072589, 15.6168753286232,
> 13.9056141468016, NA, 12.2871023625512, 14.0807839118743, NA,
> NA, 17.0640132231600, 15.1195838206639, NA, 12.1771351358901,
> 12.5321285552922, 15.8580993895571, NA, 13.4313331175885, 17.7220103647077,
> 16.8429207495369, 16.6236731966879, 13.6043712362307, NA, NA,
> 17.2202236903033, 12.2638378494873, 16.2872041214028, 18.0181625678781,
> 17.3003826973727, 15.2219641887793), bmi2 = c(16.4727475841863,
> 19.081692780389, NA, NA, 18.6033272435455, 15.6374315460333,
> NA, NA, 18.7892211535392, NA, 15.8589484308730, 18.9735487397628,
> 20.5128647833983, 19.5257058569701, 17.8009993860965, NA, NA,
> 15.2262715351502, 18.4602479341394, 17.6857432790636, 15.5810293774179,
> NA, NA, 20.1834035524235, 17.7917993591549, 17.4040185299102,
> NA, NA, 16.5848486941011, NA, 19.2161345801121, NA, 19.1242161059593,
> 19.2743465419037, 16.1762342002915, 21.1598751272938, 18.8493807769291,
> 20.5012074556641, 19.8960161913625, NA, 15.2186774698114, NA,
> 17.9456140418110, NA, NA, NA, 15.9660179598009, NA, NA, NA),
>    bmi3 = c(18.8980981166013, NA, NA, 18.2765808591596, 17.8606070520157,
>    NA, NA, NA, NA, 20.2734200134073, 20.8675761771662, 20.2226053401634,
>    NA, NA, 20.4764948398200, NA, NA, 18.0739757601718, NA, 21.2854316398646,
>    NA, 19.2364074182253, 19.9363954704867, 19.8625857537345,
>    NA, NA, NA, 20.8309858751195, NA, NA, NA, NA, 17.6405719924585,
>    21.4035643852817, NA, NA, 18.1599838638817, NA, 19.563664852469,
>    19.1055498419989, NA, NA, NA, 23.2224562874904, 20.6344030442757,
>    20.087921316982, NA, NA, NA, 18.9615564192124), spf = structure(c(2L,
>    2L, 2L, 2L, 1L, 2L, NA, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
>    2L, NA, 2L, 1L, 2L, 1L, NA, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
>    1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, NA, 1L, 2L,
>    2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor")),
> .Names = c("id",
> "height1", "height2", "height3", "weight1", "weight2", "weight3",
> "sp", "bmi1", "bmi2", "bmi3", "spf"), row.names = c(NA, -50L), class =
> "data.frame")
>>
>
> I'm doing this but I get an error:
>
>> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"),
> +                                c("weight1", "weight2", "weight3"),
> +                                c("bmi1", "bmi2", "bmi3")),
> +                 v.names=c("heigth","weigth", "bmi"),
> +                 timevar = "time",
> +                 times = factor(c(1, 2, 3)),
> +                 idvar = c("id", "sp"),
> +                 direction = "long")
> Error in `row.names<-.data.frame`(`*tmp*`, value = c("30.0.1", "38.0.1",  :
>  duplicate 'row.names' are not allowed
> In addition: Warning message:
> non-unique value when setting 'row.names': ‘NA.1’
>>
>
> This will fix this error, but I would like to avoid this kind of tweak
> for NA values:
>
>> perus2$sp[is.na(perus2$sp)] <- 999
>> perus2_trans <- reshape(perus2, varying = list(c("height1", "height2", "height3"),
> +                                c("weight1", "weight2", "weight3"),
> +                                c("bmi1", "bmi2", "bmi3")),
> +                 v.names=c("heigth","weigth", "bmi"),
> +                 timevar = "time",
> +                 times = factor(c(1, 2, 3)),
> +                 idvar = c("id", "sp"),
> +                 direction = "long")
>> perus2_trans <- perus2_trans[order(perus2_trans$id, perus2_trans$time),]
>> perus2_trans$sp[perus2_trans$sp == 999] <- NA
>
> How to avoid this <- 999 substitute? Thanks in advance.
>
> -Lauri
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



More information about the R-help mailing list