[R] merge two data frames

arnaud Gaboury arnaud.gaboury at gmail.com
Tue May 11 16:25:16 CEST 2010


Dear group,

I have these 2 following data frame:

allcon <-
structure(list(DESCRIPTION = structure(1:17, .Label = c("COFFEE C Jul/10", 
"COPPER May/10", "CORN Jul/10", "CORN May/10", "COTTON NO.2 Jul/10", 
"CRUDE OIL miNY May/10", "GOLD Jun/10", "HENRY HUB NATURAL GAS May/10", 
"ROBUSTA COFFEE (10) Jul/10", "SILVER May/10", "SOYBEANS Jul/10", 
"SPCL HIGH GRADE ZINC USD", "STANDARD LEAD USD", "SUGAR NO.11 Jul/10", 
"SUGAR NO.11 May/10", "WHEAT Jul/10", "WHEAT May/10"), class = "factor"), 
    pl = c(4.75000000000003, -14, -1.5, 7.75, 3.74999999999999, 
    5.22500000000005, 21.6999999999998, -0.0490000000000013, 
    68, 45.300, -8.5, -41, -118, 2.72999999999999, -0.900000000000002, 
    -64.25, 0), quantity = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0), SHORTDESCRIPTION = c("COFFEE C", "COPPER", 
    "CORN", "CORN", "COTTON NO.2", "CRUDE OIL miNY", "GOLD", 
    "HENRY HUB NATURAL GAS", "ROBUSTA COFFEE (10)", "SILVER", 
    "SOYBEANS", "SPCL HIGH GRADE ZINC USD", "STANDARD LEAD USD", 
    "SUGAR NO.11", "SUGAR NO.11", "WHEAT", "WHEAT")), .Names =
c("DESCRIPTION", 
"pl", "quantity", "SHORTDESCRIPTION"), row.names = c(NA, -17L
), class = "data.frame")

value <-
structure(list(SHORTDESCRIPTION = structure(c(7L, 11L, 10L, 14L, 
9L, 22L, 16L, 17L, 4L, 19L, 27L, 18L, 26L, 2L, 3L, 21L, 6L, 15L, 
1L, 28L, 5L, 25L, 23L, 24L, 29L, 8L, 12L, 13L, 20L), .Label = c(" SUGAR
NO.11", 
"COCOA", "COFFEE C", "COPPER", "CORN", "COTTON NO.2", "CRUDE OIL miNY", 
"FEEDER CATTLE", "GOLD", "HEATING OIL", "HENRY HUB NATURAL GAS", 
"LEAN HOG", "LIVE CATTLE", "NY HARBR GASOLINE BLDSTK", "ORANGE J.", 
"PALLADIUM", "PLATINUM", "PRIMARY NICKEL USD", "PRM HGH GD ALUMINIUM USD", 
"RANDOM LENGTH LUMBER", "ROBUSTA COFFEE (10)", "SILVER ", "SOYBEAN MEAL ", 
"SOYBEAN OIL", "SOYBEANS ", "SPCL HIGH GRADE ZINC USD", "STANDARD LEAD USD",

"SUGAR WHITE", "WHEAT"), class = "factor"), VALUE = c(500L, 10000L, 
420L, 420L, 100L, 5000L, 100L, 50L, 250L, 25L, 25L, 6L, 25L, 
10L, 375L, 10L, 500L, 150L, 1120L, 50L, 50L, 50L, 100L, 600L, 
50L, 500L, 400L, 400L, 110L)), .Names = c("SHORTDESCRIPTION", 
"VALUE"), class = "data.frame", row.names = c(NA, -29L))

I want to merge these df:

> y=merge(value,allcon,by.x="SHORTDESCRIPTION",all=T)

Here is the result:

y <-
structure(list(SHORTDESCRIPTION = structure(c(1L, 2L, 3L, 4L, 
5L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 29L, 
30L, 31L, 32L, 32L), .Label = c(" SUGAR NO.11", "COCOA", "COFFEE C", 
"COPPER", "CORN", "COTTON NO.2", "CRUDE OIL miNY", "FEEDER CATTLE", 
"GOLD", "HEATING OIL", "HENRY HUB NATURAL GAS", "LEAN HOG", "LIVE CATTLE", 
"NY HARBR GASOLINE BLDSTK", "ORANGE J.", "PALLADIUM", "PLATINUM", 
"PRIMARY NICKEL USD", "PRM HGH GD ALUMINIUM USD", "RANDOM LENGTH LUMBER", 
"ROBUSTA COFFEE (10)", "SILVER ", "SOYBEAN MEAL ", "SOYBEAN OIL", 
"SOYBEANS ", "SPCL HIGH GRADE ZINC USD", "STANDARD LEAD USD", 
"SUGAR WHITE", "WHEAT", "SILVER", "SOYBEANS", "SUGAR NO.11"), class =
"factor"), 
    VALUE = c(1120L, 10L, 375L, 250L, 50L, 50L, 500L, 500L, 500L, 
    100L, 420L, 10000L, 400L, 400L, 420L, 150L, 100L, 50L, 6L, 
    25L, 110L, 10L, 5000L, 100L, 600L, 50L, 25L, 25L, 50L, 50L, 
    50L, NA, NA, NA, NA), DESCRIPTION = structure(c(NA, NA, 1L, 
    2L, 4L, 3L, 5L, 6L, NA, 7L, NA, 8L, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, 9L, NA, NA, NA, NA, 12L, 13L, NA, 16L, 17L, 10L, 
    11L, 14L, 15L), .Label = c("COFFEE C Jul/10", "COPPER May/10", 
    "CORN Jul/10", "CORN May/10", "COTTON NO.2 Jul/10", "CRUDE OIL miNY
May/10", 
    "GOLD Jun/10", "HENRY HUB NATURAL GAS May/10", "ROBUSTA COFFEE (10)
Jul/10", 
    "SILVER May/10", "SOYBEANS Jul/10", "SPCL HIGH GRADE ZINC USD", 
    "STANDARD LEAD USD", "SUGAR NO.11 Jul/10", "SUGAR NO.11 May/10", 
    "WHEAT Jul/10", "WHEAT May/10"), class = "factor"), pl = c(NA, 
    NA, 4.75000000000003, -14, 7.75, -1.5, 3.74999999999999, 
    5.22500000000005, NA, 21.6999999999998, NA, -0.0490000000000013, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, 68, NA, NA, NA, NA, -41, 
    -118, NA, -64.25, 0, 45.300, -8.5, 2.72999999999999, -0.900000000000002
    ), quantity = c(NA, NA, 0, 0, 0, 0, 0, 0, NA, 0, NA, 0, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, 0, 0, 
    NA, 0, 0, 0, 0, 0, 0), PL = c(NA, NA, -1781.25000000001, 
    3500, -387.5, 75, -1874.99999999999, -2612.50000000003, NA, 
    -2169.99999999998, NA, 490.000000000013, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, -680, NA, NA, NA, NA, 1025, 2950, NA, 
    3212.5, 0, NA, NA, NA, NA)), .Names = c("SHORTDESCRIPTION", 
"VALUE", "DESCRIPTION", "pl", "quantity", "PL"), row.names = c(NA, 
-35L), class = "data.frame")

As you can see, the last 4 rows have a NA in y$VALUE. How can I instead get
the correct VALUE ?

TY for any help



More information about the R-help mailing list