[R] assign multiple variables at once

Brian Diggs diggsb at ohsu.edu
Fri Aug 13 22:11:25 CEST 2010


On 8/13/2010 11:08 AM, Hosack, Michael wrote:
> R Experts,
>
> I would like to create a series of variables without having
> to assign a separate line of code for each new variable. My dataframe (DF) contains
> two groups of linked variables (ESP1:ESP9) and (ECRL1:ECRL9). Within ESP1:ESP9 are
> abbreviated species codes (full dataframe contains 26 codes). ECRL1 represents the
> number of species x in variable ESP1 harvested, and so on through ESP9 and ECRL9.
> What I want to do is create 26 new variables (one for each unique species code) that
> contains the number harvested (ECRL) for each corresponding species code listed for
> each row of the data set. Example (row 14), the new variable YP Harvest would equal
> 90 (ECRL2) and WP Harvest would equal 0 (ECRL1), all other species code var's would
> contain NA.
>
> I hope I made this clear enough.
>
> Thank you,
>
> Mike
>
> Current method: one line per species
>
> EBTCH1.h$YP.H<- with(EBTCH1.h,ifelse(ESP1 %in% 'YP',ECRL1,ifelse(ESP2 %in% 'YP',ECRL2,
> ifelse(ESP3 %in% 'YP',ECRL3,ifelse(ESP4 %in% 'YP',ECRL4,ifelse(ESP5 %in% 'YP',ECRL5,
> ifelse(ESP6 %in% 'YP',ECRL6,ifelse(ESP7 %in% 'YP',ECRL7,ifelse(ESP8 %in% 'YP',ECRL8,
> ifelse(ESP9 %in% 'YP',ECRL9,0))))))))))
>
>
> DF<-
> structure(list(MM = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
> 5L, 5L, 5L, 5L, 5L), DD = c(3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L,
> 8L, 8L, 8L, 8L, 8L, 8L), DTYPE = c(2, 2, 2, 2, 2, 2, 1, 1, 1,
> 1, 1, 1, 1, 1, 1), TOD = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
> 1, 1, 1), SITENUM = c("102", "104", "104", "104", "101", "101",
> "102", "103", "101", "101", "101", "101", "103", "103", "103"
> ), CURTIM = c(1450L, 1736L, 1755L, 1804L, 1950L, 2007L, 1150L,
> 1450L, 2000L, 1003L, 1030L, 1036L, 1300L, 1310L, 1320L), GRPFSH = c(2L,
> 2L, 2L, 2L, 2L, 3L, 1L, 2L, 3L, 2L, 4L, 1L, 1L, 3L, 1L), EEFF = c(11.5,
> 19, 5, 20, 0, 0, 5, 8, 0, 0, 0, 0, 3, 12, 6), ESP1 = c("SMB",
> "SMB", "SMB", "SMB", NA, NA, "YP", "YP", NA, NA, NA, NA, "RG",
> "WP", "WP"), ESP2 = c(NA, "RB", NA, NA, NA, NA, NA, NA, NA, NA,
> NA, NA, "SMB", "YP", "YP"), ESP3 = c(NA, NA, NA, NA, NA, NA,
> NA, NA, NA, NA, NA, NA, "RB", "RBS", NA), ESP4 = c(NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
> NA_character_, NA_character_, NA_character_, NA_character_),
>      ESP5 = c(NA_character_, NA_character_, NA_character_, NA_character_,
>      NA_character_, NA_character_, NA_character_, NA_character_,
>      NA_character_, NA_character_, NA_character_, NA_character_,
>      NA_character_, NA_character_, NA_character_), ESP6 = c(NA,
>      NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
>      ESP7 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
>      NA, NA, NA), ESP8 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
>      NA, NA, NA, NA, NA, NA), ESP9 = c(NA, NA, NA, NA, NA, NA,
>      NA, NA, NA, NA, NA, NA, NA, NA, NA), ECRL1 = c(0L, 0L, 0L,
>      0L, 0L, 0L, 4L, 5L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ECRL2 = c(0L,
>      0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 90L, 30L),
>      ECRL3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
>      0L, 0L, 0L), ECRL4 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
>      0L, 0L, 0L, 0L, 0L, 0L), ECRL5 = c(0L, 0L, 0L, 0L, 0L, 0L,
>      0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ECRL6 = c(0L, 0L, 0L,
>      0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), ECRL7 = c(0L,
>      0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
>      ECRL8 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
>      0L, 0L, 0L), ECRL9 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
>      0L, 0L, 0L, 0L, 0L, 0L)), .Names = c("MM", "DD", "DTYPE",
> "TOD", "SITENUM", "CURTIM", "GRPFSH", "EEFF", "ESP1", "ESP2",
> "ESP3", "ESP4", "ESP5", "ESP6", "ESP7", "ESP8", "ESP9", "ECRL1",
> "ECRL2", "ECRL3", "ECRL4", "ECRL5", "ECRL6", "ECRL7", "ECRL8",
> "ECRL9"), row.names = c(NA, 15L), class = "data.frame")
>

Michael,

An easier approach is to reshape this into a long format (one row for 
each ESP/ECRL combination, carrying the rest of the identifying 
variables), and then reshape it back to wide format using the ESP value 
to define the new column:

# Identifier columns: the first eight columns of DF, i.e. the variables that
# do not vary across the ESP/ECRL slots of a single observation.
idvar <- names(DF)[1:8]

# Step 1: wide -> long. Stack the nine (ESPn, ECRLn) column pairs into two
# columns, ESP and ECRL, repeating the identifier columns for every pair.
# The order of `v.names` must match the order of the `varying` list.
# `timevar = NULL` is kept exactly as in the original post.
DFr <- reshape(DF,
  varying = list(paste0("ESP", 1:9), paste0("ECRL", 1:9)),
  v.names = c("ESP", "ECRL"),
  timevar = NULL, idvar = idvar, direction = "long")

# Step 2: drop the slots in which no species code was recorded.
DFr <- DFr[!is.na(DFr$ESP), ]

# Step 3: long -> wide, this time keyed on the species code, so each distinct
# ESP value gets its own ECRL column.
DFr <- reshape(DFr, timevar = "ESP", idvar = idvar, direction = "wide")

# Step 4: attach the per-species columns to the original data.
# `all.x = TRUE` keeps the rows of DF that have no species recorded at all:
# they were removed from DFr in step 2, so the default inner join would
# silently drop them. Their per-species columns come back as NA.
merge(DF, DFr, by = idvar, all.x = TRUE)


If you want to rename columns, or convert NA's to 0, you can do that to 
DFr or after the merge.

--
Brian Diggs
Senior Research Associate, Department of Surgery, Oregon Health & 
Science University



More information about the R-help mailing list