[R] How to pass variable column name into R function

Jeff Newmiller jdnewmil at dcn.davis.ca.us
Fri Dec 19 09:01:21 CET 2014


HTML mutilated your email. Please post in plain text.

Although you are playing with some tricky stuff, you seem to have 
difficulty understanding the difference between a symbol and the value the 
symbol represents. Re-reading section 6.1 ("Lists") of "An Introduction to 
R", which comes with the software, seems called for.

data$myvar is (almost) equivalent to data[[ "myvar" ]], but the first form 
looks at the characters in the symbol myvar directly, while the other form 
can use either a string literal or a variable to accomplish the same task:

# the column name is stored as a character string in the variable myvar
myvar <- "season"
# dta defined below
# [[ ]] with a string literal ...
str( dta[[ "season" ]] )
# ... and [[ ]] with a variable holding that string select the same column
str( dta[[ myvar ]] )

The "almost" above alludes to the "abbreviated" form that is mentioned in 
Section 6.1. (go... read... it....)

Oh, and "df" is the name of a function in base R... so using it as a 
variable name is poor form (confusing).

# Sample data: the 50 rows from the dput( head( df, 50 ) ) output quoted
# further down, stored under the name dta rather than df.
dta <- structure(list(datetime = structure(c(14975, 14975, 14975, 14975, 
14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 
14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 
14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 
14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 
14976, 14976, 14976, 14977, 14977, 14977), class = "Date"), season = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), holiday = c(0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), workingday = c(0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), weather = c(1L, 1L, 1L, 
1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 
3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), temp = c(9.84, 9.02, 9.02, 
9.84, 9.84, 9.84, 9.02, 8.2, 9.84, 13.12, 15.58, 14.76, 17.22, 18.86, 
18.86, 18.04, 17.22, 18.04, 17.22, 17.22, 16.4, 16.4, 16.4, 18.86, 18.86, 
18.04, 17.22, 18.86, 18.86, 17.22, 16.4, 16.4, 15.58, 14.76, 14.76, 14.76, 
14.76, 14.76, 13.94, 13.94, 13.94, 14.76, 13.12, 12.3, 10.66, 9.84, 9.02, 
9.02, 8.2, 6.56), atemp = c(14.395, 13.635, 13.635, 14.395, 14.395, 12.88, 
13.635, 12.88, 14.395, 17.425, 19.695, 16.665, 21.21, 22.725, 22.725, 
21.97, 21.21, 21.97, 21.21, 21.21, 20.455, 20.455, 20.455, 22.725, 22.725, 
21.97, 21.21, 22.725, 22.725, 21.21, 20.455, 20.455, 19.695, 17.425, 
16.665, 16.665, 17.425, 17.425, 16.665, 16.665, 16.665, 16.665, 14.395, 
13.635, 11.365, 10.605, 11.365, 9.85, 8.335, 6.82), humidity = c(81L, 80L, 
80L, 75L, 75L, 75L, 80L, 86L, 75L, 76L, 76L, 81L, 77L, 72L, 72L, 77L, 82L, 
82L, 88L, 88L, 87L, 87L, 94L, 88L, 88L, 94L, 100L, 94L, 94L, 77L, 76L, 
71L, 76L, 81L, 71L, 66L, 66L, 76L, 81L, 71L, 57L, 46L, 42L, 39L, 44L, 44L, 
47L, 44L, 44L, 47L), windspeed = c(0, 0, 0, 0, 0, 6.0032,  0, 0, 0, 0, 
16.9979, 19.0012, 19.0012, 19.9995, 19.0012, 19.9995, 19.9995, 19.0012, 
16.9979, 16.9979, 16.9979, 12.998, 15.0013, 19.9995, 19.9995, 16.9979, 
19.0012, 12.998, 12.998, 19.9995, 12.998, 15.0013, 15.0013, 15.0013, 
16.9979, 19.9995, 8.9981, 12.998, 11.0014, 11.0014, 12.998, 22.0028, 
30.0026, 23.9994, 22.0028, 19.9995, 11.0014, 23.9994, 27.9993, 26.0027), 
casual = c(3L, 8L, 5L, 3L, 0L, 0L, 2L, 1L, 1L, 8L, 12L, 26L, 29L, 47L, 
35L, 40L, 41L, 15L, 9L, 6L, 11L, 3L, 11L, 15L, 4L, 1L, 1L, 2L, 2L, 0L, 0L, 
0L, 1L, 7L, 16L, 20L, 11L, 4L, 19L, 9L, 7L, 10L, 1L, 5L, 11L, 0L, 0L, 0L, 
0L, 0L), registered = c(13L, 32L, 27L, 10L, 1L, 1L, 0L, 2L, 7L, 6L, 24L, 
30L, 55L, 47L, 71L, 70L, 52L, 52L, 26L, 31L, 25L, 31L, 17L, 24L, 13L, 16L, 
8L, 4L, 1L, 2L, 1L, 8L, 19L, 46L, 54L, 73L, 64L, 55L, 55L, 67L, 58L, 43L, 
29L, 17L, 20L, 9L, 8L, 5L, 2L, 1L), count = c(16L, 40L, 32L, 13L, 1L, 1L, 
2L, 3L, 8L, 14L, 36L, 56L, 84L, 94L, 106L, 110L, 93L, 67L, 35L, 37L, 36L, 
34L, 28L, 39L, 17L, 17L, 9L, 6L, 3L, 2L, 1L, 8L, 20L, 53L, 70L, 93L, 75L, 
59L, 74L, 76L, 65L, 53L, 30L, 22L, 31L, 9L, 8L, 5L, 2L, 1L)), .Names = 
c("datetime", "season", "holiday", "workingday", "weather", "temp", 
"atemp", "humidity", "windspeed", "casual",  "registered", "count"), 
row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", 
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", 
"24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", 
"36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", 
"48", "49", "50"), class = "data.frame")

# Append one 0/1 indicator column per distinct value of a column.
#
# data:  a data frame
# myvar: name of the column to expand, given as a character string
#        (e.g. "workingday"), not as a bare symbol
#
# Returns a copy of data with extra numeric columns named <myvar><value>,
# one per distinct value, in sorted order. The original column is left
# untouched. Rows where the column is NA get NA in the indicator columns.
#
# The indicators are built by direct comparison instead of model.matrix,
# so a column with only one distinct value works, and so does a column
# name that could not be pasted into a formula.
binarize <- function( data, myvar ) {
   stopifnot( is.data.frame( data )
            , is.character( myvar )
            , length( myvar ) == 1L
            , myvar %in% names( data ) )
   # compare as character so numeric, factor and character columns are
   # treated alike; the column in data itself is not modified
   values <- as.character( data[[ myvar ]] )
   # sort() drops NA, so NA never becomes a level of its own
   lvls <- sort( unique( values ) )
   # one row per observation, one column per level; "* 1" turns the
   # logical comparison result into 0/1 numbers
   temp <- outer( values, lvls, "==" ) * 1
   # rep() keeps the name vector empty when there are no levels
   # (paste0( myvar, character( 0 ) ) would return myvar itself)
   dimnames( temp ) <- list( row.names( data )
                           , paste0( rep( myvar, length( lvls ) ), lvls ) )
   data <- data.frame( data, temp )
   data
}
# your subset only had one value in the season column, which breaks your 
# "trick" with model.matrix
# (workingday takes both 0 and 1 in these 50 rows, so it is used for the
# demonstration instead)
newdta <- binarize( dta, "workingday" )
# print the result to inspect the appended indicator columns
newdta
# I think your idea of pasting these columns into the same data frame may
# lead to column name collisions down the road... but that is your
# minefield.

On Thu, 18 Dec 2014, Jeff Johnson wrote:

>
> I know this has been explained a few times here in different scenarios, but I am having a hard time digesting this.
>
> The following code works fine as long as it's not inside a function (see below).
>
> df$season <- as.character(df$season) temp <- model.matrix( ~ season - 1, data=df) df <- cbind(df,temp)
> BEFORE:
>
> head(df[c(1,2)]) datetime season 1 2011-01-01 1 2 2011-01-01 1 3 2011-01-01 1 4 2011-01-01 1 5 2011-01-01 1 6 2011-01-01 1
> AFTER:
>
>> head(df[c(1,2,13:16)]) datetime season season1 season2 season3 season4 1 2011-01-01 1 1 0 0 0 2 2011-01-01 1 1 0 0 0 3 2011-01-01      1 1 0 0 0 4 2011-01-01 1 1 0 0 0 5 2011-01-01 1 1 0 0 0 6 2011-01-01      1 1 0 0 0
> However, when I try to wrap it in a multi-use function:
>
> binarize <- function(data, myvar) { data$myvar <- as.character(data$myvar) temp <- model.matrix( ~ myvar - 1, data=data) data <- cbind(data,temp) }
> it throws an error, undoubtedly because it cannot evaluate myvar or data (or both?): Error in $<-.data.frame(*tmp*, "myvar", value = character(0)) : replacement has 0 rows, data has 10886
>
> I've tried experimenting with eval(substitute()) but still it's not working. My ideal end-state is that you start with a dataframe and a variable, have the function map all of the values for the selected variable into separate binary columns and append that to the original dataframe. Again, when it's not in a function it works perfectly.
>
> Here's the dput data if it helps to reproduce.
>
>> dput(head(df,50)) structure(list(datetime = structure(c(14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14975, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14976, 14977, 14977, 14977), class = "Date"), season = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), holiday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), workingday = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,!
>
>  0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), weather = c(1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), temp = c(9.84, 9.02, 9.02, 9.84, 9.84, 9.84, 9.02, 8.2, 9.84, 13.12, 15.58, 14.76, 17.22, 18.86, 18.86, 18.04, 17.22, 18.04, 17.22, 17.22, 16.4, 16.4, 16.4, 18.86, 18.86, 18.04, 17.22, 18.86, 18.86, 17.22, 16.4, 16.4, 15.58, 14.76, 14.76, 14.76, 14.76, 14.76, 13.94, 13.94, 13.94, 14.76, 13.12, 12.3, 10.66, 9.84, 9.02, 9.02, 8.2, 6.56), atemp = c(14.395, 13.635, 13.635, 14.395, 14.395, 12.88, 13.635, 12.88, 14.395, 17.425, 19.695, 16.665, 21.21, 22.725, 22.725, 21.97, 21.21, 21.97, 21.21, 21.21, 20.455, 20.455, 20.455, 22.725, 22.725, 21.97, 21.21, 22.725, 22.725, 21.21, 20.455, 20.455, 19.695, 17.425, 16.665, 16.665, 17.425, 17.425, 16.665, 16.665, 16.665, 16.665, 14.395, 13.635, 11.365, !
>
> 10.605, 11.365, 9.85, 8.335, 6.82), humidity = c(81L, 80L, 80L, 75L, 7
> 5L, 75L, 80L, 86L, 75L, 76L, 76L, 81L, 77L, 72L, 72L, 77L, 82L, 82L, 88L, 88L, 87L, 87L, 94L, 88L, 88L, 94L, 100L, 94L, 94L, 77L, 76L, 71L, 76L, 81L, 71L, 66L, 66L, 76L, 81L, 71L, 57L, 46L, 42L, 39L, 44L, 44L, 47L, 44L, 44L, 47L), windspeed = c(0, 0, 0, 0, 0, 6.0032,  0, 0, 0, 0, 16.9979, 19.0012, 19.0012, 19.9995, 19.0012, 19.9995, 19.9995, 19.0012, 16.9979, 16.9979, 16.9979, 12.998, 15.0013, 19.9995, 19.9995, 16.9979, 19.0012, 12.998, 12.998, 19.9995, 12.998, 15.0013, 15.0013, 15.0013, 16.9979, 19.9995, 8.9981, 12.998, 11.0014, 11.0014, 12.998, 22.0028, 30.0026, 23.9994, 22.0028, 19.9995, 11.0014, 23.9994, 27.9993, 26.0027), casual = c(3L, 8L, 5L, 3L, 0L, 0L, 2L, 1L, 1L, 8L, 12L, 26L, 29L, 47L, 35L, 40L, 41L, 15L, 9L, 6L, 11L, 3L, 11L, 15L, 4L, 1L, 1L, 2L, 2L, 0L, 0L, 0L, 1L, 7L, 16L, 20L, 11L, 4L, 19L, 9L, 7L, 10L, 1L, 5L, 11L, 0L, 0L, 0L, 0L, 0L), registered = c(13L, 32L, 27L, 10L, 1L, 1L, 0L, 2L, 7L, 6L, 24L, 30L, 55L, 47L, 71L, 70L, 52L, 52L, 26L, 31L, 25L, 31L, 17L, 2!
>
> 4L, 13L, 16L, 8L, 4L, 1L, 2L, 1L, 8L, 19L, 46L, 54L, 73L, 64L, 55L, 55L, 67L, 58L, 43L, 29L, 17L, 20L, 9L, 8L, 5L, 2L, 1L), count = c(16L, 40L, 32L, 13L, 1L, 1L, 2L, 3L, 8L, 14L, 36L, 56L, 84L, 94L, 106L, 110L, 93L, 67L, 35L, 37L, 36L, 34L, 28L, 39L, 17L, 17L, 9L, 6L, 3L, 2L, 1L, 8L, 20L, 53L, 70L, 93L, 75L, 59L, 74L, 76L, 65L, 53L, 30L, 22L, 31L, 9L, 8L, 5L, 2L, 1L)), .Names = c("datetime", "season", "holiday", "workingday", "weather", "temp", "atemp", "humidity", "windspeed", "casual",  "registered", "count"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50"), class = "data.frame")
> Thanks for the help in advance!
>
>
>
>
> 	[[alternative HTML version deleted]]
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>

---------------------------------------------------------------------------
Jeff Newmiller                        The     .....       .....  Go Live...
DCN:<jdnewmil at dcn.davis.ca.us>        Basics: ##.#.       ##.#.  Live Go...
                                       Live:   OO#.. Dead: OO#..  Playing
Research Engineer (Solar/Batteries            O.O#.       #.O#.  with
/Software/Embedded Controllers)               .OO#.       .OO#.  rocks...1k



More information about the R-help mailing list