[R] Create rows for columns in dataframe

Dark info at software-solutions.nl
Tue Aug 13 11:46:38 CEST 2013


Hi experts,

I have a dataframe with 100k+ records. it has a key/id column and 25 code
columns. I would like to restructure it having a row for each code column.

I have a structure like this (used dput):
structure(list(DSYSRTKY = structure(c(1L, 2L, 3L, 3L, 4L, 4L), .Names =
c("1", 
"2", "3", "4", "5", "6"), .Label = c("100000005", "100000203", 
"100000315", "100000327"), class = "factor"), C1 = structure(c(6L, 
3L, 2L, 5L, 1L, 4L), .Names = c("1", "2", "3", "4", "5", "6"), .Label =
c("41401", 
"42831", "45341", "486", "5990", "71535"), class = "factor"), 
    C2 = structure(c(5L, 1L, 3L, 6L, 4L, 2L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("4019", "51881", "5990", 
    "6826", "78900", "V4986"), class = "factor"), C3 = structure(c(6L, 
    3L, 5L, 2L, 4L, 1L), .Names = c("1", "2", "3", "4", "5", 
    "6"), .Label = c("5119", "5939", "72400", "7850", "8052", 
    "V1251"), class = "factor"), C4 = structure(c(6L, 5L, 3L, 
    1L, 2L, 4L), .Names = c("1", "2", "3", "4", "5", "6"), .Label =
c("3109", 
    "4019", "4241", "42789", "V1011", "V454"), class = "factor"), 
    C5 = structure(c(1L, 1L, 3L, 1L, 2L, 4L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "2720", "4019", 
    "7823"), class = "factor"), C6 = structure(c(1L, 1L, 2L, 
    1L, 4L, 3L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("", 
    "311", "41400", "49390"), class = "factor"), C7 = structure(c(1L, 
    1L, 2L, 1L, 3L, 4L), .Names = c("1", "2", "3", "4", "5", 
    "6"), .Label = c("", "2724", "2859", "V4581"), class = "factor"), 
    C8 = structure(c(1L, 1L, 3L, 1L, 4L, 2L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "40390", "71680", 
    "79029"), class = "factor"), C9 = structure(c(1L, 1L, 2L, 
    1L, 4L, 3L), .Names = c("1", "2", "3", "4", "5", "6"), .Label = c("", 
    "4168", "5859", "V1582"), class = "factor"), C10 = structure(c(1L, 
    1L, 3L, 1L, 1L, 2L), .Names = c("1", "2", "3", "4", "5", 
    "6"), .Label = c("", "49390", "7804"), class = "factor"), 
    C11 = structure(c(1L, 1L, 3L, 1L, 1L, 2L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "2724", "V066"), class =
"factor"), 
    C12 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "6930"), class = "factor"), 
    C13 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "41400"), class = "factor"), 
    C14 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "V4581"), class = "factor"), 
    C15 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "40291"), class = "factor"), 
    C16 = structure(c(1L, 1L, 2L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = c("", "4280"), class = "factor"), 
    C17 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C18 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C19 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C20 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C21 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C22 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C23 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C24 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor"), 
    C25 = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Names = c("1", 
    "2", "3", "4", "5", "6"), .Label = "", class = "factor")), .Names =
c("DSYSRTKY", 
"C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "C10", 
"C11", "C12", "C13", "C14", "C15", "C16", "C17", "C18", "C19", 
"C20", "C21", "C22", "C23", "C24", "C25"), row.names = c("1", 
"2", "3", "4", "5", "6"), class = "data.frame")

Now I want to restructure this dataframe not having 25 code fields but a row
for each code but only if the code has a value!

The new structure should look something like:
NewDataFrame <- data.frame(ID=integer(), DSYSRTKY=integer(),
CODE=character(),  PRIMAIRY=logical())

The ID column should just be an increment. PRIMAIRY is a boolean which
should be true if orriginally was the first code (C1).

It has to be efficient since my real data has many more rows than my example
structure of only 6 rows.
I tried some looping mechanism and it was working but it was not performing
at all.

Hopefully I provided enough information using dput.

Regards Derk




--
View this message in context: http://r.789695.n4.nabble.com/Create-rows-for-columns-in-dataframe-tp4673607.html
Sent from the R help mailing list archive at Nabble.com.



More information about the R-help mailing list