[R] Returning Data Frame from Function for use Outside Function

Thu Mar 25 13:41:21 CET 2010

I have a function (see below) that does some bootstrapping (I am happy to
expand offline why I could use existing functions.) I put my results into
and empty matrix and add a row of results with each iteration. My problem is
i am a new user to R and I don't understand data frames, matrices, elements,
and vectors well. What I would like is to have a data frame I can manipulate
outside of the function, further it would rock if I could get the names of
the fixed effect coefficients on the columns.

TIA

-------------- cut here --------------

options(repos="http://lib.stat.cmu.edu/R/CRAN/")
myPackages <- c("Hmisc", "lme4", "Epi")
install.packages(myPackages, dependencies=TRUE)
update.packages(checkBuilt=TRUE, ask=FALSE)
library(Hmisc)
library(lme4)
library(Epi)

memory.limit(size=4095)
vbig <-
read.csv("//10.56.16.18/TREP/workgroups/wg08_01_mna_dm_mauldin/Transport
File/vcohort.csv",
                    header=TRUE)
vcohort <- vbig[,c(1:12, 14:15, 21:33, 35:36)]
rm(vbig)
vcohort <- cleanup.import(vcohort)

## Change the reference levels for some of the factor 
## values. By default R orders the factor values alphabetically
## and numerically. In some case we wish to choose different 
## reference levels

vcohort$GENDER <- Relevel(factor(vcohort$SEX), list("M", "F"), first=TRUE)
vcohort$AGE <- Relevel(factor(vcohort$AGE_CAT), list("4", "2",  "3", "1"),
first=TRUE)
vcohort$VISN <- Relevel(factor(vcohort$VISN2), list("8",  "1",  "2",  "3", 
"4",  "5",  "6",
                                                          "7",  "9", "10",
"11", "12", "13",
                                                         "14", "15", "16",
"17", "18", "19",
                                                         "20", "21", "22",
"23"), first=TRUE)

#f <- mpr100 ~ time + nhb + hispanic + other + 
#                       rural + hrural +
#                      factor(age) + factor(gender) + factor(mstat) +
factor(svcpct2) + nvaclass +
#                       a1cgrp8 + anemdef + cbd + chf + chrnlung + htn_c + 
#                          hypothy + obese + perivasc  + pulmcirc + tumor + 
#                          depress + psych + 
#                       nhb*rural + hispanic*rural + other*rural + 
#                       nhb*hrural + hispanic*hrural + other*hrural + 
#                       nhb*factor(age)  + hispanic*factor(age) +
other*factor(age)  + 
#                       rural*factor(age) +  hrural*factor(age) + 
#                       (1|id) + (1|visn2)

f <- MPR100 ~ TIME + NHB + HISPANIC + OTHER + (1|ID)

boot.rem <- function(data, id, pct, time, formula, reps, vars) {
   ## Order the observations accroding to increasing ID
   my.df2    <- vcohort[order(data$ID, decreasing=FALSE),]

   ## Create a vector of nonduplicating IDs 
   unique.id <- my.df2[!duplicated(my.df2[,"ID"]),"ID"]

   ## Determine the sample size as a percentage of
   ## the population
   sz <- length(unique.id)*(pct/100)

   ## Create a matrix of NAs with ROWS equal to the number
   ## of REPS and the number of columns equal to the number
   ## of independent variables
   ## 
   results <- matrix(NA, nrow=reps, ncol=vars)
   ## Remove the data frame to help manage memory

   ## Do a any number of regressions to bootstrap regression
   ## estimates
   for(i in 1:reps){
      ## Randomly sample the population WITHOUT replacement
      ID <- sample(unique.id, sz, replace=FALSE)

      ## Create a data fram out of the vector of randomly
      ## sampled IDs
      sample.obs <- data.frame(ID)

      ## Display the number of rows in the random sample
      nrow(sample.obs)

      ## Merge the sampled observations with the cohort to
      ## get the variable information
      all <- merge(sample.obs, vcohort, by="ID")

      ## For some reason some observations get repeated
      ## in the sampling procedure. To account for this
      ## remove all observations having duplicate ID and
      ## TIME information e.g. id=1502, time=1; id=1502,
      ## time=1 (delete one of these)
      analysis <- all[!duplicated(all[,c("ID","TIME")]),]

      ## Fit the mixed model
      fit <- lmer(formula, analysis)

      ## Output the COEFFICIENT results into a data vector
      results[i,] <- fixef(fit)
      if (i==reps) print(pct"% sample")
      if (i==reps) pring(reps "repetitions")
      if (i==reps) print(summary(results))
   }
} 

boot.rem(vcohort, ID, 10, TIME, f, 3, 5)

-- 
View this message in context: http://n4.nabble.com/Returning-Data-Frame-from-Function-for-use-Outside-Function-tp1690556p1690556.html
Sent from the R help mailing list archive at Nabble.com.