[R] doubt with Odds ratio - URGENT HELP NEEDED

Michael Dewey lists at dewey.myzen.co.uk
Wed Sep 23 17:29:37 CEST 2015


Dear Rosa

Can you remove all the code which is not relevant to calculating the 
odds ratio so we can see what is going on?

On 23/09/2015 16:06, Rosa Oliveira wrote:
> Dear Michael,
>
>
> I found some of the errors, but others I wasn’t able to.
>
> And my huge huge problem concerns OR and OR confidence interval :(
>
>
> *New Corrected code:*
>
>
> casedata <-read.spss("tas_05112008.sav")
> tas.data<-data.frame(casedata)
>
> #Delete patients that were not discharged
> tas.data                     <- tas.data[ tas.data$hosp!="si ",]
> tas.data$resultado.hosp      <- ifelse(tas.data$hosp=="l", 0, 1)
>
> tas.data$tas_d2      <-
> log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA,
> tas.data$tas_d2))
> tas.data$tas_d3      <-
> log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA,
> tas.data$tas_d3))
> tas.data$tas_d4      <-
> log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA,
> tas.data$tas_d4))
> tas.data$tas_d5      <-
> log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA,
> tas.data$tas_d5))
> tas.data$tas_d6      <-
> log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA,
> tas.data$tas_d6))
>
>      tas.data$age      <-
> ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>
>
>      tas.data                     <-   data.frame(tas1 =
> tas.data$tas_d2, tas2 = tas.data$tas_d3,
>                                               tas3 = tas.data$tas_d4,
> tas4 = tas.data$tas_d5,
>                                               tas5 = tas.data$tas_d6,
> age = tas.data$age,
>                                               discharge =
> tas.data$resultado.hosp, id.pat=tas.data$id)
>
> #    tas.data$discharge              <- factor(   tas.data$discharge ,
> levels=c(0,1), labels = c("dead", "alive"))
>
>    #select only cases that have more than 3 tas
>      tas.data                      <- tas.data[apply(tas.data[,-8:-6],
> 1, function(x) sum(!is.na(x)))>2,]
>
>
>      nsample <- n.obs              <- dim(tas.data)[1]  #nr of patients
> with more than 2 tas measurements
>
>      tas.data.long                 <- data.frame(
> tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs),
> age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
>                                         id=rep(c(1:n.obs), 5))
>      tas.data.long                 <- tas.data.long
>   [order(tas.data.long$id),]
>
>      age=tas.data$age
>
> ##################################################################################################
> #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE  DEATh
> ##################################################################################################
>    mean.alive                      <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
>    mean.dead                       <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
>    stderr                          <- function(x)
> sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
>    se.alive                        <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
>    se.dead                         <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
>    summarytas                      <- data.frame (media = c(mean.dead,
> mean.alive),
>                                        standarderror = c( se.dead,
> se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>
>
> ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
>      geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2*
> standarderror), width=.1) +
>    scale_color_manual(values=c("blue", "red")) +
>     theme(legend.text=element_text(size=20),
> axis.text=element_text(size=16),
> axis.title=element_text(size=20,face="bold")) +
>     scale_x_continuous(name="Days") +
>    scale_y_continuous(name="log tas") +
>    geom_line() +
>      geom_point()
>
>
> library(verification)
> prev <- summary(glm(tas.data[,7]~tas.data[,4],family=binomial(link=probit)))
> answer            = c(prev$coefficients[,1:2])
>
>
> roc.plot(tas.data[,7], prev, show.thres = FALSE, legen=F )
>
>
>
> modelo<-function (dataainit)
> {
>
> #dataa<-tas.data
> dataa<-dataainit
>
> dataa$ident<-seq(1:90)
> tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
> dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
> time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5),
> ident=rep(dataa$ident,5))
> tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
> tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
> #mixed model for the longitudinal tas
> lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days,
> na.action=na.exclude )
> #Random intercept and slopes
> pred.lme<-predict(lme.1)
> lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
> lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
> selector<-as.numeric(names(lme.intercept)) #to select not NA rows. Apply
> to the vector in the dataset
>   test(dataa$intercept[resultado.hosp==1],
> dataa$intercept[resultado.hosp==0])
> print(summary(model.surv1))
> return(model.surv1$coef)
>   }
>
>
> *CONSOLE RESULT: (errors in red)*
>
>  > casedata <-read.spss("tas_05112008.sav")
> Warning message:
> In read.spss("tas_05112008.sav") :
>    tas_05112008.sav: Unrecognized record type 7, subtype 18 encountered
> in system file
>  > tas.data<-data.frame(casedata)
>  >
>  > #Delete patients that were not discharged
>  > tas.data                     <- tas.data[ tas.data$hosp!="si ",]
>  > tas.data$resultado.hosp      <- ifelse(tas.data$hosp=="l", 0, 1)
>  >
>  > tas.data$tas_d2      <-
> log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA,
> tas.data$tas_d2))
>  > tas.data$tas_d3      <-
> log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA,
> tas.data$tas_d3))
>  > tas.data$tas_d4      <-
> log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA,
> tas.data$tas_d4))
>  > tas.data$tas_d5      <-
> log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA,
> tas.data$tas_d5))
>  > tas.data$tas_d6      <-
> log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA,
> tas.data$tas_d6))
>  >
>  >     tas.data$age      <-
> ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>  >
>  >
>  >     tas.data                     <-   data.frame(tas1 =
> tas.data$tas_d2, tas2 = tas.data$tas_d3,
> +                                              tas3 = tas.data$tas_d4,
> tas4 = tas.data$tas_d5,
> +                                              tas5 = tas.data$tas_d6,
> age = tas.data$age,
> +                                              discharge =
> tas.data$resultado.hosp, id.pat=tas.data$id)
>  >
>  > #    tas.data$discharge              <- factor(   tas.data$discharge
> , levels=c(0,1), labels = c("dead", "alive"))
>  >
>  >   #select only cases that have more than 3 tas
>  >     tas.data                      <- tas.data[apply(tas.data[,-8:-6],
> 1, function(x) sum(!is.na(x)))>2,]
>  >
>  >
>  >
>  >     nsample <- n.obs              <- dim(tas.data)[1]  #nr of
> patients with more than 2 tas measurements
>  >
>  >     tas.data.long                 <- data.frame(
> tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs),
> age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
> +                                        id=rep(c(1:n.obs), 5))
>  >     tas.data.long                 <- tas.data.long
>   [order(tas.data.long$id),]
>  >
>  >     age=tas.data$age
>  >
>  >
> ##################################################################################################
>  > #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE  DEATh
>  >
> ##################################################################################################
>  >   mean.alive                      <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
>  >   mean.dead                       <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
>  >   stderr                          <- function(x)
> sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
>  >   se.alive                        <-
> apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
>  >   se.dead                         <-
> apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
>  >   summarytas                      <- data.frame (media = c(mean.dead,
> mean.alive),
> +                                       standarderror = c( se.dead,
> se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>  >
>  >
>  > ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
> +     geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2*
> standarderror), width=.1) +
> +   scale_color_manual(values=c("blue", "red")) +
> +    theme(legend.text=element_text(size=20),
> axis.text=element_text(size=16),
> axis.title=element_text(size=20,face="bold")) +
> +    scale_x_continuous(name="Days") +
> +   scale_y_continuous(name="log tas") +
> +   geom_line() +
> +     geom_point()
> Error in mean - 2 * standarderror :
>    non-numeric argument to binary operator
>  >
>  >
>  > library(verification)
>  > prev <-
> summary(glm(tas.data[,7]~tas.data[,4],family=binomial(link=probit)))
>  > answer            = c(prev$coefficients[,1:2])
>  >
>  >
>  > roc.plot(tas.data[,7], prev, show.thres = FALSE, legen=F )
> Error in is.finite(x) : default method not implemented for type 'list'
>  >
>  >
>  >
>  > modelo<-function (dataainit)
> +
> + {
> +
> + #dataa<-tas.data
> + dataa<-dataainit
> +
> + dataa$ident<-seq(1:90)
> + tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
> + dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
> + time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5),
> ident=rep(dataa$ident,5))
> +
> + tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
> + tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
> +
> + #mixed model for the longitudinal tas
> + lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days,
> na.action=na.exclude )
> +
> + #Random intercept and slopes
> + pred.lme<-predict(lme.1)
> + lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
> + lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
> + selector<-as.numeric(names(lme.intercept)) #to select not NA rows.
> Apply to the vector in the dataset
> +
> +  test(dataa$intercept[resultado.hosp==1],
> dataa$intercept[resultado.hosp==0])
> +
> + print(summary(model.surv1))
> + return(model.surv1$coef)
> +
> +  }
>  >
>
> I can’t get the OR and OR CI :(
>
>
> *DATA:*
>
>
>
>
>
>
> Best,
>
> RO
>
>
>
>
> Atenciosamente,
> Rosa Oliveira
>
> --
> ____________________________________________________________________________
>
>
>
>
> Rosa Celeste dos Santos Oliveira,
>
> E-mail:rosita21 at gmail.com <mailto:rosita21 at gmail.com>
> Tlm: +351 939355143
> Linkedin: https://pt.linkedin.com/in/rosacsoliveira
> ____________________________________________________________________________
> "Many admire, few know"
> Hippocrates
>
>> On 23 Sep 2015, at 12:02, Michael Dewey <lists at dewey.myzen.co.uk
>> <mailto:lists at dewey.myzen.co.uk>> wrote:
>>
>> Dear Rosa
>>
>> It would help if you posted the error messages where they occur so
>> that we can see which of your commands caused which error. However see
>> comment inline below.
>>
>> On 22/09/2015 22:17, Rosa Oliveira wrote:
>>> Dear all,
>>>
>>>
>>> I’m trying to compute Odds ratio and OR confidence interval.
>>>
>>> I’m really naive, sorry for that.
>>>
>>>
>>> I attach my data and my code.
>>>
>>> I’m having lots of errors:
>>>
>>> 1. Error in data.frame(tas1 = tas.data$tas_d2, tas2 =
>>> tas.data$tas_d3, tas3 = tas.data$tas_d4,  :
>>>   arguments imply differing number of rows: 90, 0
>>>
>>
>> At least one of tas_d2, tas_d3, tas_d4 does not exist
>>
>> I suggest fixing that one and hoping the rest go away.
>>
>>> 2. Error in data.frame(tas = c(unlist(tas.data[, -8:-6])), time =
>>> rep(c(0:4),  :
>>>   arguments imply differing number of rows: 630, 450, 0
>>>
>>> 3. Error: object 'tas.data.long' not found
>>>
>>> 4. Error in data.frame(media = c(mean.dead, mean.alive),
>>> standarderror = c(se.dead,  :
>>>   arguments imply differing number of rows: 14, 10
>>>
>>> 5. Error in ggplot(summarytas, aes(x = c(c(1:5), c(1:5)), y = mean,
>>> colour = discharge)) :
>>>   object 'summarytas' not found
>>>
>>> 6. Error in summary(glm(tas.data[, 6] ~ tas.data[, 4], family =
>>> binomial(link = probit))) :
>>>   error in evaluating the argument 'object' in selecting a method for
>>> function 'summary': Error in eval(expr, envir, enclos) : y values
>>> must be 0 <= y <= 1
>>>
>>> 7. Error in wilcox.test.default(pred[obs == 1], pred[obs == 0],
>>> alternative = "great") :
>>>   not enough (finite) 'x' observations
>>> In addition: Warning message:
>>> In is.finite(x) & apply(pred, 1, f) :
>>>   longer object length is not a multiple of shorter object length
>>>
>>>
>>> and off course I’m not getting OR.
>>>
>>> Nonetheless all this errors, I think I have not been able to compute
>>> de code to get OR and OR confidence interval.
>>>
>>>
>>> Can anyone help me please. It’s really urgent.
>>>
>>> PLEASE
>>>
>>> THE CODE:
>>>
>>> the hospital outcome is discharge.
>>>
>>> require(gdata)
>>> library(foreign)
>>> library(nlme)
>>> library(lme4)
>>> library(boot)
>>> library(MASS)
>>> library(Hmisc)
>>> library(plotrix)
>>> library(verification)
>>> library(mvtnorm)
>>> library(statmod)
>>> library(epiR)
>>>
>>> #########################################################################################
>>> # Data preparation
>>>                                                                      #
>>> #########################################################################################
>>>
>>> setwd("/Users/RO/Desktop")
>>>
>>> casedata <-read.spss("tas_05112008.sav")
>>> tas.data<-data.frame(casedata)
>>>
>>> #Delete patients that were not discharged
>>> tas.data                     <- tas.data[ tas.data$hosp!="si ",]
>>> tas.data$resultado.hosp      <- ifelse(tas.data$hosp=="l", 0, 1)
>>>
>>> tas.data$tas_d2      <-
>>> log(ifelse(tas.data$tas_d2==88888|tas.data$tas_d2==99999, NA,
>>> tas.data$tas_d2))
>>> tas.data$tas_d3      <-
>>> log(ifelse(tas.data$tas_d3==88888|tas.data$tas_d3==99999, NA,
>>> tas.data$tas_d3))
>>> tas.data$tas_d4      <-
>>> log(ifelse(tas.data$tas_d4==88888|tas.data$tas_d4==99999, NA,
>>> tas.data$tas_d4))
>>> tas.data$tas_d5      <-
>>> log(ifelse(tas.data$tas_d5==88888|tas.data$tas_d5==99999, NA,
>>> tas.data$tas_d5))
>>> tas.data$tas_d6      <-
>>> log(ifelse(tas.data$tas_d6==88888|tas.data$tas_d6==99999, NA,
>>> tas.data$tas_d6))
>>>
>>>     tas.data$age      <-
>>> ifelse(tas.data$age==88888|tas.data$age==99999, NA, tas.data$age)
>>>
>>>
>>>     tas.data                     <-   data.frame(tas1 =
>>> tas.data$tas_d2, tas2 = tas.data$tas_d3,
>>>                                              tas3 = tas.data$tas_d4,
>>> tas4 = tas.data$tas_d5,
>>>                                              tas5 = tas.data$tas_d6,
>>> age = tas.data$age,
>>>                                              discharge =
>>> tas.data$resultado.hosp, id.pat=tas.data$ID)
>>>
>>> #    tas.data$discharge              <- factor(   tas.data$discharge
>>> , levels=c(0,1), labels = c("dead", "alive"))
>>>
>>>   #select only cases that have more than 3 tas
>>>     tas.data                      <- tas.data[apply(tas.data[,-8:-6],
>>> 1, function(x) sum(!is.na(x)))>2,]
>>>
>>>
>>>
>>>     nsample <- n.obs              <- dim(tas.data)[1]  #nr of
>>> patients with more than 2 tas measurements
>>>
>>>     tas.data.long                 <- data.frame(
>>> tas=c(unlist(tas.data[,-8:-6])), time=rep(c(0:4), each=n.obs),
>>> age=rep(tas.data$age, 5), discharge=rep(tas.data$discharge, 5),
>>>                                        id=rep(c(1:n.obs), 5))
>>>     tas.data.long                 <- tas.data.long
>>>  [order(tas.data.long$id),]
>>>
>>>     age=tas.data$age
>>>
>>> ##################################################################################################
>>> #PLOT EMPIRICAL MEANS OF CRP FOR ALIVE  DEATh
>>> ##################################################################################################
>>>   mean.alive                      <-
>>> apply(tas.data[tas.data$discharge==0, -8:-6], 2, mean, na.rm=T)
>>>   mean.dead                       <-
>>> apply(tas.data[tas.data$discharge==1, -8:-6], 2, mean, na.rm=T)
>>>   stderr                          <- function(x)
>>> sqrt(var(x,na.rm=TRUE)/length(na.omit(x)))
>>>   se.alive                        <-
>>> apply(tas.data[tas.data$discharge==0, -8:-6], 2, stderr)
>>>   se.dead                         <-
>>> apply(tas.data[tas.data$discharge==1, -8:-6], 2, stderr)
>>>   summarytas                      <- data.frame (media = c(mean.dead,
>>> mean.alive),
>>>                                       standarderror = c( se.dead,
>>> se.alive), discharge = c(rep("dead",5), rep("alive", 5)))
>>>
>>>
>>> ggplot(summarytas, aes(x=c(c(1:5), c(1:5)), y=mean, colour=discharge)) +
>>>     geom_errorbar(aes(ymin=mean-2* standarderror, ymax=media+2*
>>> standarderror), width=.1) +
>>>   scale_color_manual(values=c("blue", "red")) +
>>>    theme(legend.text=element_text(size=20),
>>> axis.text=element_text(size=16),
>>> axis.title=element_text(size=20,face="bold")) +
>>>    scale_x_continuous(name="Days") +
>>>   scale_y_continuous(name="log tas") +
>>>   geom_line() +
>>>     geom_point()
>>>
>>>
>>> library(verification)
>>> prev <-
>>> summary(glm(tas.data[,6]~tas.data[,4],family=binomial(link=probit)))
>>> answer = c(prev$coefficients[,1:2])
>>>
>>>
>>> roc.plot(tas.data[,6], prev, show.thres = FALSE, legen=F )
>>>
>>>
>>> modelo<-function (dataainit)
>>>
>>> {
>>>
>>> #dataa<-tas.data
>>> dataa<-dataainit
>>>
>>> dataa$ident<-seq(1:90)
>>> tas.6days<-cbind(id=rep(dataa$id,5),tas=c(dataa$tas_d2, dataa$tas_d3,
>>> dataa$tas_d4, dataa$tas_d5, dataa$tas_d6),
>>> time=rep(c(2:6)-2, each=90), out.come=rep(dataa$hosp,5),
>>> ident=rep(dataa$ident,5))
>>>
>>> tas.6days<-data.frame(tas.6days[order(tas.6days[,1]),])
>>> tas.6days$tas[tas.6days$tas==88888|tas.6days$tas==99999 ]<-NA
>>>
>>> #mixed model for the longitudinal tas
>>> lme.1 <- lme(tas~ time+1, random = ~ time+1 |ident, data=tas.6days,
>>> na.action=na.exclude )
>>>
>>> #Random intercept and slopes
>>> pred.lme<-predict(lme.1)
>>> lme.intercept<-lme.1$coef$random$ident[,1]+lme.1$coef$fixed[1]
>>> lme.slope<- lme.1$coef$random$ident[,2]+lme.1$coef$fixed[2]
>>> selector<-as.numeric(names(lme.intercept)) #to select not NA rows.
>>> Apply to the vector in the dataset
>>>
>>>  test(dataa$intercept[resultado.hosp==1],
>>> dataa$intercept[resultado.hosp==0])
>>>
>>> print(summary(model.surv1))
>>> return(model.surv1$coef)
>>>
>>>  }
>>>
>>>
>>>
>>>
>>> Best,
>>> RO
>>>
>>>
>>>
>>> ______________________________________________
>>> R-help at r-project.org <mailto:R-help at r-project.org> mailing list -- To
>>> UNSUBSCRIBE and more, see
>>> https://stat.ethz.ch/mailman/listinfo/r-help
>>> PLEASE do read the posting guide
>>> http://www.R-project.org/posting-guide.html
>>> and provide commented, minimal, self-contained, reproducible code.
>>>
>>
>> --
>> Michael
>> http://www.dewey.myzen.co.uk/home.html
>

-- 
Michael
http://www.dewey.myzen.co.uk/home.html



More information about the R-help mailing list