[R] How to formulate an (effect-modifying) interaction with matching variable in a conditional logistic regression?

Mon Jun 13 12:49:51 CEST 2011

Hi,

I would like to see if a matching variable is an effect-modifier in a
conditional logistic regression. Naturally, the matching variable
can't enter directly in the model but as an interaction with terms
that are in.

However, I have problems in formulating the correct model the term
that's already in the model is a factor. I am using treatment
contrasts and the problem is that if I write:
update(model, . ~ . + factorX:matchingvariableY)
then I get one extra level for the level that is contained in the
intercept which is conditioned away, and I get numerical problems.  I
show an example below using data from Hosmer and Lemeshow's Applied
Logistic Regression (2nd ed. p.230 onwards)

I can solve the problem by introducing new variables that contains the
interaction I want, but I'd like to avoid this unwieldy solution. Can
you suggest a better one?

Thank you for your help!

Fredrik Nilsson

# Example
# Code Sheet for the Variables in the 1-1 matched Low Birth Weight Study
# Described in Section 7.3 Page 230
#
# Name     Description                Codes/Values
# pair     Pair                       1 - 56
# low      Low Birth Weight           1 = BWT<=2500g,
#                                     0 = BWT>2500g
# age      Age of Mother              Years
# lwt      Weight of Mother at        Pounds
#          Last Menstrual Period
# race     Race                       1 = White
#                                     2 = Black
#                                     3 = Other
# smoke    Smoking Status             0 = No,1 = Yes
#          During Pregnancy
# ptd      History of Premature Labor 0 = None,1 = Yes
# ht       History of Hypertension    0 = No, 1 = Yes
# ui       Presence of Uterine        0 = No, 1 = Yes
#          Irritability

pair<-rep(1:56, each=2)
low<-rep(c(0,1), 56)
age<-c(14,15,16,17,17,17,17,17,18,18,19,19,19,20,20,20,20,20,
       20,20,20,21,21,21,21,21,22,22,23,23,23,23,23,24,24,24,
       24,24,25,25,25,25,25,25,26,26,26,26,27,28,28,29,30,31,32,35)
age<-rep(age,each=2)
# actually the last one should be 34 not 35?
age[112]<-34
lwt<-c(135,101,98,115,95,130,103,130,122,110,113,120,113,120,119,142,
       100,148,90,110,150,91,115,102,235,112,120,150,103,125,169,120,
       141,80,121,109,127,121,120,122,158,105,108,165,124,200,185,103,
       160,100,115,130,95,130,158,130,130,97,128,187,119,120,115,110,
       190,94,90,128,115,132,110,155,115,138,110,105,118,105,120,85,
       155,115,125,92,140,89,241,105,113,117,168,96,133,154,160,190,
       124,130,120,120,130,95,135,130,95,142,215,102,121,105,170,187)
race<-c(0,2,1,2,2,2,2,2,0,0,1,0,1,1,2,1,0,2,0,1,2,0,2,0,0,0,2,0,2,2,2,
        1,0,2,1,2,2,0,2,1,0,2,0,0,2,1,1,2,0,2,0,0,2,0,1,0,1,2,2,1,2,2,
        2,0,0,2,0,1,0,2,2,0,2,0,2,1,0,2,2,2,0,2,1,0,0,2,1,2,0,0,1,2,2,
        2,2,0,0,1,2,2,2,0,0,0,0,0,0,0,2,0,0,1)
race<-as.factor(race);levels(race)<-c("white","black","other")
smoke<-c(0,1,0,0,0,0,0,1,1,1,0,1,0,0,0,0,1,0,1,1,0,1,0,0,1,1,0,1,0,0,0,
         1,0,1,1,0,0,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,
         1,1,0,1,1,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,1,
         0,0,1,1,0,0,1,0,1,0,0,1,1,1,1,0,1,0,1)
smoke<-as.factor(smoke);levels(smoke)<-c("no","yes")
ptd<-c(0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,
       1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,
       0,0,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,
       0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0)
ptd<-as.factor(ptd); levels(ptd)<-c("no","yes")
ht<-c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,1)
ht<-as.factor(ht);levels(ht)<-c("no","yes")
ui<-c(0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,1,1,0,1,
      1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,
      1,0,0,0,1,0,0,0,0,0,0,0,0)
dataset<-data.frame(pair,low,age,lwt,race,smoke,ptd,ht,ui)

library(survival)
# model with all covariates
mult.cl<-clogit(low~lwt+race+smoke+ptd+ht+ui+strata(pair),data=dataset)
summary(mult.cl)

# H&L drop race
mult2.cl<-update(mult.cl,.~.-race)
summary(mult2.cl)

######################
# check interactions #
######################

multi1.cl<-update(mult2.cl,.~.+age:lwt)
summary(multi1.cl)
anova(mult2.cl,multi1.cl)

# then comes the interaction with smoke
# no good! Here's the problem
multi2.cl<-update(mult2.cl,.~.+age:smoke)
summary(multi2.cl)
anova(mult2.cl,multi2.cl)

# has to define my own variable?
dataset$ageNsmoke<-as.numeric(dataset$smoke=="yes")*dataset$age
multi2a.cl<-update(mult2.cl,.~.+ageNsmoke)
summary(multi2a.cl)
anova(mult2.cl,multi2a.cl)

# but suppose I had a model with race, then I'd have to
# define two variables
dataset$ageNblack<-as.numeric(dataset$race=="black")*dataset$age
dataset$ageNother<-as.numeric(dataset$race=="other")*dataset$age
multi3.cl<-update(mult.cl,.~.+race:age)
summary(multi3.cl)
anova(multi3.cl, mult.cl)

multi3.cl<-update(mult.cl,.~.+ageNblack+ageNother)
summary(multi3.cl)
anova(multi3.cl, mult.cl)

# even more unwieldy with factors as matching variables
# I fake a second matching variable "type" with three levels
dataset$type<-c(rep(1,36),rep(2,38),rep(3,38))
dataset$type<-as.factor(dataset$type)
levels(dataset$type)<-c("type1","type2","type3")
dataset$smokeNtype1<-as.numeric(dataset$smoke=="yes")*as.numeric(dataset$type=="type1")
dataset$smokeNtype2<-as.numeric(dataset$smoke=="yes")*as.numeric(dataset$type=="type2")
multi4.cl<-update(mult.cl,.~.+smokeNtype1+smokeNtype2)