[R] Type III ANOVA of package car depends on factor level order

Xiaoxu LI lixiaoxu at gmail.com
Tue Nov 18 06:43:38 CET 2008

```## Use model.matrix
## Data is the same
## continue
m <- model.matrix(lm(rep(0,length(y)) ~  disease + drug +disease:drug));
## Model.matrix(lm(y~...)) will drop is.na(y) rows. That result will
be Type II rather than III
## for residuals of (disease:drug) will be orthogonal to disease and
drug within !is.na(y) rows
## while we need residuals of (disease:drug) orthogonal to disease and
drug within balanced design rows
c <- attr(m,'assign')==3;
x_interaction <-residuals( lm(m[,c] ~ m[,!c]));

## Compare to http://www.otago.ac.nz/sas/stat/chap30/sect52.htm
## Type III SS: 2997.471860, 415.873046, 707.266259
c(	anova(lm(y~x_interaction+disease),lm(y~disease * drug))\$'Sum of Sq'[2]
,	anova(lm(y~x_interaction+drug),lm(y~disease*drug))\$'Sum of Sq'[2]
,	anova(lm(y~disease+drug),lm(y~disease*drug))\$'Sum of Sq'[2])
##

## LI, Xiaoxu
## School of Humanities & Social Sciences, Shenzhen Graduate School of
Peking Univ.
## http://lixiaoxu.lxxm.com/

On Tue, Nov 18, 2008 at 7:19 AM, Xiaoxu LI <lixiaoxu at gmail.com> wrote:
> ## I got it. IV(s) of interaction should be orthogonal to main effect IV(s).
> ## Type III ANOVA / Interaction alone
>
> x_interaction<-cbind(
>         (drug==2)&(disease==2)
>        ,(drug==3)&(disease==2)
>        ,(drug==4)&(disease==2)
>        ,(drug==2)&(disease==3)
>        ,(drug==3)&(disease==3)
>        ,(drug==4)&(disease==3));
> x_interaction <- residuals(lm(x_interaction~disease+drug));
>
> ## replace level order
> x_interaction_2<-cbind(
>         (drug==2)&(disease==2)
>        ,(drug==3)&(disease==2)
>        ,(drug==1)&(disease==2)
>        ,(drug==2)&(disease==1)
>        ,(drug==3)&(disease==1)
>        ,(drug==1)&(disease==1));
> x_interaction_2 <- residuals(lm(x_interaction~disease+drug));
>
> ## Compare to http://www.otago.ac.nz/sas/stat/chap30/sect52.htm
> ## Type III SS: 2997.471860, 415.873046, 707.266259
> rbind(
>        c(      anova(lm(y~x_interaction+disease),lm(y~disease*drug))\$'Sum of Sq'[2]
>        ,       anova(lm(y~x_interaction+drug),lm(y~disease*drug))\$'Sum of Sq'[2]
>        ,       anova(lm(y~disease+drug),lm(y~disease*drug))\$'Sum of Sq'[2])
> ,       c(      anova(lm(y~x_interaction_2+disease),lm(y~disease*drug))\$'Sum of Sq'[2]
>        ,       anova(lm(y~x_interaction_2+drug),lm(y~disease*drug))\$'Sum of Sq'[2]
>        ,       anova(lm(y~disease+drug),lm(y~disease*drug))\$'Sum of Sq'[2])
> )
>
>
>
>
> On Tue, Nov 18, 2008 at 3:27 AM, Xiaoxu LI <lixiaoxu at gmail.com> wrote:
>> ## Question1: How to define IV with interaction alone, without main effects?
>> ## Question2: Should Type III ANOVA in package car be independent of
>> the factor level order?
>>
>> ## data from http://www.otago.ac.nz/sas/stat/chap30/sect52.htm
>> drug <-  c(t(t(rep(1,3)))%*%t(1:4));
>> disease <- c(t(t(1:3)) %*% t(rep(1,4)));
>> y <- t(matrix(c(
>>         42     ,44     ,36     ,13     ,19     ,22
>>        ,33     ,NA     ,26     ,NA     ,33     ,21
>>        ,31     ,-3     ,NA     ,25     ,25     ,24
>>        ,28     ,NA     ,23     ,34     ,42     ,13
>>        ,NA     ,34     ,33     ,31     ,NA     ,36
>>        ,3      ,26     ,28     ,32     ,4      ,16
>>        ,NA     ,NA     ,1      ,29     ,NA     ,19
>>        ,NA     ,11     ,9      ,7      ,1      ,-6
>>        ,21     ,1      ,NA     ,9      ,3      ,NA
>>        ,24     ,NA     ,9      ,22     ,-2     ,15
>>        ,27     ,12     ,12     ,-5     ,16     ,15
>>        ,22     ,7      ,25     ,5      ,12     ,NA
>>        ),nrow=6));
>> ## verify data with http://www.otago.ac.nz/sas/stat/chap30/sect52.htm
>> (cbind(drug,disease,y));
>> ## make a big table
>> drug <- as.factor(rep(drug,6));
>> disease <- as.factor(rep(disease,6));
>> y <- c(y);
>> ## verify data through type I ANOVA to
>> http://www.otago.ac.nz/sas/stat/chap30/sect52.htm
>> anova(lm(y~drug*disease));
>>
>> require(car);
>> ## Type III ANOVA in package car is not Type III ANOVA in SAS
>> Anova(lm(y~drug*disease),type='III');
>> ## Type III ANOVA in package car equates to wishful
>> ## "anova(lm(y~INTERACTION +disease),lm(y~drug*disease))"
>> ## However in R, df of lm(y~drug:disease) is automatically df of
>> lm(y~drug*disease)
>> ## How to define IV with interaction alone, without main effects?
>>
>> ## Verify type III of package car to be (INTERACTION + disease) vs.
>> (disease*drug)
>> ## However at the 3rd row, replace the maximun levels(drug==4,
>> disease=3) with the minimum levels (==1).
>> rbind(Anova(lm(y~drug*disease),type='III')\$'Sum Sq'[2:4]
>> ,       c(
>>                anova(
>>                        lm(y~ 1
>>                                +       (       I((drug==2)&(disease==2))
>>                                        +       I((drug==3)&(disease==2))
>>                                        +       I((drug==4)&(disease==2))
>>                                        +       I((drug==2)&(disease==3))
>>                                        +       I((drug==3)&(disease==3))
>>                                        +       I((drug==4)&(disease==3))
>>                                        )
>>                                +       (       I(disease==2) + I(disease==3)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>        ,       anova(
>>                        lm(y~ 1
>>                                +       (       I((drug==2)&(disease==2))
>>                                        +       I((drug==3)&(disease==2))
>>                                        +       I((drug==4)&(disease==2))
>>                                        +       I((drug==2)&(disease==3))
>>                                        +       I((drug==3)&(disease==3))
>>                                        +       I((drug==4)&(disease==3))
>>                                        )
>>                                +       (       I(drug==2) + I(drug==3) +  I(drug==4)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>        ,       anova(lm(y~drug+disease),lm(y~drug*disease))\$'Sum of Sq'[2]
>>        )
>> ,
>>        c(
>>                anova(
>>                        lm(y~ 1
>>                                +       (       I((drug==2)&(disease==2))
>>                                        +       I((drug==3)&(disease==2))
>>                                        +       I((drug==1)&(disease==2))
>>                                        +       I((drug==2)&(disease==1))
>>                                        +       I((drug==3)&(disease==1))
>>                                        +       I((drug==1)&(disease==1))
>>                                        )
>>                                +       (       I(disease==2) + I(disease==1)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>        ,       anova(
>>                        lm(y~ 1
>>                                +       (       I((drug==2)&(disease==2))
>>                                        +       I((drug==3)&(disease==2))
>>                                        +       I((drug==1)&(disease==2))
>>                                        +       I((drug==2)&(disease==1))
>>                                        +       I((drug==3)&(disease==1))
>>                                        +       I((drug==1)&(disease==1))
>>                                        )
>>                                +       (       I(drug==2) + I(drug==3) +  I(drug==1)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>        ,       anova(lm(y~drug+disease),lm(y~drug*disease))\$'Sum of Sq'[2]
>>        )
>> )
>>
>> ## I don't know whether the problem is in anova(lm1,lm2) or lm, or Anova
>>
>> ## while type II is independent of level order --
>> rbind(Anova(lm(y~drug*disease),type='II')\$'Sum Sq'[1:3]
>> ,       c(
>>                anova(
>>                        lm(y~ 1+disease)
>>                        ,lm(y~ 1
>>                                +       (       I(drug==2)
>>                                        +       I(drug==3)
>>                                        +       I(drug==4)
>>                                        )
>>                                +       (       I(disease==2) + I(disease==3)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>        ,       anova(
>>                        lm(y~ 1+drug)
>>                        ,lm(y~ 1
>>                                +       (       I(drug==2)
>>                                        +       I(drug==3)
>>                                        +       I(drug==4)
>>                                        )
>>                                +       (       I(disease==2) + I(disease==3)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>
>>        ,       anova(lm(y~drug+disease),lm(y~drug*disease))\$'Sum of Sq'[2]
>>        )
>> ,
>>        c(
>>                anova(
>>                        lm(y~ 1+disease)
>>                        ,lm(y~ 1
>>                                +       (       I(drug==2)
>>                                        +       I(drug==3)
>>                                        +       I(drug==1)
>>                                        )
>>                                +       (       I(disease==2) + I(disease==1)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>        ,       anova(
>>                        lm(y~ 1+drug)
>>                        ,lm(y~ 1
>>                                +       (       I(drug==2)
>>                                        +       I(drug==3)
>>                                        +       I(drug==1)
>>                                        )
>>                                +       (       I(disease==2) + I(disease==1)))
>>                        ,lm(y~drug*disease))\$'Sum of Sq'[2]
>>
>>        ,       anova(lm(y~drug+disease),lm(y~drug*disease))\$'Sum of Sq'[2]
>>        )
>> )
>>
>> ## LI, Xiaoxu
>> ## School of Humanities & Social Sciences, Shenzhen Graduate School of
>> Peking Univ.
>> ## http://lixiaoxu.lxxm.com/
>>
>

```