[R] bug in rpart?

X. Li xl_goskins at yahoo.com
Tue Jun 14 21:30:08 CEST 2005


Dear R-helpers,

Can you help me see why "code 1" gives an error
while "code 2" runs fine?  The only difference in
the data is the distribution of the age categories.
I am including the session output after the code.

Many thanks. 

XL

library(survival)
library(rpart)
# code 1
# Age categories 1, 2 and 3 are repeated 2, 3 and 15 times respectively.
n <- 20
age <- rep(1:3, times = c(2, 3, 15))

# Random exponential times and 0/1 status draws (rexp is drawn before
# rbinom); the columns are named when the data frame is built.
eg <- data.frame(
  surv = rexp(n),
  status = rbinom(n, size = 1, prob = 0.3),
  age = age
)

# Fit the tree on age alone; the top-level call auto-prints the result.
rpart(Surv(surv, status) ~ age, data = eg)

# code 2
# Age categories 1, 2 and 3 are repeated 5, 5 and 10 times respectively.
n <- 20
age <- rep(1:3, times = c(5, 5, 10))

# Random exponential times and 0/1 status draws (rexp is drawn before
# rbinom); the columns are named when the data frame is built.
eg <- data.frame(
  surv = rexp(n),
  status = rbinom(n, size = 1, prob = 0.3),
  age = age
)

# Fit the tree on age alone; the top-level call auto-prints the result.
rpart(Surv(surv, status) ~ age, data = eg)

# my session:

> library(rpart)
> # code 1
> n <- 20 
> age <- rep(1:3, c(2, 3, 15))
> eg<- data.frame(rexp(n), rbinom(n,1,prob=.3), age=age)
> names(eg) <- c("surv", "status", "age")
> rpart(Surv(surv, status)~age, data=eg)
Error in "$<-.data.frame"(`*tmp*`, "yval2", value = c(1, 7)) : 
        replacement has 2 rows, data has 1
> 
> # code 2
> n <- 20 
> age <- rep(1:3, c(5, 5, 10)) 
> eg<- data.frame(rexp(n), rbinom(n,1,prob=.3), age=age)
> names(eg) <- c("surv", "status", "age")
> rpart(Surv(surv, status)~age, data=eg)
n= 20 

node), split, n, deviance, yval
      * denotes terminal node

1) root 20 19.007310 1.0000000  
  2) age>=2.5 10  9.673372 0.8230355 *
  3) age< 2.5 10  9.027225 1.1922660 *




More information about the R-help mailing list