[R] RandomForest tuning the parameters

Eric Berger ericjberger using gmail.com
Tue May 9 09:44:25 CEST 2023


Hi Sacha,
On second thought, perhaps this is more the direction that you want ...

X2 = cbind(X_train,y_train)
colnames(X2)[3] = "y"
regr2<-randomForest(y~x1+x2, data=X2,maxnodes=10, ntree=10)
regr
regr2

#Make prediction
predictions= predict(regr, X_test)
predictions2= predict(regr2, X_test)

HTH,
Eric


On Tue, May 9, 2023 at 6:40 AM Eric Berger <ericjberger using gmail.com> wrote:

> Hi,
> One problem you have is with the command:
> regr<-randomForest(y~x1+x2, data=X_train, proximity=TRUE)
>
> What you need is something like this:
>
> X2 <- cbind(X,y)
> regr<-randomForest(y~x1+x2, data=X2, proximity=TRUE)
>
> HTH,
> Eric
>
>
> On Mon, May 8, 2023 at 11:11 PM varin sacha via R-help
> <r-help using r-project.org> wrote:
> >
> > Dear R-experts,
> >
> > Below is a toy example that produces some error messages, especially at
> the end of the code (Tuning the parameters). Your help in correcting my R
> code would be highly appreciated.
> >
> >
> > #######################################
> > #libraries
> > library(lattice)
> > library(ggplot2)
> > library(caret)
> > library(randomForest)
> >
> > #Data
> >
> y=c(23,34,32,12,24,35,45,56,76,87,54,34,23,45,41,13,16,98,35,65,56,67,78,89,87,64,53,31,14,34,45,46,57,69,90,80,70,65,50,45,60,56,87,79,64,34,25,47,61,24,10,13,12,15,46,58,76,89,90,98)
> >
> x1=c(4,5,6,7,1,10,19,20,21,14,23,6,5,32,15,12,16,14,2,3,4,5,3,2,1,2,6,7,5,4,3,2,1,3,4,6,7,9,5,4,3,7,10,11,12,13,10,3,2,5,6,9,8,7,4,12,15,16,2,3)
> >
> x2=c(0,0,0,1,1,0,1,1,0,1,1,0,0,1,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,1,0,0,0,1,1,1,1,0,1,0,1,0,0,1,1,0,0,1,0,0,1,1)
> >
> > y=as.numeric(y)
> > x1=as.numeric(x1)
> > x2=as.factor(x2)
> >
> > X=data.frame(x1,x2)
> > y=y
> >
> > #Split data into training and test sets
> > index=createDataPartition(y, p=0.75, list=FALSE)
> > X_train = X[index, ]
> > X_test = X[-index, ]
> > y_train= y[index ]
> > y_test = y[-index ]
> >
> > #Train the model
> > regr=randomForest (x=X_train, y=y_train, maxnodes=10, ntree=10)
> >
> > regr<-randomForest(y~x1+x2, data=X_train, proximity=TRUE)
> > regr
> >
> > #Make prediction
> > predictions= predict(regr, X_test)
> >
> > result= X_test
> > result['y'] = y_test
> > result['prediction'] = predictions
> > result
> >
> > # Import library for Metrics
> > library(Metrics)
> >
> > print(paste0('MAE: ' , mae(y_test,predictions) ))
> > print(paste0('MSE: ' ,caret::postResample(predictions ,
> y_test)['RMSE']^2 ))
> > print(paste0('R2: ' ,caret::postResample(predictions ,
> y_test)['Rsquared'] ))
> >
> >
> > #Tuning the parameters
> > N=500 #length(X_train)
> > X_train_ = X_train[1:N , ]
> > y_train_ = y_train[1:N]
> >
> > seed <-7
> > metric<-'RMSE'
> >
> > customRF <- list(type = "Regression", library = "randomForest", loop =
> NULL)
> >
> > customRF$parameters <- data.frame(parameter = c("maxnodes", "ntree"),
> class = rep("numeric", 2), label = c("maxnodes", "ntree"))
> >
> > customRF$grid <- function(x, y, len = NULL, search = "grid") {}
> >
> > customRF$fit <- function(x, y, wts, param, lev, last, weights,
> classProbs, ...) {
> >
> >  randomForest(x, y, maxnodes = param$maxnodes, ntree=param$ntree, ...)
> >
> > }
> >
> > customRF$predict <- function(modelFit, newdata, preProc = NULL,
> submodels = NULL)
> >
> > predict(modelFit, newdata)
> >
> > customRF$prob <- function(modelFit, newdata, preProc = NULL, submodels =
> NULL)
> >
> >   predict(modelFit, newdata, type = "prob")
> >
> > customRF$sort <- function(x) x[order(x[,1]),]
> >
> > customRF$levels <- function(x) x$classes
> >
> >
> > # Set grid search parameters
> > control <- trainControl(method="repeatedcv", number=10, repeats=3,
> search='grid')
> >
> > # Outline the grid of parameters
> > tunegrid <- expand.grid(.maxnodes=c(10,20,30,50), .ntree=c(100, 200,
> 300))
> > set.seed(seed)
> >
> > # Train the model
> > rf_gridsearch <- train(x=X_train_, y=y_train_, method=customRF,
> metric=metric, tuneGrid=tunegrid, trControl=control)
> >
> > plot(rf_gridsearch)
> >
> > rf_gridsearch$bestTune
> >
> > #################################################
> >
> > ______________________________________________
> > R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
> > https://stat.ethz.ch/mailman/listinfo/r-help
> > PLEASE do read the posting guide
> http://www.R-project.org/posting-guide.html
> > and provide commented, minimal, self-contained, reproducible code.
>

	[[alternative HTML version deleted]]



More information about the R-help mailing list