[R] (no subject)

Mon Jul 30 17:24:27 CEST 2018

The book "Introduction to Statistical Learning" gives R scripts for its
labs. I found a script for ridge regression that works on the dataset the
book uses but is unusable on other datasets I own unless I clean the data.

I'm trying to understand the syntax I need for data cleaning and am
stuck. I want to learn to do ridge regression. I tried using my own data
set with this script rather than the book example, but I get errors. If you
use your own data set rather than the Hitters dataset, you'll get errors
unless you format your code. How do I change this script, or clean any
dataset, so that this script for ridge regression is usable for all datasets?

    # Load the Hitters data, drop incomplete rows, and build the predictor
    # matrix `x` and response vector `y`.
    library(ISLR)

    # fix() opens an interactive data editor, so only call it when a user is
    # present; batch runs (Rscript, R CMD BATCH) skip it.
    if (interactive()) {
      fix(Hitters)
    }

    names(Hitters)

    dim(Hitters)

    # Number of rows with a missing Salary.
    sum(is.na(Hitters$Salary))

    # Keep complete rows only; glmnet() does not accept missing values.
    Hitters <- na.omit(Hitters)

    dim(Hitters)

    # Should now be 0.
    sum(is.na(Hitters))

    library(leaps)

    # model.matrix() expands factor columns into dummy variables. Column 1 is
    # the intercept, dropped here because glmnet() fits its own by default.
    # drop = FALSE keeps `x` a matrix even if only one column remains.
    x <- model.matrix(Salary ~ ., Hitters)[, -1, drop = FALSE]

    y <- Hitters$Salary

    # Ridge Regression
    #
    # Fit ridge regression (alpha = 0) over a fixed grid of 100 lambda values
    # and inspect the coefficients at a few points along the path.

    library(glmnet)

    # Lambda grid running from 10^10 down to 10^-2. `length.out` is spelled
    # out in full rather than relying on partial matching of `length`.
    grid <- 10^seq(10, -2, length.out = 100)

    ridge.mod <- glmnet(x, y, alpha = 0, lambda = grid)

    dim(coef(ridge.mod))

    # Lambda, coefficients, and L2 norm of the non-intercept coefficients at
    # the 50th grid value.
    ridge.mod$lambda[50]

    coef(ridge.mod)[, 50]

    sqrt(sum(coef(ridge.mod)[-1, 50]^2))

    # Same quantities at the 60th grid value.
    ridge.mod$lambda[60]

    coef(ridge.mod)[, 60]

    sqrt(sum(coef(ridge.mod)[-1, 60]^2))

    # Coefficients at lambda = 50. `[, 1]` returns every coefficient as a
    # named vector, so this works for any number of predictors; the original
    # `[1:20, ]` only fits the 20 Hitters coefficients and errors on data
    # sets with fewer.
    predict(ridge.mod, s = 50, type = "coefficients")[, 1]

    # Split the rows into training and test halves, fit ridge on the training
    # half, and compare test MSE at several lambda values.
    set.seed(1)

    # seq_len() is safe for zero rows; %/% makes the truncation of the
    # half-size explicit (sample() truncated the fractional size before).
    train <- sample(seq_len(nrow(x)), nrow(x) %/% 2)

    # Negative indices select every row not in `train`.
    test <- (-train)

    y.test <- y[test]

    ridge.mod <- glmnet(x[train, ], y[train], alpha = 0, lambda = grid,
                        thresh = 1e-12)

    # Test MSE at lambda = 4.
    ridge.pred <- predict(ridge.mod, s = 4, newx = x[test, ])

    mean((ridge.pred - y.test)^2)

    # Baseline: test MSE when predicting the training mean for every row.
    mean((mean(y[train]) - y.test)^2)

    # Test MSE at a very large lambda.
    ridge.pred <- predict(ridge.mod, s = 1e10, newx = x[test, ])

    mean((ridge.pred - y.test)^2)

    # Test MSE at lambda = 0. With exact = TRUE glmnet refits at the requested
    # lambda instead of interpolating along the grid, and current glmnet
    # versions stop with an error unless the original training x and y are
    # supplied again.
    ridge.pred <- predict(ridge.mod, s = 0, newx = x[test, ], exact = TRUE,
                          x = x[train, ], y = y[train])

    mean((ridge.pred - y.test)^2)

    # Ordinary least squares on the training rows, for comparison.
    lm(y ~ x, subset = train)

    # Coefficients at lambda = 0. `[, 1]` returns all of them as a named
    # vector rather than assuming exactly 20 as `[1:20, ]` did.
    predict(ridge.mod, s = 0, exact = TRUE, type = "coefficients",
            x = x[train, ], y = y[train])[, 1]

    # Choose lambda by cross-validation on the training rows, evaluate it on
    # the test rows, then refit on the full data.
    set.seed(1)

    cv.out <- cv.glmnet(x[train, ], y[train], alpha = 0)

    plot(cv.out)

    # Lambda with the smallest cross-validated error.
    bestlam <- cv.out$lambda.min

    bestlam

    # Test MSE at the selected lambda.
    ridge.pred <- predict(ridge.mod, s = bestlam, newx = x[test, ])

    mean((ridge.pred - y.test)^2)

    # Refit on all rows and report the coefficients at the selected lambda.
    out <- glmnet(x, y, alpha = 0)

    # The original statement was cut off after `[1:20` (a syntax error).
    # `[, 1]` completes it and returns every coefficient as a named vector,
    # whatever the number of predictors.
    predict(out, type = "coefficients", s = bestlam)[, 1]

	[[alternative HTML version deleted]]