[R] Issues with lapply and for loop Compared to Running Function

Kevin Egan kev|neg@n31 @end|ng |rom gm@||@com
Wed Sep 23 16:32:30 CEST 2020


Hello,

I’d like to apologise as I understand that this is a significant amount of code, but I am struggling to understand why my code develops an error when running. I have been able to obtain results for the list of matrices named xdot and ydot but am struggling with zdot as I keep getting the error "Error in seq.default(sbeta) : 'from' must be a finite number “ when trying to run either a for loop or lapply through the list of matrices. However, when I run the the bootstrap function on one of the many matrices it provides results. Is there a way to fix this issue? I’m confused as to why it works normally but not when running lapply or a for loop

set.seed(123)    # seed for reproducibility
library(boot)
library(np) # used for b.star
library(dplyr) # Used for Centering and Standardizing
library(deSolve) # ODE
library(lars)

# linear 3D system
Linear3D_derivative <- function(n, eta, polyorder){
 n <- round(n, 0)
 # n = number of time points rounded to nearest integer
 # noise = noise to be added
 # polyorder = degree of polynomial
 times.3D <- seq(0, ((n)-1)*0.01, by = 0.01)
 A.linear3D <- matrix(c(-0.1, -2, 0,
                        2, -0.1, 0, 
                        0, 0, -0.3), 3, 3) 
 state.linear3D <- c(X = 2, Y = 0, Z = 1) 
 linear3D <- function(t, A, b) {
   with(as.list(c(A, b)), {
     dX <- A %*% b
     list(c(dX))
   })
 }
 out.linear3D <- ode(y = state.linear3D, times = times.3D, 
                     func = linear3D, parms = A.linear3D)

 out.linear3D <- out.linear3D[,-1]
 out.linear3D.sorted <- data.frame(out.linear3D)[-1]
 out.linear3D.sorted <- out.linear3D[,c(3,2,1)] # Rearrange for Theta matrix: X0.0.1 = x, X0.1.0 = y, X1.0.0 = z

 # Polynomial Expansion
 expanded.theta <- polym(as.matrix(out.linear3D.sorted), degree = polyorder, raw = T)

 # Order by degree using as.numeric_version
 # numeric_version allows to convert names of variables and expand without limit
 ordered.results <- order(attr(expanded.theta, "degree"),
                          as.numeric_version(colnames(expanded.theta)))

 # Sort Theta Matrix
 sorted.theta <- expanded.theta[,ordered.results]
 sorted.theta <- data.frame(sorted.theta)
 # Change Variable Names
 s <- strsplit(substring(colnames(sorted.theta), 2), "\\.")
 colnames(sorted.theta) <- sapply(s, function(x){
   vec <- c("x", "y", "z")[seq_along(x)]
   x <- as.integer(x)
   y <- rep(vec, rev(x))
   paste(y, collapse = "")
 })

 # Add ones column to theta matrix
 sorted.theta <- data.frame(1, sorted.theta)
 # That lost the attributes, so put them back
 attr(sorted.theta, "degree") <- c(0, attr(expanded.theta, "degree")[ordered.results])
 sorted.theta <- sorted.theta[,order(attr(sorted.theta, "degree"), colnames(sorted.theta))]
 # That lost the attributes again, so put them back
 attr(sorted.theta, "degree") <- c(0, attr(expanded.theta, "degree")[ordered.results])

 # Create Derivative matrix
 dx <- matrix(NA, nrow = nrow(out.linear3D.sorted), ncol = ncol(out.linear3D.sorted))
 for (i in 1:nrow(out.linear3D.sorted)){
   # lorenz returns a list with one element. To assign to dx you have extract the list element using [[1]]
   dx[i,] <- linear3D(0, A.linear3D, out.linear3D[i,])[[1]]
 }
 # Add Noise
 length <- nrow(dx) * ncol(dx)
 dx <- dx + eta*matrix(rnorm(length, mean = 0, sd = 1), nrow(dx))

 # Derivative Variables
 xdot <- dx[,1]
 ydot <- dx[,2]
 zdot <- dx[,3]

 # Combine Matrices
 xdot.df <- data.frame(cbind(xdot, sorted.theta))
 ydot.df  <- data.frame(cbind(ydot, sorted.theta))
 zdot.df <- data.frame(cbind(zdot, sorted.theta))

 # Center y, X will be standardized in the modelling function
 y.xdot <- xdot.df %>% dplyr::select(xdot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
 X.xdot <- xdot.df %>% dplyr::select(-xdot) %>% as.matrix()
 xdot.matrix <- as.matrix(cbind(y.xdot,X.xdot))
 colnames(xdot.matrix)[which(colnames(xdot.matrix) == "X1")] <- "1"

 y.ydot <- ydot.df %>% dplyr::select(ydot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
 X.ydot <- ydot.df %>% dplyr::select(-ydot) %>% as.matrix()
 ydot.matrix <- as.matrix(cbind(y.ydot,X.ydot))
 colnames(ydot.matrix)[which(colnames(ydot.matrix) == "X1")] <- "1"

 y.zdot <- zdot.df %>% dplyr::select(zdot) %>% scale(center = TRUE, scale = FALSE) %>% as.matrix()
 X.zdot <- zdot.df %>% dplyr::select(-zdot) %>% as.matrix()
 zdot.matrix <- as.matrix(cbind(y.zdot,X.zdot))
 colnames(zdot.matrix)[which(colnames(zdot.matrix) == "X1")] <- "1"

 return(list(xdot.matrix=xdot.matrix, ydot.matrix = ydot.matrix, zdot.matrix = zdot.matrix, col.names = colnames(X.xdot), 
             degree.theta = attr(sorted.theta, "degree")))
}
# lars alasso step BIC with OLS weights
lars_alasso_OLSweights_fn <- function(data,index){ #index is the bootstrap sample index
 x <- data[index,-1]
 y <- data[index,1]
 m <-ncol(x)
 n <-nrow(x)
 x <- as.matrix(x)
 #  standardize variables like lars does 
 one <- rep(1, n)
 meanx <- drop(one %*% x)/n
 xc <- scale(x, meanx, FALSE)         # first subtracts mean
 xc[,1] <- 1
 normx <- sqrt(drop(one %*% (xc^2)))
 names(normx) <- NULL
 xs <- scale(xc, FALSE, normx)  
 xs[,1] <- 1
 # Perform OLS for weights vector
 out.ls<-lm(y~xs)                      # ols fit on standardized
 beta.ols<-out.ls$coeff[2:(m+1)]       # ols except for intercept
 # Some Coefficients may be NA so make them 0
 beta.ols[is.na(beta.ols)] <- 0
 w<-abs(beta.ols)      
 # Scale x using weights vector
 xs2 <- scale(xs,center=FALSE,scale=1/w)
 xs2[,1] <- 1
 object <- lars(xs2,y,type="lasso",normalize=FALSE)
 bic <- ((log(n)/(n))*object$df)+(as.vector(object$RSS)/(n)) # Tibshirani On the degrees of freedom
 step.bic <- which.min(bic)
 #bic <- log(n)*object$df+n*log(as.vector(object$RSS)/n)
 coeff <- as.vector(coef(object, s = step.bic, mode= "step"))
 # coeff <- predict.lars(object,xs2,s=step.bic,type="coef",mode="step")$coefficients
 # get back in right scale by multiplying by weights vector
 coeff <- coeff*w/normx                 
 coeff[is.na(coeff)] <- 0
 coef <- as.vector(coeff)
 return(coef)
}
# lars alasso step BIC with OLS weights plus OLS post selection
lars_alassoOLS_OLSweights_fn <- function(data,index){ #index is the bootstrap sample index
 x <- data[index,-1]
 y <- data[index,1]
 m <-ncol(x)
 n <-nrow(x)
 x <- as.matrix(x)
 #  standardize variables like lars does 
 one <- rep(1, n)
 meanx <- drop(one %*% x)/n
 xc <- scale(x, meanx, FALSE)         # first subtracts mean
 xc[,1] <- 1
 normx <- sqrt(drop(one %*% (xc^2)))
 names(normx) <- NULL
 xs <- scale(xc, FALSE, normx)  
 xs[,1] <- 1
 # Perform OLS for weights vector
 out.ls<-lm(y~xs)                      # ols fit on standardized
 beta.ols<-out.ls$coeff[2:(m+1)]       # ols except for intercept
 # Some Coefficients may be NA so make them 0
 beta.ols[is.na(beta.ols)] <- 0
 w <- abs(beta.ols)      
 # Scale xs using weights vector
 xs2 <- scale(xs,center=FALSE,scale=1/w)
 xs2[,1] <- 1
 object <- lars(xs2,y,type="lasso",normalize=FALSE)
 bic <- ((log(n)/(n))*object$df)+(as.vector(object$RSS)/(n)) # Tibshirani On the degrees of freedom
 # bic <- log(n)*object$df+n*log(as.vector(object$RSS)/n)
 step.bic <- which.min(bic)
 coeff <- as.vector(coef(object,s=step.bic,mode="step"))
 # coeff <- predict.lars(object,xs2,s=step.bic,type="coef",mode="step")$coefficients
 coeff <- coeff*w/normx                 # get back in right scale by multiplying by weights vector
 coeff[is.na(coeff)] <- 0
 coef_nonzero <- as.vector(coeff) != 0
 if(all(coef_nonzero == F)){
   ls_coef <- coeff
 } else{
   ls.obj <- glm(y~x[, coef_nonzero, drop = FALSE])
   ls_coef <- (ls.obj$coefficients)[-1]
 }
 vect_coef <- rep(0,length(coef_nonzero))
 vect_coef[coef_nonzero] <- ls_coef
 return(vect_coef)
}
### Non-parametric bootstrap
# Determine block length using b.star function
bootstrap_function <- function(data, alpha, polynomial.orders){
 # polynomial.orders <- attr(expanded.theta, "degree")[ordered.results] degree of variables 

 bstar <- b.star(data[,1], round = TRUE) # Block length for derivative variable
 blocklength <- bstar[,2] # Select Block Length of cirular block result

 # tsboot Function on Original Dataframe
 # Run tsboot on function using blocklength of derivative variable
 alasso.boot.ts <- tsboot(data,lars_alasso_OLSweights_fn, R=1000, 
                          sim = "fixed", l = blocklength)
 alasso.boot.t0<- alasso.boot.ts$t0 # the estimator from original data set
 alasso.boot.t <- alasso.boot.ts$t # Matrix of coefficients from bootstrap samples

 # Determine number of variables with polynomial degree less than or equal to 
 ## largest non-zero column polynomial degree
 alasso.boot.t.nonzero <- apply(alasso.boot.t, 2, function(c)sum(c!=0))
 alasso.boot.t.nz.max <- max(which(alasso.boot.t.nonzero!=0))
 #add 1 for ones column
 new.theta.order <- sum(polynomial.orders<=polynomial.orders[alasso.boot.t.nz.max]) 

 # Rerun Bootstrap with Truncated Matrix
 post.boot.matrix <- data[,0:new.theta.order+1] # adding 1 to include derivative variable
 alassoOLS.boot.ts <- tsboot(post.boot.matrix,lars_alassoOLS_OLSweights_fn,
                             R=1000, sim = "fixed", l = blocklength)
 alassoOLS.boot.t0<- alassoOLS.boot.ts$t0 # the estimator from iterative data set
 alassoOLS.boot.t <- alassoOLS.boot.ts$t

 variables <- length(alassoOLS.boot.t0) # Number of Variables considered
 coef.nonzero <- length(which(alassoOLS.boot.t0 != 0)) # variables selected
 q <- (alpha*coef.nonzero)/variables # q = alpha
 B <- alassoOLS.boot.ts$R # Number of bootstraps
 q1 <- (B*q)/2 # Lower bound
 q2 <- B-q1+1 # Upper bound
 # Sort and determine value of lower and upper bound 
 bound.percentile <- apply(alassoOLS.boot.t,2, function(u){CI = sort(u)[c(round(q1,0),round(q2,0))]})
 # See if variables are within confidence intervals
 within.ci.check <- (alassoOLS.boot.t0 >=  bound.percentile[1,] & alassoOLS.boot.t0 <=  bound.percentile[2,])

 iterative.df.colnames <- colnames(post.boot.matrix)
 iterative.theta.colnames <- iterative.df.colnames[-c(1)]

 return(list(ci=bound.percentile, point.estimates = alassoOLS.boot.t0, within.ci.check = within.ci.check, point.estimate.colnames = iterative.theta.colnames))
}

s <- seq(4.77, 5, by = 0.01)
k <- 10^s

systems <- lapply(k, eta = 0,  polyorder = 3, Linear3D_derivative)

xdot <- lapply(systems, `[[`, 1) 
ydot <- lapply(systems, `[[`, 2)
zdot <- lapply(systems, `[[`, 3)

zdot.results <- lapply(zdot, alpha = 0.05, polynomial.orders = systems[[1]]$degree.theta, bootstrap_function)

I’ve also tried to use mclapply to speed up the process.

Thanks


More information about the R-help mailing list