## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----installation, eval=FALSE------------------------------------------------- # if (!requireNamespace("devtools", quietly = TRUE)) { # install.packages("devtools") # } # devtools::install_github("Chukyhenry/PosiR") ## ----eval=FALSE--------------------------------------------------------------- # install.packages("pbapply") # install.packages("glmnet") ## ----------------------------------------------------------------------------- if (!requireNamespace("dplyr", quietly = TRUE)) { stop("Please install the 'dplyr' package to run this vignette.") } library(PosiR) library(dplyr) library(stats) ## ----message=FALSE------------------------------------------------------------ set.seed(123) # for reproducibility X <- matrix(rnorm(200 * 2), ncol = 2, dimnames = list(NULL, c("Age", "Income"))) y <- 0.5 * X[, "Age"] + rnorm(200) # the true intercept is 0, the true coefficient for Age is 0.5, and the true coefficient for Income is 0. ## ----------------------------------------------------------------------------- Q <- list( Age_only = 1, # Model with Intercept + Age Age_Income = c(1, 2) # Model with Intercept + Age + Income ) # Indices refer to columns of X: 1="Age", 2="Income" ## ----warning=FALSE, message=FALSE--------------------------------------------- result <- simultaneous_ci(X, y, Q, B = 1000, cores = 2, verbose = FALSE) ## ----------------------------------------------------------------------------- # Show the calculated critical value print(paste("Calculated K_alpha:", round(result$K_alpha, 3))) # Display the intervals data frame print(result$intervals) ## ----------------------------------------------------------------------------- plot(result, las.labels = 2, col.ci = "darkblue", main = "Simultaneous Confidence Intervals") ## ----------------------------------------------------------------------------- data(mtcars) X_mtcars <- as.matrix(mtcars[, c("hp", "wt", "qsec")]) y_mtcars <- mtcars$mpg ## ----------------------------------------------------------------------------- Q_mtcars <- list( HP_only = 1, # Model: mpg ~ Intercept + hp WT_only = 2, # Model: mpg ~ Intercept + wt QSEC_only = 3, # Model: mpg ~ Intercept + qsec Full_Model = 1:3 # Model: mpg ~ Intercept + hp + wt + qsec ) # Indices: 1="hp", 2="wt", 3="qsec" ## ----------------------------------------------------------------------------- result_mtcars <- simultaneous_ci(X = X_mtcars, y = y_mtcars, Q_universe = Q_mtcars, B = 2000, cores = 2, seed = 123, verbose = FALSE) # Display the critical value print(paste("Calculated K_alpha:", round(result_mtcars$K_alpha, 3))) ## ----message=FALSE------------------------------------------------------------ z_alpha_2 <- stats::qnorm(0.975) intervals_comparison <- result_mtcars$intervals %>% mutate( naive_lower = estimate - z_alpha_2 * se_nqj, naive_upper = estimate + z_alpha_2 * se_nqj ) %>% select(model_id, coefficient_name, estimate, lower, upper, naive_lower, naive_upper) %>% arrange(model_id, coefficient_name) # Ensure consistent ordering print(intervals_comparison, digits = 3) ## ----------------------------------------------------------------------------- plot(result_mtcars, las.labels = 2, main = "mtcars Data: Simultaneous CIs") ## ----------------------------------------------------------------------------- set.seed(123) n_lasso <- 100 p_lasso <- 50 X_lasso <- matrix(rnorm(n_lasso * p_lasso), n_lasso, p_lasso, dimnames = list(NULL, paste0("X", 1:p_lasso))) # True coefficients: 1 for first 5, 0 for rest beta_true <- c(rep(1.0, 5), rep(0, p_lasso - 5)) true_intercept <- 0.5 # Generate response (linear model) y_lasso <- drop(true_intercept + X_lasso %*% beta_true + rnorm(n_lasso)) ## ----------------------------------------------------------------------------- # This chunk only runs if glmnet is installed if (requireNamespace("glmnet", quietly = TRUE)) { library(glmnet) # Code using glmnet } else { message("glmnet is not available. Skipping this example.") } library(glmnet) # Use cv.glmnet to find optimal lambda cv_lasso_fit <- cv.glmnet(X_lasso, y_lasso, alpha = 1) # alpha=1 for Lasso # Get coefficients at lambda.min, excluding the intercept lasso_coeffs <- coef(cv_lasso_fit, s = "lambda.min")[-1, 1] # Identify indices of selected variables (non-zero coefficients) # Indices are relative to the columns of X_lasso (1 to p_lasso) select_var_index <- which(lasso_coeffs != 0) # Get names of selected variables select_var_names <- names(select_var_index) cat("Lasso selected variables (indices in X_lasso):", paste(select_var_index, collapse = ", "), "\n") cat("Lasso selected variables (names):", paste(select_var_names, collapse = ", "), "\n") # If no variables selected (unlikely here), handle gracefully for Q definition if (length(select_var_index) == 0) { warning("Lasso selected no variables. Defining Q with intercept-only and full model.") select_var_index1 <- 1 # Just intercept index for design matrix } else { # IMPORTANT: Need indices relative to the design matrix used in simultaneous_ci # which includes intercept as column 1. Add 1 to Lasso indices. select_var_index1 <- c(1, select_var_index + 1) } ## ----------------------------------------------------------------------------- # Fallback if glmnet is not installed if (!requireNamespace("glmnet", quietly = TRUE)) { warning("glmnet package not found. Using arbitrary selection for vignette.", call. = FALSE) select_var_index <- c(1, 2, 3, 4, 5, 9, 13, 18, 22, 24, 26, 29, 33, 36, 41) select_var_names <- paste0("X", select_var_index) select_var_index1 <- c(1, select_var_index + 1) } ## ----------------------------------------------------------------------------- # Indices for X in simultaneous_ci context (assuming add_intercept=TRUE) # Intercept is 1, X1 is 2, ..., Xp is p+1 full_model_indices <- 1:(p_lasso + 1) Q_lasso <- list( # Model selected by Lasso (Intercept + selected X's) LassoSelected = select_var_index1, # Full model (Intercept + All X's) - optional, but common for context FullModel = full_model_indices ) # Check if Lasso selection was empty if (length(select_var_index1) == 1 && select_var_index1 == 1) { Q_lasso$LassoSelected <- 1 # Ensure it's just the intercept index } print("Models in Q_lasso (indices refer to design matrix with intercept):") # Print only first few indices for brevity if models are large print(lapply(Q_lasso, function(idx) if(length(idx)>10) c(idx[1:5], "...", idx[length(idx)]) else idx)) ## ----------------------------------------------------------------------------- # Note: We pass the original X matrix (without intercept) to simultaneous_ci # It will add the intercept based on add_intercept=TRUE result_lasso <- simultaneous_ci( X_lasso, y_lasso, Q_lasso, B = 2000, # Recommend B=2000+ for high-dim cores = 2, seed = 123, verbose = FALSE ) ## ----------------------------------------------------------------------------- intervals_lasso_comp <- result_lasso$intervals %>% filter(model_id == "LassoSelected") %>% # Focus on the Lasso model mutate( naive_lower = estimate - qnorm(0.975) * se_nqj, naive_upper = estimate + qnorm(0.975) * se_nqj ) %>% select(coefficient_name, estimate, lower, upper, naive_lower, naive_upper) %>% arrange(match(coefficient_name, c("(Intercept)", paste0("X", 1:p_lasso)))) # Order numerically print(intervals_lasso_comp, digits = 3) ## ----------------------------------------------------------------------------- # Plot only coefficients from the Lasso-selected model selected_plot <- result_lasso$intervals %>% filter(model_id == "LassoSelected") %>% pull(coefficient_name) plot(result_lasso, subset_pars = selected_plot, main = "Simultaneous CIs for Lasso-Selected Model", las.labels = 1)