## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 8, fig.height = 6 ) ## ----setup-------------------------------------------------------------------- library(manureshed) ## ----custom_thresholds, eval=FALSE-------------------------------------------- # # More restrictive (exclude more areas) # results_conservative <- run_builtin_analysis( # scale = "huc8", # year = 2016, # nutrients = "nitrogen", # cropland_threshold = 2000 # 2000 acres instead of 1234 # ) # # # Less restrictive (include more areas) # results_liberal <- run_builtin_analysis( # scale = "huc8", # year = 2016, # nutrients = "nitrogen", # cropland_threshold = 500 # 500 acres # ) # # # Compare the difference # conservative_excluded <- sum(results_conservative$agricultural$N_class == "Excluded") # liberal_excluded <- sum(results_liberal$agricultural$N_class == "Excluded") # # print(paste("Conservative:", conservative_excluded, "excluded")) # print(paste("Liberal:", liberal_excluded, "excluded")) ## ----auto_threshold, eval=FALSE----------------------------------------------- # # Load data for threshold calculation # county_data <- load_builtin_nugis("county", 2016) # huc8_data <- load_builtin_nugis("huc8", 2016) # # # Calculate appropriate threshold for HUC8 # custom_threshold <- get_cropland_threshold( # scale = "huc8", # county_data = county_data, # target_data = huc8_data, # baseline_ha = 750 # Use 750 ha instead of 500 ha baseline # ) # # print(paste("Custom threshold:", round(custom_threshold, 2), "acres")) # # # Use in analysis # results <- run_builtin_analysis( # scale = "huc8", # year = 2016, # nutrients = "nitrogen", # cropland_threshold = custom_threshold # ) ## ----state_analysis, eval=FALSE----------------------------------------------- # # Analyze specific states # iowa_results <- run_state_analysis( # state = "IA", # scale = "county", # year = 2016, # nutrients = c("nitrogen", "phosphorus"), # include_wwtp = TRUE # ) # # # Quick state analysis with maps # texas_results <- quick_state_analysis( # state = "TX", # scale = "huc8", # year = 2015, # nutrients = "nitrogen", # create_maps = TRUE, # create_networks = TRUE # ) ## ----multi_state, eval=FALSE-------------------------------------------------- # # Compare agricultural states # corn_belt_states <- c("IA", "IL", "IN", "NE", "OH") # state_results <- list() # # for (state in corn_belt_states) { # state_results[[state]] <- run_state_analysis( # state = state, # scale = "county", # year = 2016, # nutrients = "nitrogen", # include_wwtp = TRUE, # verbose = FALSE # Reduce output # ) # } # # # Compare state summaries # state_summary <- do.call(rbind, lapply(names(state_results), function(state) { # result <- state_results[[state]] # classes <- table(result$agricultural$N_class) # data.frame( # State = state, # Total_Counties = sum(classes), # Source_Counties = classes["Source"] %||% 0, # Sink_Counties = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE), # Source_Percent = round((classes["Source"] %||% 0) / sum(classes) * 100, 1) # ) # })) # # print(state_summary) ## ----batch_years, eval=FALSE-------------------------------------------------- # # Analyze multiple years efficiently # batch_results <- batch_analysis_years( # years = 2012:2016, # scale = "huc8", # nutrients = "nitrogen", # include_wwtp = TRUE, # create_comparative_plots = TRUE # ) # # # Check which years succeeded # successful_years <- names(batch_results$results) # print(paste("Successful years:", paste(successful_years, collapse = ", "))) ## ----parallel, eval=FALSE----------------------------------------------------- # # Use multiple CPU cores # parallel_results <- batch_analysis_parallel( # years = 2014:2016, # n_cores = 2, # Use 2 cores # scale = "county", # nutrients = "nitrogen", # include_wwtp = TRUE # ) # # # Check results # successful <- sum(!sapply(parallel_results, function(x) "error" %in% names(x))) # print(paste("Successful analyses:", successful, "out of", length(parallel_results))) ## ----enhanced_batch, eval=FALSE----------------------------------------------- # # Full batch analysis with all visualizations # enhanced_results <- batch_analysis_enhanced( # years = 2015:2016, # Use fewer years for demonstration # scale = "county", # nutrients = "nitrogen", # create_all_visualizations = TRUE, # create_comparative_plots = TRUE # ) ## ----benchmarking, eval=FALSE------------------------------------------------- # # Benchmark different configurations # benchmark_results <- benchmark_analysis( # scale = "county", # year = 2016, # nutrients = "nitrogen", # n_runs = 3, # include_wwtp = TRUE # ) # # print(benchmark_results) # # # Compare scales # scales_to_test <- c("county", "huc8", "huc2") # benchmark_comparison <- list() # # for (scale in scales_to_test) { # benchmark_comparison[[scale]] <- benchmark_analysis( # scale = scale, # year = 2016, # nutrients = "nitrogen", # n_runs = 2, # include_wwtp = TRUE # ) # } # # # Compare timing # timing_comparison <- data.frame( # Scale = scales_to_test, # Mean_Time_Seconds = sapply(benchmark_comparison, function(x) x$timing$mean), # Memory_MB = sapply(benchmark_comparison, function(x) x$memory_mb$mean) # ) # # print(timing_comparison) ## ----memory_management, eval=FALSE-------------------------------------------- # # Clear cache to free up space # clear_data_cache() # # # Check package health # health_status <- health_check(verbose = TRUE) # # # Test OSF connection # connection_ok <- test_osf_connection() # print(paste("OSF connection:", ifelse(connection_ok, "OK", "Failed"))) ## ----transition_analysis, eval=FALSE------------------------------------------ # # Run analysis # results <- run_builtin_analysis( # scale = "huc8", # year = 2016, # nutrients = "nitrogen", # include_wwtp = TRUE # ) # # # Calculate centroids for spatial analysis # centroids <- add_centroid_coordinates(results$integrated$nitrogen) # # # Calculate transition probabilities # agri_transitions <- calculate_transition_probabilities( # centroids, "N_class" # ) # # combined_transitions <- calculate_transition_probabilities( # centroids, "combined_N_class" # ) # # # Compare transition matrices # print("Agricultural transitions:") # print(agri_transitions) # # print("Combined (WWTP + Agricultural) transitions:") # print(combined_transitions) # # # Create network plots # create_network_plot( # agri_transitions, "nitrogen", "Agricultural", # "network_agricultural.png" # ) # # create_network_plot( # combined_transitions, "nitrogen", "Combined", # "network_combined.png" # ) ## ----spatial_stats, eval=FALSE------------------------------------------------ # # Calculate spatial statistics # library(sf) # # # Get results as spatial data # spatial_results <- results$integrated$nitrogen # # # Calculate areas of different classifications # class_areas <- spatial_results %>% # mutate(area_km2 = as.numeric(st_area(.)) / 1000000) %>% # st_drop_geometry() %>% # group_by(combined_N_class) %>% # summarise( # count = n(), # total_area_km2 = sum(area_km2), # mean_area_km2 = mean(area_km2), # .groups = 'drop' # ) # # print(class_areas) # # # Find neighboring relationships # neighbors <- st_touches(spatial_results) # neighbor_summary <- data.frame( # ID = spatial_results$ID, # N_neighbors = lengths(neighbors), # Class = spatial_results$combined_N_class # ) # # print("Average neighbors by classification:") # neighbor_summary %>% # group_by(Class) %>% # summarise(mean_neighbors = mean(N_neighbors), .groups = 'drop') ## ----custom_workflow, eval=FALSE---------------------------------------------- # # Example: Livestock-intensive regions analysis # analyze_livestock_regions <- function(states, year = 2016) { # # results <- list() # # for (state in states) { # # Custom threshold for livestock regions # county_data <- load_builtin_nugis("county", year) # # # Calculate state-specific threshold # state_county <- county_data[substr(county_data$ID, 1, 2) == get_state_fips(state), ] # custom_threshold <- quantile(state_county$cropland, 0.25) # 25th percentile # # # Run analysis # state_result <- run_state_analysis( # state = state, # scale = "county", # year = year, # nutrients = "nitrogen", # include_wwtp = TRUE, # cropland_threshold = custom_threshold, # verbose = FALSE # ) # # results[[state]] <- state_result # } # # return(results) # } # # # Apply to livestock states # livestock_states <- c("IA", "NE", "NC", "MN") # livestock_results <- analyze_livestock_regions(livestock_states, 2016) ## ----time_series, eval=FALSE-------------------------------------------------- # # Custom time series analysis # analyze_trends <- function(scale, years, nutrient = "nitrogen") { # # trend_data <- list() # # for (year in years) { # # Check if WWTP data available # use_wwtp <- year >= 2007 && year <= 2016 # # result <- run_builtin_analysis( # scale = scale, # year = year, # nutrients = nutrient, # include_wwtp = use_wwtp, # save_outputs = FALSE, # verbose = FALSE # ) # # # Extract classification summary # if (use_wwtp && "integrated" %in% names(result)) { # classes <- table(result$integrated[[nutrient]][[paste0("combined_", toupper(substr(nutrient, 1, 1)), "_class")]]) # } else { # classes <- table(result$agricultural[[paste0(toupper(substr(nutrient, 1, 1)), "_class")]]) # } # # trend_data[[as.character(year)]] <- list( # year = year, # wwtp_included = use_wwtp, # classes = classes, # source_count = classes["Source"] %||% 0, # sink_count = sum(classes[c("Sink_Deficit", "Sink_Fertilizer")], na.rm = TRUE) # ) # } # # return(trend_data) # } # # # Analyze nitrogen trends # nitrogen_trends <- analyze_trends("huc8", 2008:2016, "nitrogen") # # # Convert to data frame for plotting # trend_df <- do.call(rbind, lapply(nitrogen_trends, function(x) { # data.frame( # Year = x$year, # WWTP_Included = x$wwtp_included, # Source_Count = x$source_count, # Sink_Count = x$sink_count, # Total_Units = sum(x$classes) # ) # })) # # print(trend_df) ## ----data_export, eval=FALSE-------------------------------------------------- # # Export for GIS software # gis_files <- export_for_gis( # results, # output_dir = "gis_export", # formats = c("shapefile", "geojson") # ) # # # Export for publication # pub_files <- export_for_publication( # results, # output_dir = "publication_export", # dpi = 600 # ) # # # Export for policy makers # policy_files <- export_for_policy( # results, # output_dir = "policy_export" # ) # # print("Created files:") # print(c(gis_files, pub_files, policy_files)) ## ----package_integration, eval=FALSE------------------------------------------ # # Example: Using with tigris for custom boundaries # if (requireNamespace("tigris", quietly = TRUE)) { # # Get state boundary # ohio_boundary <- tigris::states() %>% # filter(STUSPS == "OH") %>% # st_transform(5070) # Transform to analysis CRS # # # Spatial filter results # ohio_watersheds <- st_filter(results$integrated$nitrogen, ohio_boundary) # # print(paste("Ohio watersheds:", nrow(ohio_watersheds))) # } # # # Example: Using with nhdplusTools for watershed delineation # if (requireNamespace("nhdplusTools", quietly = TRUE)) { # # Find watershed for a specific point # point <- st_sfc(st_point(c(-83.0458, 42.3314)), crs = 4326) # Detroit # watershed_info <- nhdplusTools::discover_nhdplus_id(point) # # # Filter results to this watershed # if (!is.null(watershed_info$huc8)) { # watershed_result <- results$integrated$nitrogen %>% # filter(ID == watershed_info$huc8) # print(watershed_result) # } # } ## ----advanced_validation, eval=FALSE------------------------------------------ # # Comprehensive quality check # validation_report <- list() # # # Check data completeness # validation_report$completeness <- list( # total_units = nrow(results$agricultural), # missing_n_class = sum(is.na(results$agricultural$N_class)), # excluded_percent = sum(results$agricultural$N_class == "Excluded", na.rm = TRUE) / # nrow(results$agricultural) * 100 # ) # # # Check balance calculations # if ("integrated" %in% names(results)) { # n_data <- results$integrated$nitrogen # validation_report$balance_check <- list( # impossible_sources = sum(n_data$combined_N_surplus <= 0 & # n_data$combined_N_class == "Source", na.rm = TRUE), # impossible_sinks = sum(n_data$combined_N_surplus > 0 & # n_data$combined_N_class %in% c("Sink_Deficit", "Sink_Fertilizer"), na.rm = TRUE) # ) # } # # # Check spatial validity # if (inherits(results$agricultural, "sf")) { # validation_report$spatial_check <- list( # invalid_geometries = sum(!st_is_valid(results$agricultural)), # crs = st_crs(results$agricultural)$input # ) # } # # print("Validation Report:") # print(str(validation_report)) ## ----performance_tips, eval=FALSE--------------------------------------------- # # 1. Use appropriate scales # # County: ~3000 units, good for policy analysis # # HUC8: ~2000 units, good for watershed analysis # # HUC2: ~18 units, good for regional analysis # # # 2. Manage memory for large analyses # gc() # Garbage collection # clear_data_cache() # Clear package cache # # # 3. Use parallel processing for multiple years # # But be careful not to use too many cores # # # 4. Save intermediate results # save_spatial_data(results$agricultural, "intermediate_results.rds") ## ----reproducibility, eval=FALSE---------------------------------------------- # # Always document your analysis parameters # analysis_params <- list( # scale = "huc8", # year = 2016, # nutrients = "nitrogen", # cropland_threshold = 1234, # include_wwtp = TRUE, # analysis_date = Sys.Date(), # package_version = packageVersion("manureshed") # ) # # # Save parameters with results # results$analysis_params <- analysis_params # save_analysis_summary(results, "analysis_summary.json", format = "json")