[R] Use multiple cores on Linux

Wed Apr 20 16:37:07 CEST 2016

I am trying to run the following code in R on a Linux cluster. I would like
to use the full processing power (specifying cores/nodes/memory). The code
essentially runs predictions based on a GAM regression and saves the
results as a CSV file for multiple sets of data (here I only show two).

Is it possible to run this code using HPC packages such as
Rmpi/snow/doParallel? Thank you!

#####################
library(data.table)
library(mgcv)
library(reshape2)
library(dplyr)
library(tidyr)
library(lubridate)
library(DataCombine)
#
gam_max_count_wk <- gam(count_pop ~ factor(citycode) + factor(year) +
factor(week) + s(lnincome) + s(tmax) +
s(hmax),data=cont,na.action="na.omit", method="ML")

#
# Historic
temp_hist <- read.csv("/work/sd00815/giss_historic/giss_temp_hist.csv")
humid_hist <- read.csv("/work/sd00815/giss_historic/giss_hum_hist.csv")
#
temp_hist <- as.data.table(temp_hist)
humid_hist <- as.data.table(humid_hist)
#
# Merge
mykey<- c("FIPS", "year","month", "week")
setkeyv(temp_hist, mykey)
setkeyv(humid_hist, mykey)
#
hist<- merge(temp_hist, humid_hist, by=mykey)
#
hist$X.x <- NULL
hist$X.y <- NULL
#
# Max
hist_max <- hist
hist_max$FIPS <- hist_max$year <- hist_max$month <- hist_max$tmin <-
hist_max$tmean <- hist_max$hmin <- hist_max$hmean <- NULL
#
# Adding Factors
hist_max$citycode <- rep(101,nrow(hist_max))
hist_max$year <- rep(2010,nrow(hist_max))
hist_max$lnincome <- rep(10.262,nrow(hist_max))
#
# Predictions
pred_hist_max <- predict.gam(gam_max_count_wk,hist_max)
#
pred_hist_max <- as.data.table(pred_hist_max)
pred_hist_max <- cbind(hist, pred_hist_max)
pred_hist_max$tmax <- pred_hist_max$tmean <- pred_hist_max$tmin <-
pred_hist_max$hmean <- pred_hist_max$hmax <- pred_hist_max$hmin <- NULL
#
# Aggregate by FIPS
max_hist <- pred_hist_max %>%
  group_by(FIPS) %>%
  summarise(pred_hist = mean(pred_hist_max))
#
### Future
## 4.5
# 4.5_2021_2050
temp_sim <-
read.csv("/work/sd00815/giss_future/giss_4.5_2021_2050_temp.csv")
humid_sim <-
read.csv("/work/sd00815/giss_future/giss_4.5_2021_2050_temp.csv")
#
# Max
temp_sim <- as.data.table(temp_sim)
setnames(temp_sim, "max", "tmax")
setnames(temp_sim, "min", "tmin")
setnames(temp_sim, "avg", "tmean")
#
humid_sim <- as.data.table(humid_sim)
setnames(humid_sim, "max", "hmax")
setnames(humid_sim, "min", "hmin")
setnames(humid_sim, "avg", "hmean")
#
temp_sim$X <- NULL
humid_sim$X <- NULL
#
# Merge
mykey<- c("FIPS", "year","month", "week")
setkeyv(temp_sim, mykey)
setkeyv(humid_sim, mykey)
#
sim <- merge(temp_sim, humid_sim, by=mykey)
#
sim_max <- sim
#
sim_max$FIPS <- sim_max$year <- sim_max$month <- sim_max$tmin <-
sim_max$tmean <- sim_max$hmin <- sim_max$hmean <- NULL
#
# Adding Factors
sim_max$citycode <- rep(101,nrow(sim_max))
sim_max$year <- rep(2010,nrow(sim_max))
sim_max$week <- rep(1,nrow(sim_max))
sim_max$lnincome <- rep(10.262,nrow(sim_max))
#
# Predictions
pred_sim_max <- predict.gam(gam_max_count_wk,sim_max)
#
pred_sim_max <- as.data.table(pred_sim_max)
pred_sim_max <- cbind(sim, pred_sim_max)
pred_sim_max$tmax <- pred_sim_max$tmean <- pred_sim_max$tmin <-
pred_sim_max$hmean <- pred_sim_max$hmax <- pred_sim_max$hmin <- NULL
#
# Aggregate by FIPS
max_sim <- pred_sim_max %>%
  group_by(FIPS) %>%
  summarise(pred_sim = mean(pred_sim_max))
#
# Merge with Historical Data
max_hist$FIPS <- as.factor(max_hist$FIPS)
max_sim$FIPS <- as.factor(max_sim$FIPS)
#
mykey1<- c("FIPS")
setkeyv(max_hist, mykey1)
setkeyv(max_sim, mykey1)
max_change <- merge(max_hist, max_sim, by=mykey1)
max_change$change <-
((max_change$pred_sim-max_change$pred_hist)/max_change$pred_hist)*100
#
write.csv(max_change, file =
"/work/sd00815/projections_data/year_wk_fe/giss/max/giss_4.5_2021_2050.csv")

# 4.5_2081_2100
temp_sim <-
read.csv("/work/sd00815/giss_future/giss_4.5_2081_2100_temp.csv")
humid_sim <-
read.csv("/work/sd00815/giss_future/giss_4.5_2081_2100_temp.csv")
#
# Max
temp_sim <- as.data.table(temp_sim)
setnames(temp_sim, "max", "tmax")
setnames(temp_sim, "min", "tmin")
setnames(temp_sim, "avg", "tmean")
#
humid_sim <- as.data.table(humid_sim)
setnames(humid_sim, "max", "hmax")
setnames(humid_sim, "min", "hmin")
setnames(humid_sim, "avg", "hmean")
#
temp_sim$X <- NULL
humid_sim$X <- NULL
#
# Merge
mykey<- c("FIPS", "year","month", "week")
setkeyv(temp_sim, mykey)
setkeyv(humid_sim, mykey)
#
sim <- merge(temp_sim, humid_sim, by=mykey)
#
sim_max <- sim
#
sim_max$FIPS <- sim_max$year <- sim_max$month <- sim_max$tmin <-
sim_max$tmean <- sim_max$hmin <- sim_max$hmean <- NULL
#
# Adding Factors
sim_max$citycode <- rep(101,nrow(sim_max))
sim_max$year <- rep(2010,nrow(sim_max))
sim_max$week <- rep(1,nrow(sim_max))
sim_max$lnincome <- rep(10.262,nrow(sim_max))
#
# Predictions
pred_sim_max <- predict.gam(gam_max_count_wk,sim_max)
#
pred_sim_max <- as.data.table(pred_sim_max)
pred_sim_max <- cbind(sim, pred_sim_max)
pred_sim_max$tmax <- pred_sim_max$tmean <- pred_sim_max$tmin <-
pred_sim_max$hmean <- pred_sim_max$hmax <- pred_sim_max$hmin <- NULL
#
# Aggregate by FIPS
max_sim <- pred_sim_max %>%
  group_by(FIPS) %>%
  summarise(pred_sim = mean(pred_sim_max))
#
# Merge with Historical Data
max_hist$FIPS <- as.factor(max_hist$FIPS)
max_sim$FIPS <- as.factor(max_sim$FIPS)
#
mykey1<- c("FIPS")
setkeyv(max_hist, mykey1)
setkeyv(max_sim, mykey1)
max_change <- merge(max_hist, max_sim, by=mykey1)
max_change$change <-
((max_change$pred_sim-max_change$pred_hist)/max_change$pred_hist)*100
#
write.csv(max_change, file =
"/work/sd00815/projections_data/year_wk_fe/giss/max/giss_4.5_2081_2100.csv")

####################

Sincerely,

Milu

	[[alternative HTML version deleted]]