Parallel simulation in R

parallelism
foreach
doParallel
Author

Jong-Hoon Kim

Published

June 14, 2024

I find that the parallel, doParallel, and foreach packages provide the easiest approach to parallel computing in R. The doParallel vignette provides a great overview. Calling library(doParallel) automatically loads the packages it depends on (foreach, iterators, and parallel).

# Attach doParallel; it also attaches foreach, iterators and parallel.
library(doParallel)

# Use about half of the detected cores, but never fewer than one worker:
# detectCores() may return NA, and on a single-core machine `ncores / 2`
# would request 0 workers, which makeCluster() rejects.
ncores <- detectCores()
n_workers <- max(1L, ncores %/% 2L, na.rm = TRUE)
cl <- makeCluster(getOption("cl.cores", n_workers))
registerDoParallel(cl)

# `x` lives in the calling session; it is referenced inside the loop body
# and made available to the workers by foreach.
x <- 2
res <- foreach(i = 1:4, .combine = c) %dopar% {
  c(i, x^2)
}

# Always release the worker processes once the loop is done.
stopCluster(cl)
res
[1] 1 4 2 4 3 4 4 4

We can load packages on each worker with the .packages argument.

# Start a fresh cluster with roughly half of the cores; fall back to a
# single worker when `ncores` is 1 or NA so makeCluster() never gets 0/NA.
n_workers <- max(1L, ncores %/% 2L, na.rm = TRUE)
cl <- makeCluster(getOption("cl.cores", n_workers))
registerDoParallel(cl)

# `.packages` loads data.table on every worker so frollmean() can be called
# there. Each task returns a 7-point rolling mean of 10 normal draws, and
# the five results are bound together as columns.
res <- foreach(i = 1:5, .packages = "data.table", .combine = cbind) %dopar% {
  frollmean(rnorm(10), 7)
}

stopCluster(cl)
res
         result.1   result.2   result.3  result.4     result.5
 [1,]          NA         NA         NA        NA           NA
 [2,]          NA         NA         NA        NA           NA
 [3,]          NA         NA         NA        NA           NA
 [4,]          NA         NA         NA        NA           NA
 [5,]          NA         NA         NA        NA           NA
 [6,]          NA         NA         NA        NA           NA
 [7,]  0.10610803 -0.2996241 0.03278228 0.4701361 -0.004181703
 [8,]  0.05807023 -0.3114088 0.20836936 0.3251219  0.400871629
 [9,]  0.14870568 -0.3538062 0.35330620 0.3301056  0.293785357
[10,] -0.01413357 -0.4768548 0.19839694 0.2946405 -0.029407561

We may want to set the same seed inside every task so that each task generates the same random numbers.

# Start a fresh cluster with roughly half of the cores; fall back to a
# single worker when `ncores` is 1 or NA so makeCluster() never gets 0/NA.
n_workers <- max(1L, ncores %/% 2L, na.rm = TRUE)
cl <- makeCluster(getOption("cl.cores", n_workers))
registerDoParallel(cl)

# The seed is reset at the start of every task, so all five tasks draw the
# same 10 numbers and the result columns are identical.
res <- foreach(i = 1:5, .packages = "data.table", .combine = cbind) %dopar% {
  set.seed(12)
  frollmean(rnorm(10), 7)
}

stopCluster(cl)
res
        result.1   result.2   result.3   result.4   result.5
 [1,]         NA         NA         NA         NA         NA
 [2,]         NA         NA         NA         NA         NA
 [3,]         NA         NA         NA         NA         NA
 [4,]         NA         NA         NA         NA         NA
 [5,]         NA         NA         NA         NA         NA
 [6,]         NA         NA         NA         NA         NA
 [7,] -0.6236335 -0.6236335 -0.6236335 -0.6236335 -0.6236335
 [8,] -0.5018746 -0.5018746 -0.5018746 -0.5018746 -0.5018746
 [9,] -0.7423937 -0.7423937 -0.7423937 -0.7423937 -0.7423937
[10,] -0.5445709 -0.5445709 -0.5445709 -0.5445709 -0.5445709

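Repeated foreach runs are not reproducible even when the same seed is set beforehand, because set.seed() in the main session does not control the random number generators of the workers.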

# Start a fresh cluster with roughly half of the cores; fall back to a
# single worker when `ncores` is 1 or NA so makeCluster() never gets 0/NA.
n_workers <- max(1L, ncores %/% 2L, na.rm = TRUE)
cl <- makeCluster(getOption("cl.cores", n_workers))
registerDoParallel(cl)

# Run the same pair of loops twice, re-seeding the main session in between.
# Note the leading dot in `.combine`: without it foreach treats the argument
# as one more iteration variable and the results are not combined by cbind.
set.seed(12)
a1 <- foreach(i = 1:2, .combine = cbind) %dopar% {
  rnorm(5)
}
b1 <- foreach(i = 1:2, .combine = cbind) %dopar% {
  rnorm(5)
}

set.seed(12)
a2 <- foreach(i = 1:2, .combine = cbind) %dopar% {
  rnorm(5)
}
b2 <- foreach(i = 1:2, .combine = cbind) %dopar% {
  rnorm(5)
}

# TRUE only if both rounds produced exactly the same draws.
identical(a1, a2) && identical(b1, b2)
[1] FALSE
stopCluster(cl)

Reproducibility across different foreach sessions is possible using the doRNG package. The examples below were adapted from a Stack Overflow post.

In the doRNG approach, you use %dorng% instead of %dopar%.

# doRNG provides the %dorng% operator used below in place of %dopar%.
library(doRNG)

# Start a fresh cluster with roughly half of the cores; fall back to a
# single worker when `ncores` is 1 or NA so makeCluster() never gets 0/NA.
n_workers <- max(1L, ncores %/% 2L, na.rm = TRUE)
cl <- makeCluster(getOption("cl.cores", n_workers))
registerDoParallel(cl)

# Run the same pair of loops twice, re-seeding the main session in between.
# Note the leading dot in `.combine`: without it foreach treats the argument
# as one more iteration variable and the results are not combined by cbind.
set.seed(12)
a1 <- foreach(i = 1:2, .combine = cbind) %dorng% {
  rnorm(5)
}
b1 <- foreach(i = 1:2, .combine = cbind) %dorng% {
  rnorm(5)
}

set.seed(12)
a2 <- foreach(i = 1:2, .combine = cbind) %dorng% {
  rnorm(5)
}
b2 <- foreach(i = 1:2, .combine = cbind) %dorng% {
  rnorm(5)
}

# TRUE only if both rounds produced exactly the same draws.
identical(a1, a2) && identical(b1, b2)
[1] TRUE
stopCluster(cl)

We may want to examine parallel workers.

# Start a fresh cluster with roughly half of the cores; fall back to a
# single worker when `ncores` is 1 or NA so makeCluster() never gets 0/NA.
n_workers <- max(1L, ncores %/% 2L, na.rm = TRUE)
cl <- makeCluster(getOption("cl.cores", n_workers))
registerDoParallel(cl)
# Number of workers the currently registered backend will use.
getDoParWorkers()
[1] 10
getDoParName()
[1] "doParallelSNOW"
getDoParVersion()
[1] "1.0.17"
stopCluster(cl)