-
I would like to develop a random matrix generator and I found very interesting example at https://cran.r-project.org/web/packages/dqrng/vignettes/parallel.html -----------------------------1-PCG: multiple streams with RcppParallel--------------------------------------------------- #include <Rcpp.h>
// [[Rcpp::depends(dqrng, BH)]]
#include <dqrng_generator.h>
#include <dqrng_sample.h>
// [[Rcpp::depends(RcppParallel)]]
#include <RcppParallel.h>
struct RandomFill : public RcppParallel::Worker {
RcppParallel::RMatrix<int> output;
uint64_t seed;
RandomFill(Rcpp::IntegerMatrix output, const uint64_t seed) : output(output), seed(seed) {};
void operator()(std::size_t begin, std::size_t end) {
auto rng = dqrng::generator<pcg64>(seed, end);
for (std::size_t col = begin; col < end; ++col) {
auto sampled = dqrng::sample::sample<std::vector<int>, uint32_t>(*rng, 100000, output.nrow(), true);
RcppParallel::RMatrix<int>::Column column = output.column(col);
std::copy(sampled.begin(), sampled.end(), column.begin());
}
}
};
// [[Rcpp::export]]
Rcpp::IntegerMatrix parallel_random_matrix(const int n, const int m, const int ncores) {
Rcpp::IntegerMatrix res(n, m);
RandomFill randomFill(res, 42);
RcppParallel::parallelFor(0, m, randomFill, m/ncores + 1);
return res;
} and I also would like to understand the difference between the former (1) and the latter (2) approach -----------------------------2-Xo(ro)shiro: jump ahead with OpenMP------------------------------------- #include <Rcpp.h>
// [[Rcpp::depends(dqrng, BH)]]
#include <dqrng_distribution.h>
// [[Rcpp::plugins(openmp)]]
#include <omp.h>
// [[Rcpp::export]]
std::vector<double> random_sum(int n, int m) {
dqrng::uniform_distribution dist(0.0, 1.0); // Uniform distribution [0,1)
auto rng = dqrng::generator<dqrng::xoshiro256plusplus>(); // seeded from R's RNG
std::vector<double> res(m);
for (int i = 0; i < m; ++i) {
double lres(0);
for (int j = 0; j < n; ++j) {
lres += dist(*rng);
}
res[i] = lres / n;
}
return res;
}
// [[Rcpp::export]]
std::vector<double> parallel_random_sum(int n, int m, int ncores) {
dqrng::uniform_distribution dist(0.0, 1.0); // Uniform distribution [0,1)
auto rng = dqrng::generator<dqrng::xoshiro256plusplus>(); // seeded from R's RNG
std::vector<double> res(m);
// ok to use rng here
#pragma omp parallel num_threads(ncores)
{
// make thread local copy of rng and advance it by 1 ... ncores jumps
auto lrng = rng->clone(omp_get_thread_num() + 1);
#pragma omp for
for (int i = 0; i < m; ++i) {
double lres(0);
for (int j = 0; j < n; ++j) {
lres += dist(*lrng);
}
res[i] = lres / n;
}
}
// ok to use rng here
return res;
} in order to understand which is the more efficient and faster of the two solutions. |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments
-
It is pretty straightforward to adapt the first example, since it already generates a matrix: #include <Rcpp.h>
// [[Rcpp::depends(dqrng, BH)]]
#include <dqrng_distribution.h>
// [[Rcpp::depends(RcppParallel)]]
#include <RcppParallel.h>
struct RandomFill : public RcppParallel::Worker {
RcppParallel::RMatrix<double> output;
uint64_t seed;
double min;
double max;
RandomFill(Rcpp::NumericMatrix output,
const uint64_t seed,
const double min = 0.0,
const double max = 1.0) : output(output), seed(seed), min(min), max(max) {};
void operator()(std::size_t begin, std::size_t end) {
auto rng = dqrng::generator<>(seed, end);
for (std::size_t col = begin; col < end; ++col) {
RcppParallel::RMatrix<double>::Column column = output.column(col);
rng->generate<dqrng::uniform_distribution>(column, min, max);
}
}
};
// [[Rcpp::export]]
Rcpp::NumericMatrix parallel_random_matrix(const int n,
const int m,
const int ncores = 2,
const double min = 0.0,
const double max = 1.0) {
Rcpp::NumericMatrix res(n, m);
RandomFill randomFill(res, 42, min, max);
RcppParallel::parallelFor(0, m, randomFill, m/ncores + 1);
return res;
}
/*** R
parallel_random_matrix(1e2, 6)
*/ Note that i am not asking for a specific RNG, which gives you the default (Xoroshiro128++), but you can select anything you like. However, you have to fix this in your cade and you have to make sure to seed the RNG properly (hard coded here to However, with the latest version there is the #include <Rcpp.h>
// [[Rcpp::depends(dqrng, BH, RcppParallel)]]
// [[Rcpp::plugins(openmp)]]
#include <dqrng_extra/parallel_generate.h>
#include <dqrng_distribution.h>
using dqrng::extra::parallel_generate;
// [[Rcpp::export]]
Rcpp::NumericVector runif_para(std::size_t n, std::size_t m, double min = 0, double max = 1,
std::size_t threads = 2, std::size_t streams = 24) {
auto res = parallel_generate<dqrng::uniform_distribution>(n * m, threads, streams, min, max);
res.attr("dim") = Rcpp::Dimension(n, m);
return res;
}
/*** R
runif_para(1e2, 6)
*/ Note that with this code you set the seed using |
Beta Was this translation helpful? Give feedback.
-
Wonderful! Thank you very much |
Beta Was this translation helpful? Give feedback.
It is pretty straightforward to adapt the first example, since it already generates a matrix: