From fc1a71a9e1200e430fd595b40a54cf70886272a1 Mon Sep 17 00:00:00 2001 From: Zongheng Yang Date: Tue, 1 Jul 2014 20:49:48 -0700 Subject: [PATCH] Fix that collect(parallelize(sc,1:72,15)) drops elements. --- pkg/R/context.R | 2 +- pkg/inst/tests/test_parallelize_collect.R | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/pkg/R/context.R b/pkg/R/context.R index 87a4163284d7d..9f7ff8917566d 100644 --- a/pkg/R/context.R +++ b/pkg/R/context.R @@ -62,7 +62,7 @@ parallelize <- function(sc, coll, numSlices = 1) { if (numSlices > length(coll)) numSlices <- length(coll) - sliceLen <- length(coll) %/% numSlices + sliceLen <- ceiling(length(coll) / numSlices) slices <- split(coll, rep(1:(numSlices + 1), each = sliceLen)[1:length(coll)]) # Serialize each slice: obtain a list of raws, or a list of lists (slices) of diff --git a/pkg/inst/tests/test_parallelize_collect.R b/pkg/inst/tests/test_parallelize_collect.R index 841248fc84275..a37465a36ef68 100644 --- a/pkg/inst/tests/test_parallelize_collect.R +++ b/pkg/inst/tests/test_parallelize_collect.R @@ -67,6 +67,17 @@ test_that("collect(), following a parallelize(), gives back the original collect expect_equal(collect(strListRDD2), as.list(strList)) }) +test_that("regression: collect() following a parallelize() does not drop elements", { + lapply(1:72, + function(collLen) { + lapply(1:15, function(numPart) { + expected <- runif(collLen) + actual <- collect(parallelize(jsc, expected, numPart)) + expect_equal(actual, as.list(expected)) + }) + }) +}) + test_that("parallelize() and collect() work for lists of pairs (pairwise data)", { # use the pairwise logical to indicate pairwise data numPairsRDDD1 <- parallelize(jsc, numPairs, 1)