Skip to content

Commit

Permalink
Merge pull request apache#65 from concretevitamin/parallelize-fix
Browse files Browse the repository at this point in the history
Fix that collect(parallelize(sc,1:72,15)) drops elements.
  • Loading branch information
concretevitamin committed Jul 2, 2014
2 parents b8204c5 + fc1a71a commit e1f95b6
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pkg/R/context.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ parallelize <- function(sc, coll, numSlices = 1) {
if (numSlices > length(coll))
numSlices <- length(coll)

sliceLen <- length(coll) %/% numSlices
sliceLen <- ceiling(length(coll) / numSlices)
slices <- split(coll, rep(1:(numSlices + 1), each = sliceLen)[1:length(coll)])

# Serialize each slice: obtain a list of raws, or a list of lists (slices) of
Expand Down
11 changes: 11 additions & 0 deletions pkg/inst/tests/test_parallelize_collect.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,17 @@ test_that("collect(), following a parallelize(), gives back the original collect
expect_equal(collect(strListRDD2), as.list(strList))
})

test_that("regression: collect() following a parallelize() does not drop elements", {
lapply(1:72,
function(collLen) {
lapply(1:15, function(numPart) {
expected <- runif(collLen)
actual <- collect(parallelize(jsc, expected, numPart))
expect_equal(actual, as.list(expected))
})
})
})

test_that("parallelize() and collect() work for lists of pairs (pairwise data)", {
# use the pairwise logical to indicate pairwise data
numPairsRDDD1 <- parallelize(jsc, numPairs, 1)
Expand Down

0 comments on commit e1f95b6

Please sign in to comment.