diff --git a/R/pkg/.Rbuildignore b/R/pkg/.Rbuildignore index f12f8c275a989..18b2db69db8f1 100644 --- a/R/pkg/.Rbuildignore +++ b/R/pkg/.Rbuildignore @@ -6,3 +6,4 @@ ^README\.Rmd$ ^src-native$ ^html$ +^tests/fulltests/* diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R index 4ca7aa664e023..ec931befa2854 100644 --- a/R/pkg/R/install.R +++ b/R/pkg/R/install.R @@ -267,7 +267,7 @@ hadoopVersionName <- function(hadoopVersion) { # The implementation refers to appdirs package: https://pypi.python.org/pypi/appdirs and # adapt to Spark context sparkCachePath <- function() { - if (.Platform$OS.type == "windows") { + if (is_windows()) { winAppPath <- Sys.getenv("LOCALAPPDATA", unset = NA) if (is.na(winAppPath)) { stop(paste("%LOCALAPPDATA% not found.", diff --git a/R/pkg/R/utils.R b/R/pkg/R/utils.R index ea45e394500e8..91483a4d23d9b 100644 --- a/R/pkg/R/utils.R +++ b/R/pkg/R/utils.R @@ -908,10 +908,6 @@ isAtomicLengthOne <- function(x) { is.atomic(x) && length(x) == 1 } -is_cran <- function() { - !identical(Sys.getenv("NOT_CRAN"), "true") -} - is_windows <- function() { .Platform$OS.type == "windows" } @@ -920,6 +916,6 @@ hadoop_home_set <- function() { !identical(Sys.getenv("HADOOP_HOME"), "") } -not_cran_or_windows_with_hadoop <- function() { - !is_cran() && (!is_windows() || hadoop_home_set()) +windows_with_hadoop <- function() { + !is_windows() || hadoop_home_set() } diff --git a/R/pkg/tests/fulltests/test_Serde.R b/R/pkg/tests/fulltests/test_Serde.R index 6e160fae1afed..6bbd201bf1d82 100644 --- a/R/pkg/tests/fulltests/test_Serde.R +++ b/R/pkg/tests/fulltests/test_Serde.R @@ -20,8 +20,6 @@ context("SerDe functionality") sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) test_that("SerDe of primitive types", { - skip_on_cran() - x <- callJStatic("SparkRHandler", "echo", 1L) expect_equal(x, 1L) expect_equal(class(x), "integer") @@ -40,8 +38,6 @@ test_that("SerDe of primitive types", { }) test_that("SerDe of list of primitive types", { - skip_on_cran() - x <- list(1L, 2L, 3L) y <- callJStatic("SparkRHandler", "echo", x) expect_equal(x, y) @@ -69,8 +65,6 @@ test_that("SerDe of list of primitive types", { }) test_that("SerDe of list of lists", { - skip_on_cran() - x <- list(list(1L, 2L, 3L), list(1, 2, 3), list(TRUE, FALSE), list("a", "b", "c")) y <- callJStatic("SparkRHandler", "echo", x) diff --git a/R/pkg/tests/fulltests/test_Windows.R b/R/pkg/tests/fulltests/test_Windows.R index 00d684e1a49ef..b2ec6c67311db 100644 --- a/R/pkg/tests/fulltests/test_Windows.R +++ b/R/pkg/tests/fulltests/test_Windows.R @@ -17,9 +17,7 @@ context("Windows-specific tests") test_that("sparkJars tag in SparkContext", { - skip_on_cran() - - if (.Platform$OS.type != "windows") { + if (!is_windows()) { skip("This test is only for Windows, skipped") } @@ -27,6 +25,3 @@ test_that("sparkJars tag in SparkContext", { abcPath <- testOutput[1] expect_equal(abcPath, "a\\b\\c") }) - -message("--- End test (Windows) ", as.POSIXct(Sys.time(), tz = "GMT")) -message("elapsed ", (proc.time() - timer_ptm)[3]) diff --git a/R/pkg/tests/fulltests/test_binaryFile.R b/R/pkg/tests/fulltests/test_binaryFile.R index 00954fa31b0ee..758b174b8787c 100644 --- a/R/pkg/tests/fulltests/test_binaryFile.R +++ b/R/pkg/tests/fulltests/test_binaryFile.R @@ -24,8 +24,6 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", mockFile <- c("Spark is pretty.", "Spark is awesome.") test_that("saveAsObjectFile()/objectFile() following textFile() works", { - skip_on_cran() - fileName1 <- tempfile(pattern = 
"spark-test", fileext = ".tmp") fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) @@ -40,8 +38,6 @@ test_that("saveAsObjectFile()/objectFile() following textFile() works", { }) test_that("saveAsObjectFile()/objectFile() works on a parallelized list", { - skip_on_cran() - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") l <- list(1, 2, 3) @@ -54,8 +50,6 @@ test_that("saveAsObjectFile()/objectFile() works on a parallelized list", { }) test_that("saveAsObjectFile()/objectFile() following RDD transformations works", { - skip_on_cran() - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) @@ -80,8 +74,6 @@ test_that("saveAsObjectFile()/objectFile() following RDD transformations works", }) test_that("saveAsObjectFile()/objectFile() works with multiple paths", { - skip_on_cran() - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") diff --git a/R/pkg/tests/fulltests/test_binary_function.R b/R/pkg/tests/fulltests/test_binary_function.R index 236cb3885445e..442bed509bb1d 100644 --- a/R/pkg/tests/fulltests/test_binary_function.R +++ b/R/pkg/tests/fulltests/test_binary_function.R @@ -29,8 +29,6 @@ rdd <- parallelize(sc, nums, 2L) mockFile <- c("Spark is pretty.", "Spark is awesome.") test_that("union on two RDDs", { - skip_on_cran() - actual <- collectRDD(unionRDD(rdd, rdd)) expect_equal(actual, as.list(rep(nums, 2))) @@ -53,8 +51,6 @@ test_that("union on two RDDs", { }) test_that("cogroup on two RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) cogroup.rdd <- cogroup(rdd1, rdd2, numPartitions = 2L) @@ -73,8 +69,6 @@ test_that("cogroup on two RDDs", { }) test_that("zipPartitions() on RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, 1:2, 2L) # 1, 2 rdd2 <- parallelize(sc, 1:4, 2L) # 1:2, 3:4 rdd3 <- parallelize(sc, 1:6, 2L) # 1:3, 4:6 diff --git a/R/pkg/tests/fulltests/test_broadcast.R b/R/pkg/tests/fulltests/test_broadcast.R index 2c96740df77bb..fc2c7c2deb825 100644 --- a/R/pkg/tests/fulltests/test_broadcast.R +++ b/R/pkg/tests/fulltests/test_broadcast.R @@ -26,8 +26,6 @@ nums <- 1:2 rrdd <- parallelize(sc, nums, 2L) test_that("using broadcast variable", { - skip_on_cran() - randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) randomMatBr <- broadcastRDD(sc, randomMat) @@ -40,8 +38,6 @@ test_that("using broadcast variable", { }) test_that("without using broadcast variable", { - skip_on_cran() - randomMat <- matrix(nrow = 10, ncol = 10, data = rnorm(100)) useBroadcast <- function(x) { diff --git a/R/pkg/tests/fulltests/test_client.R b/R/pkg/tests/fulltests/test_client.R index 3d53bebab6300..0cf25fe1dbf39 100644 --- a/R/pkg/tests/fulltests/test_client.R +++ b/R/pkg/tests/fulltests/test_client.R @@ -18,8 +18,6 @@ context("functions in client.R") test_that("adding spark-testing-base as a package works", { - skip_on_cran() - args <- generateSparkSubmitArgs("", "", "", "", "holdenk:spark-testing-base:1.3.0_0.0.5") expect_equal(gsub("[[:space:]]", "", args), @@ -28,22 +26,16 @@ test_that("adding spark-testing-base as a package works", { }) test_that("no package specified doesn't add packages flag", { - skip_on_cran() - args <- generateSparkSubmitArgs("", "", "", "", "") expect_equal(gsub("[[:space:]]", "", args), "") }) test_that("multiple packages don't produce a 
warning", { - skip_on_cran() - expect_warning(generateSparkSubmitArgs("", "", "", "", c("A", "B")), NA) }) test_that("sparkJars sparkPackages as character vectors", { - skip_on_cran() - args <- generateSparkSubmitArgs("", "", c("one.jar", "two.jar", "three.jar"), "", c("com.databricks:spark-avro_2.10:2.0.1")) expect_match(args, "--jars one.jar,two.jar,three.jar") diff --git a/R/pkg/tests/fulltests/test_context.R b/R/pkg/tests/fulltests/test_context.R index f6d9f5423df02..710485d56685a 100644 --- a/R/pkg/tests/fulltests/test_context.R +++ b/R/pkg/tests/fulltests/test_context.R @@ -18,8 +18,6 @@ context("test functions in sparkR.R") test_that("Check masked functions", { - skip_on_cran() - # Check that we are not masking any new function from base, stats, testthat unexpectedly # NOTE: We should avoid adding entries to *namesOfMaskedCompletely* as masked functions make it # hard for users to use base R functions. Please check when in doubt. @@ -57,8 +55,6 @@ test_that("Check masked functions", { }) test_that("repeatedly starting and stopping SparkR", { - skip_on_cran() - for (i in 1:4) { sc <- suppressWarnings(sparkR.init(master = sparkRTestMaster)) rdd <- parallelize(sc, 1:20, 2L) @@ -77,8 +73,6 @@ test_that("repeatedly starting and stopping SparkSession", { }) test_that("rdd GC across sparkR.stop", { - skip_on_cran() - sc <- sparkR.sparkContext(master = sparkRTestMaster) # sc should get id 0 rdd1 <- parallelize(sc, 1:20, 2L) # rdd1 should get id 1 rdd2 <- parallelize(sc, 1:10, 2L) # rdd2 should get id 2 @@ -102,8 +96,6 @@ test_that("rdd GC across sparkR.stop", { }) test_that("job group functions can be called", { - skip_on_cran() - sc <- sparkR.sparkContext(master = sparkRTestMaster) setJobGroup("groupId", "job description", TRUE) cancelJobGroup("groupId") @@ -116,16 +108,12 @@ test_that("job group functions can be called", { }) test_that("utility function can be called", { - skip_on_cran() - sparkR.sparkContext(master = sparkRTestMaster) setLogLevel("ERROR") sparkR.session.stop() }) test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whitelist", { - skip_on_cran() - e <- new.env() e[["spark.driver.memory"]] <- "512m" ops <- getClientModeSparkSubmitOpts("sparkrmain", e) @@ -153,8 +141,6 @@ test_that("getClientModeSparkSubmitOpts() returns spark-submit args from whiteli }) test_that("sparkJars sparkPackages as comma-separated strings", { - skip_on_cran() - expect_warning(processSparkJars(" a, b ")) jars <- suppressWarnings(processSparkJars(" a, b ")) expect_equal(lapply(jars, basename), list("a", "b")) @@ -182,8 +168,6 @@ test_that("spark.lapply should perform simple transforms", { }) test_that("add and get file to be downloaded with Spark job on every node", { - skip_on_cran() - sparkR.sparkContext(master = sparkRTestMaster) # Test add file. path <- tempfile(pattern = "hello", fileext = ".txt") diff --git a/R/pkg/tests/fulltests/test_includePackage.R b/R/pkg/tests/fulltests/test_includePackage.R index d7d9eeed1575e..f4ea0d1b5cb27 100644 --- a/R/pkg/tests/fulltests/test_includePackage.R +++ b/R/pkg/tests/fulltests/test_includePackage.R @@ -26,8 +26,6 @@ nums <- 1:2 rdd <- parallelize(sc, nums, 2L) test_that("include inside function", { - skip_on_cran() - # Only run the test if plyr is installed. if ("plyr" %in% rownames(installed.packages())) { suppressPackageStartupMessages(library(plyr)) @@ -44,8 +42,6 @@ test_that("include inside function", { }) test_that("use include package", { - skip_on_cran() - # Only run the test if plyr is installed. 
if ("plyr" %in% rownames(installed.packages())) { suppressPackageStartupMessages(library(plyr)) diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R b/R/pkg/tests/fulltests/test_mllib_classification.R index 82e588dc460d0..726e9d9a20b1c 100644 --- a/R/pkg/tests/fulltests/test_mllib_classification.R +++ b/R/pkg/tests/fulltests/test_mllib_classification.R @@ -28,8 +28,6 @@ absoluteSparkPath <- function(x) { } test_that("spark.svmLinear", { - skip_on_cran() - df <- suppressWarnings(createDataFrame(iris)) training <- df[df$Species %in% c("versicolor", "virginica"), ] model <- spark.svmLinear(training, Species ~ ., regParam = 0.01, maxIter = 10) @@ -51,7 +49,7 @@ test_that("spark.svmLinear", { expect_equal(sort(as.list(take(select(prediction, "prediction"), 10))[[1]]), expected) # Test model save and load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-svm-linear", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -131,7 +129,7 @@ test_that("spark.logit", { expect_true(all(abs(setosaCoefs - setosaCoefs) < 0.1)) # Test model save and load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-logit", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -228,8 +226,6 @@ test_that("spark.logit", { }) test_that("spark.mlp", { - skip_on_cran() - df <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 4, 3), @@ -250,7 +246,7 @@ test_that("spark.mlp", { expect_equal(head(mlpPredictions$prediction, 6), c("1.0", "0.0", "0.0", "0.0", "0.0", "0.0")) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-mlp", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -363,7 +359,7 @@ test_that("spark.naiveBayes", { "Yes", "Yes", "No", "No")) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-naiveBayes", fileext = ".tmp") write.ml(m, modelPath) expect_error(write.ml(m, modelPath)) diff --git a/R/pkg/tests/fulltests/test_mllib_clustering.R b/R/pkg/tests/fulltests/test_mllib_clustering.R index e827e961ab4c1..4110e13da4948 100644 --- a/R/pkg/tests/fulltests/test_mllib_clustering.R +++ b/R/pkg/tests/fulltests/test_mllib_clustering.R @@ -28,8 +28,6 @@ absoluteSparkPath <- function(x) { } test_that("spark.bisectingKmeans", { - skip_on_cran() - newIris <- iris newIris$Species <- NULL training <- suppressWarnings(createDataFrame(newIris)) @@ -55,7 +53,7 @@ test_that("spark.bisectingKmeans", { c(0, 1, 2, 3)) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-bisectingkmeans", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -129,7 +127,7 @@ test_that("spark.gaussianMixture", { expect_equal(p$prediction, c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1)) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-gaussianMixture", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -177,7 +175,7 @@ test_that("spark.kmeans", { 
expect_true(class(summary.model$coefficients[1, ]) == "numeric") # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-kmeans", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -244,7 +242,7 @@ test_that("spark.lda with libsvm", { expect_true(logPrior <= 0 & !is.na(logPrior)) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-lda", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -265,8 +263,6 @@ test_that("spark.lda with libsvm", { }) test_that("spark.lda with text input", { - skip_on_cran() - text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt")) model <- spark.lda(text, optimizer = "online", features = "value") @@ -309,8 +305,6 @@ test_that("spark.lda with text input", { }) test_that("spark.posterior and spark.perplexity", { - skip_on_cran() - text <- read.text(absoluteSparkPath("data/mllib/sample_lda_data.txt")) model <- spark.lda(text, features = "value", k = 3) diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R b/R/pkg/tests/fulltests/test_mllib_fpm.R index 4e10ca1e4f50b..69dda52f0c279 100644 --- a/R/pkg/tests/fulltests/test_mllib_fpm.R +++ b/R/pkg/tests/fulltests/test_mllib_fpm.R @@ -62,7 +62,7 @@ test_that("spark.fpGrowth", { expect_equivalent(expected_predictions, collect(predict(model, new_data))) - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-fpm", fileext = ".tmp") write.ml(model, modelPath, overwrite = TRUE) loaded_model <- read.ml(modelPath) diff --git a/R/pkg/tests/fulltests/test_mllib_recommendation.R b/R/pkg/tests/fulltests/test_mllib_recommendation.R index cc8064f88d27a..4d919c9d746b0 100644 --- a/R/pkg/tests/fulltests/test_mllib_recommendation.R +++ b/R/pkg/tests/fulltests/test_mllib_recommendation.R @@ -37,7 +37,7 @@ test_that("spark.als", { tolerance = 1e-4) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-als", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) diff --git a/R/pkg/tests/fulltests/test_mllib_regression.R b/R/pkg/tests/fulltests/test_mllib_regression.R index b05fdd350ca28..82472c92b9965 100644 --- a/R/pkg/tests/fulltests/test_mllib_regression.R +++ b/R/pkg/tests/fulltests/test_mllib_regression.R @@ -23,8 +23,6 @@ context("MLlib regression algorithms, except for tree-based algorithms") sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) test_that("formula of spark.glm", { - skip_on_cran() - training <- suppressWarnings(createDataFrame(iris)) # directly calling the spark API # dot minus and intercept vs native glm @@ -197,8 +195,6 @@ test_that("spark.glm summary", { }) test_that("spark.glm save/load", { - skip_on_cran() - training <- suppressWarnings(createDataFrame(iris)) m <- spark.glm(training, Sepal_Width ~ Sepal_Length + Species) s <- summary(m) @@ -226,8 +222,6 @@ test_that("spark.glm save/load", { }) test_that("formula of glm", { - skip_on_cran() - training <- suppressWarnings(createDataFrame(iris)) # dot minus and intercept vs native glm model <- glm(Sepal_Width ~ . 
- Species + 0, data = training) @@ -254,8 +248,6 @@ test_that("formula of glm", { }) test_that("glm and predict", { - skip_on_cran() - training <- suppressWarnings(createDataFrame(iris)) # gaussian family model <- glm(Sepal_Width ~ Sepal_Length + Species, data = training) @@ -300,8 +292,6 @@ test_that("glm and predict", { }) test_that("glm summary", { - skip_on_cran() - # gaussian family training <- suppressWarnings(createDataFrame(iris)) stats <- summary(glm(Sepal_Width ~ Sepal_Length + Species, data = training)) @@ -351,8 +341,6 @@ test_that("glm summary", { }) test_that("glm save/load", { - skip_on_cran() - training <- suppressWarnings(createDataFrame(iris)) m <- glm(Sepal_Width ~ Sepal_Length + Species, data = training) s <- summary(m) @@ -401,7 +389,7 @@ test_that("spark.isoreg", { expect_equal(predict_result$prediction, c(7.0, 7.0, 6.0, 5.5, 5.0, 4.0, 1.0)) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-isoreg", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -452,7 +440,7 @@ test_that("spark.survreg", { 2.390146, 2.891269, 2.891269), tolerance = 1e-4) # Test model save/load - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-survreg", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) diff --git a/R/pkg/tests/fulltests/test_mllib_tree.R b/R/pkg/tests/fulltests/test_mllib_tree.R index 31427ee52a5e9..9b3fc8d270b25 100644 --- a/R/pkg/tests/fulltests/test_mllib_tree.R +++ b/R/pkg/tests/fulltests/test_mllib_tree.R @@ -28,8 +28,6 @@ absoluteSparkPath <- function(x) { } test_that("spark.gbt", { - skip_on_cran() - # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.gbt(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16, seed = 123) @@ -46,7 +44,7 @@ test_that("spark.gbt", { expect_equal(stats$numFeatures, 6) expect_equal(length(stats$treeWeights), 20) - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-gbtRegression", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -80,7 +78,7 @@ test_that("spark.gbt", { expect_equal(length(grep("setosa", predictions)), 50) expect_equal(length(grep("versicolor", predictions)), 50) - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-gbtClassification", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -105,7 +103,7 @@ test_that("spark.gbt", { expect_equal(stats$maxDepth, 5) # spark.gbt classification can work on libsvm data - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { data <- read.df(absoluteSparkPath("data/mllib/sample_binary_classification_data.txt"), source = "libsvm") model <- spark.gbt(data, label ~ features, "classification") @@ -144,7 +142,7 @@ test_that("spark.randomForest", { expect_equal(stats$numTrees, 20) expect_equal(stats$maxDepth, 5) - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-randomForestRegression", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -178,7 +176,7 @@ test_that("spark.randomForest", { expect_equal(length(grep("setosa", predictions)), 50) expect_equal(length(grep("versicolor", predictions)), 50) - if (not_cran_or_windows_with_hadoop()) { 
+ if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-randomForestClassification", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -215,7 +213,7 @@ test_that("spark.randomForest", { expect_equal(length(grep("2.0", predictions)), 50) # spark.randomForest classification can work on libsvm data - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { data <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.randomForest(data, label ~ features, "classification") @@ -224,8 +222,6 @@ test_that("spark.randomForest", { }) test_that("spark.decisionTree", { - skip_on_cran() - # regression data <- suppressWarnings(createDataFrame(longley)) model <- spark.decisionTree(data, Employed ~ ., "regression", maxDepth = 5, maxBins = 16) @@ -242,7 +238,7 @@ test_that("spark.decisionTree", { expect_error(capture.output(stats), NA) expect_true(length(capture.output(stats)) > 6) - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-decisionTreeRegression", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -273,7 +269,7 @@ test_that("spark.decisionTree", { expect_equal(length(grep("setosa", predictions)), 50) expect_equal(length(grep("versicolor", predictions)), 50) - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { modelPath <- tempfile(pattern = "spark-decisionTreeClassification", fileext = ".tmp") write.ml(model, modelPath) expect_error(write.ml(model, modelPath)) @@ -309,7 +305,7 @@ test_that("spark.decisionTree", { expect_equal(length(grep("2.0", predictions)), 50) # spark.decisionTree classification can work on libsvm data - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { data <- read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"), source = "libsvm") model <- spark.decisionTree(data, label ~ features, "classification") diff --git a/R/pkg/tests/fulltests/test_parallelize_collect.R b/R/pkg/tests/fulltests/test_parallelize_collect.R index 52d4c93ed9599..3d122ccaf448f 100644 --- a/R/pkg/tests/fulltests/test_parallelize_collect.R +++ b/R/pkg/tests/fulltests/test_parallelize_collect.R @@ -39,8 +39,6 @@ jsc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", # Tests test_that("parallelize() on simple vectors and lists returns an RDD", { - skip_on_cran() - numVectorRDD <- parallelize(jsc, numVector, 1) numVectorRDD2 <- parallelize(jsc, numVector, 10) numListRDD <- parallelize(jsc, numList, 1) @@ -68,8 +66,6 @@ test_that("parallelize() on simple vectors and lists returns an RDD", { }) test_that("collect(), following a parallelize(), gives back the original collections", { - skip_on_cran() - numVectorRDD <- parallelize(jsc, numVector, 10) expect_equal(collectRDD(numVectorRDD), as.list(numVector)) @@ -90,8 +86,6 @@ test_that("collect(), following a parallelize(), gives back the original collect }) test_that("regression: collect() following a parallelize() does not drop elements", { - skip_on_cran() - # 10 %/% 6 = 1, ceiling(10 / 6) = 2 collLen <- 10 numPart <- 6 @@ -101,8 +95,6 @@ test_that("regression: collect() following a parallelize() does not drop element }) test_that("parallelize() and collect() work for lists of pairs (pairwise data)", { - skip_on_cran() - # use the pairwise logical to indicate pairwise data numPairsRDDD1 <- parallelize(jsc, numPairs, 1) numPairsRDDD2 <- 
parallelize(jsc, numPairs, 2) diff --git a/R/pkg/tests/fulltests/test_rdd.R b/R/pkg/tests/fulltests/test_rdd.R index fb244e1d49e20..6ee1fceffd822 100644 --- a/R/pkg/tests/fulltests/test_rdd.R +++ b/R/pkg/tests/fulltests/test_rdd.R @@ -29,30 +29,22 @@ intPairs <- list(list(1L, -1), list(2L, 100), list(2L, 1), list(1L, 200)) intRdd <- parallelize(sc, intPairs, 2L) test_that("get number of partitions in RDD", { - skip_on_cran() - expect_equal(getNumPartitionsRDD(rdd), 2) expect_equal(getNumPartitionsRDD(intRdd), 2) }) test_that("first on RDD", { - skip_on_cran() - expect_equal(firstRDD(rdd), 1) newrdd <- lapply(rdd, function(x) x + 1) expect_equal(firstRDD(newrdd), 2) }) test_that("count and length on RDD", { - skip_on_cran() - expect_equal(countRDD(rdd), 10) expect_equal(lengthRDD(rdd), 10) }) test_that("count by values and keys", { - skip_on_cran() - mods <- lapply(rdd, function(x) { x %% 3 }) actual <- countByValue(mods) expected <- list(list(0, 3L), list(1, 4L), list(2, 3L)) @@ -64,40 +56,30 @@ test_that("count by values and keys", { }) test_that("lapply on RDD", { - skip_on_cran() - multiples <- lapply(rdd, function(x) { 2 * x }) actual <- collectRDD(multiples) expect_equal(actual, as.list(nums * 2)) }) test_that("lapplyPartition on RDD", { - skip_on_cran() - sums <- lapplyPartition(rdd, function(part) { sum(unlist(part)) }) actual <- collectRDD(sums) expect_equal(actual, list(15, 40)) }) test_that("mapPartitions on RDD", { - skip_on_cran() - sums <- mapPartitions(rdd, function(part) { sum(unlist(part)) }) actual <- collectRDD(sums) expect_equal(actual, list(15, 40)) }) test_that("flatMap() on RDDs", { - skip_on_cran() - flat <- flatMap(intRdd, function(x) { list(x, x) }) actual <- collectRDD(flat) expect_equal(actual, rep(intPairs, each = 2)) }) test_that("filterRDD on RDD", { - skip_on_cran() - filtered.rdd <- filterRDD(rdd, function(x) { x %% 2 == 0 }) actual <- collectRDD(filtered.rdd) expect_equal(actual, list(2, 4, 6, 8, 10)) @@ -113,8 +95,6 @@ test_that("filterRDD on RDD", { }) test_that("lookup on RDD", { - skip_on_cran() - vals <- lookup(intRdd, 1L) expect_equal(vals, list(-1, 200)) @@ -123,8 +103,6 @@ test_that("lookup on RDD", { }) test_that("several transformations on RDD (a benchmark on PipelinedRDD)", { - skip_on_cran() - rdd2 <- rdd for (i in 1:12) rdd2 <- lapplyPartitionsWithIndex( @@ -139,8 +117,6 @@ test_that("several transformations on RDD (a benchmark on PipelinedRDD)", { }) test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkpoint()", { - skip_on_cran() - # RDD rdd2 <- rdd # PipelinedRDD @@ -182,8 +158,6 @@ test_that("PipelinedRDD support actions: cache(), persist(), unpersist(), checkp }) test_that("reduce on RDD", { - skip_on_cran() - sum <- reduce(rdd, "+") expect_equal(sum, 55) @@ -193,8 +167,6 @@ test_that("reduce on RDD", { }) test_that("lapply with dependency", { - skip_on_cran() - fa <- 5 multiples <- lapply(rdd, function(x) { fa * x }) actual <- collectRDD(multiples) @@ -203,8 +175,6 @@ test_that("lapply with dependency", { }) test_that("lapplyPartitionsWithIndex on RDDs", { - skip_on_cran() - func <- function(partIndex, part) { list(partIndex, Reduce("+", part)) } actual <- collectRDD(lapplyPartitionsWithIndex(rdd, func), flatten = FALSE) expect_equal(actual, list(list(0, 15), list(1, 40))) @@ -221,14 +191,10 @@ test_that("lapplyPartitionsWithIndex on RDDs", { }) test_that("sampleRDD() on RDDs", { - skip_on_cran() - expect_equal(unlist(collectRDD(sampleRDD(rdd, FALSE, 1.0, 2014L))), nums) }) test_that("takeSample() on RDDs", { - 
skip_on_cran() - # ported from RDDSuite.scala, modified seeds data <- parallelize(sc, 1:100, 2L) for (seed in 4:5) { @@ -271,8 +237,6 @@ test_that("takeSample() on RDDs", { }) test_that("mapValues() on pairwise RDDs", { - skip_on_cran() - multiples <- mapValues(intRdd, function(x) { x * 2 }) actual <- collectRDD(multiples) expected <- lapply(intPairs, function(x) { @@ -282,8 +246,6 @@ test_that("mapValues() on pairwise RDDs", { }) test_that("flatMapValues() on pairwise RDDs", { - skip_on_cran() - l <- parallelize(sc, list(list(1, c(1, 2)), list(2, c(3, 4)))) actual <- collectRDD(flatMapValues(l, function(x) { x })) expect_equal(actual, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) @@ -296,8 +258,6 @@ test_that("flatMapValues() on pairwise RDDs", { }) test_that("reduceByKeyLocally() on PairwiseRDDs", { - skip_on_cran() - pairs <- parallelize(sc, list(list(1, 2), list(1.1, 3), list(1, 4)), 2L) actual <- reduceByKeyLocally(pairs, "+") expect_equal(sortKeyValueList(actual), @@ -311,8 +271,6 @@ test_that("reduceByKeyLocally() on PairwiseRDDs", { }) test_that("distinct() on RDDs", { - skip_on_cran() - nums.rep2 <- rep(1:10, 2) rdd.rep2 <- parallelize(sc, nums.rep2, 2L) uniques <- distinctRDD(rdd.rep2) @@ -321,29 +279,21 @@ test_that("distinct() on RDDs", { }) test_that("maximum() on RDDs", { - skip_on_cran() - max <- maximum(rdd) expect_equal(max, 10) }) test_that("minimum() on RDDs", { - skip_on_cran() - min <- minimum(rdd) expect_equal(min, 1) }) test_that("sumRDD() on RDDs", { - skip_on_cran() - sum <- sumRDD(rdd) expect_equal(sum, 55) }) test_that("keyBy on RDDs", { - skip_on_cran() - func <- function(x) { x * x } keys <- keyBy(rdd, func) actual <- collectRDD(keys) @@ -351,8 +301,6 @@ test_that("keyBy on RDDs", { }) test_that("repartition/coalesce on RDDs", { - skip_on_cran() - rdd <- parallelize(sc, 1:20, 4L) # each partition contains 5 elements # repartition @@ -374,8 +322,6 @@ test_that("repartition/coalesce on RDDs", { }) test_that("sortBy() on RDDs", { - skip_on_cran() - sortedRdd <- sortBy(rdd, function(x) { x * x }, ascending = FALSE) actual <- collectRDD(sortedRdd) expect_equal(actual, as.list(sort(nums, decreasing = TRUE))) @@ -387,8 +333,6 @@ test_that("sortBy() on RDDs", { }) test_that("takeOrdered() on RDDs", { - skip_on_cran() - l <- list(10, 1, 2, 9, 3, 4, 5, 6, 7) rdd <- parallelize(sc, l) actual <- takeOrdered(rdd, 6L) @@ -401,8 +345,6 @@ test_that("takeOrdered() on RDDs", { }) test_that("top() on RDDs", { - skip_on_cran() - l <- list(10, 1, 2, 9, 3, 4, 5, 6, 7) rdd <- parallelize(sc, l) actual <- top(rdd, 6L) @@ -415,8 +357,6 @@ test_that("top() on RDDs", { }) test_that("fold() on RDDs", { - skip_on_cran() - actual <- fold(rdd, 0, "+") expect_equal(actual, Reduce("+", nums, 0)) @@ -426,8 +366,6 @@ test_that("fold() on RDDs", { }) test_that("aggregateRDD() on RDDs", { - skip_on_cran() - rdd <- parallelize(sc, list(1, 2, 3, 4)) zeroValue <- list(0, 0) seqOp <- function(x, y) { list(x[[1]] + y, x[[2]] + 1) } @@ -441,8 +379,6 @@ test_that("aggregateRDD() on RDDs", { }) test_that("zipWithUniqueId() on RDDs", { - skip_on_cran() - rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) actual <- collectRDD(zipWithUniqueId(rdd)) expected <- list(list("a", 0), list("b", 1), list("c", 4), @@ -457,8 +393,6 @@ test_that("zipWithUniqueId() on RDDs", { }) test_that("zipWithIndex() on RDDs", { - skip_on_cran() - rdd <- parallelize(sc, list("a", "b", "c", "d", "e"), 3L) actual <- collectRDD(zipWithIndex(rdd)) expected <- list(list("a", 0), list("b", 1), list("c", 2), @@ -473,32 
+407,24 @@ test_that("zipWithIndex() on RDDs", { }) test_that("glom() on RDD", { - skip_on_cran() - rdd <- parallelize(sc, as.list(1:4), 2L) actual <- collectRDD(glom(rdd)) expect_equal(actual, list(list(1, 2), list(3, 4))) }) test_that("keys() on RDDs", { - skip_on_cran() - keys <- keys(intRdd) actual <- collectRDD(keys) expect_equal(actual, lapply(intPairs, function(x) { x[[1]] })) }) test_that("values() on RDDs", { - skip_on_cran() - values <- values(intRdd) actual <- collectRDD(values) expect_equal(actual, lapply(intPairs, function(x) { x[[2]] })) }) test_that("pipeRDD() on RDDs", { - skip_on_cran() - actual <- collectRDD(pipeRDD(rdd, "more")) expected <- as.list(as.character(1:10)) expect_equal(actual, expected) @@ -516,8 +442,6 @@ test_that("pipeRDD() on RDDs", { }) test_that("zipRDD() on RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, 0:4, 2) rdd2 <- parallelize(sc, 1000:1004, 2) actual <- collectRDD(zipRDD(rdd1, rdd2)) @@ -547,8 +471,6 @@ test_that("zipRDD() on RDDs", { }) test_that("cartesian() on RDDs", { - skip_on_cran() - rdd <- parallelize(sc, 1:3) actual <- collectRDD(cartesian(rdd, rdd)) expect_equal(sortKeyValueList(actual), @@ -592,8 +514,6 @@ test_that("cartesian() on RDDs", { }) test_that("subtract() on RDDs", { - skip_on_cran() - l <- list(1, 1, 2, 2, 3, 4) rdd1 <- parallelize(sc, l) @@ -621,8 +541,6 @@ test_that("subtract() on RDDs", { }) test_that("subtractByKey() on pairwise RDDs", { - skip_on_cran() - l <- list(list("a", 1), list("b", 4), list("b", 5), list("a", 2)) rdd1 <- parallelize(sc, l) @@ -652,8 +570,6 @@ test_that("subtractByKey() on pairwise RDDs", { }) test_that("intersection() on RDDs", { - skip_on_cran() - # intersection with self actual <- collectRDD(intersection(rdd, rdd)) expect_equal(sort(as.integer(actual)), nums) @@ -670,8 +586,6 @@ test_that("intersection() on RDDs", { }) test_that("join() on pairwise RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) actual <- collectRDD(joinRDD(rdd1, rdd2, 2L)) @@ -696,8 +610,6 @@ test_that("join() on pairwise RDDs", { }) test_that("leftOuterJoin() on pairwise RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, list(list(1, 1), list(2, 4))) rdd2 <- parallelize(sc, list(list(1, 2), list(1, 3))) actual <- collectRDD(leftOuterJoin(rdd1, rdd2, 2L)) @@ -728,8 +640,6 @@ test_that("leftOuterJoin() on pairwise RDDs", { }) test_that("rightOuterJoin() on pairwise RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3))) rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) actual <- collectRDD(rightOuterJoin(rdd1, rdd2, 2L)) @@ -757,8 +667,6 @@ test_that("rightOuterJoin() on pairwise RDDs", { }) test_that("fullOuterJoin() on pairwise RDDs", { - skip_on_cran() - rdd1 <- parallelize(sc, list(list(1, 2), list(1, 3), list(3, 3))) rdd2 <- parallelize(sc, list(list(1, 1), list(2, 4))) actual <- collectRDD(fullOuterJoin(rdd1, rdd2, 2L)) @@ -790,8 +698,6 @@ test_that("fullOuterJoin() on pairwise RDDs", { }) test_that("sortByKey() on pairwise RDDs", { - skip_on_cran() - numPairsRdd <- map(rdd, function(x) { list (x, x) }) sortedRdd <- sortByKey(numPairsRdd, ascending = FALSE) actual <- collectRDD(sortedRdd) @@ -841,8 +747,6 @@ test_that("sortByKey() on pairwise RDDs", { }) test_that("collectAsMap() on a pairwise RDD", { - skip_on_cran() - rdd <- parallelize(sc, list(list(1, 2), list(3, 4))) vals <- collectAsMap(rdd) expect_equal(vals, list(`1` = 2, `3` = 4)) @@ -861,15 +765,11 @@ test_that("collectAsMap() on a pairwise 
RDD", { }) test_that("show()", { - skip_on_cran() - rdd <- parallelize(sc, list(1:10)) expect_output(showRDD(rdd), "ParallelCollectionRDD\\[\\d+\\] at parallelize at RRDD\\.scala:\\d+") }) test_that("sampleByKey() on pairwise RDDs", { - skip_on_cran() - rdd <- parallelize(sc, 1:2000) pairsRDD <- lapply(rdd, function(x) { if (x %% 2 == 0) list("a", x) else list("b", x) }) fractions <- list(a = 0.2, b = 0.1) @@ -894,8 +794,6 @@ test_that("sampleByKey() on pairwise RDDs", { }) test_that("Test correct concurrency of RRDD.compute()", { - skip_on_cran() - rdd <- parallelize(sc, 1:1000, 100) jrdd <- getJRDD(lapply(rdd, function(x) { x }), "row") zrdd <- callJMethod(jrdd, "zip", jrdd) diff --git a/R/pkg/tests/fulltests/test_shuffle.R b/R/pkg/tests/fulltests/test_shuffle.R index 18320ea44b389..98300c67c415f 100644 --- a/R/pkg/tests/fulltests/test_shuffle.R +++ b/R/pkg/tests/fulltests/test_shuffle.R @@ -37,8 +37,6 @@ strList <- list("Dexter Morgan: Blood. Sometimes it sets my teeth on edge and ", strListRDD <- parallelize(sc, strList, 4) test_that("groupByKey for integers", { - skip_on_cran() - grouped <- groupByKey(intRdd, 2L) actual <- collectRDD(grouped) @@ -48,8 +46,6 @@ test_that("groupByKey for integers", { }) test_that("groupByKey for doubles", { - skip_on_cran() - grouped <- groupByKey(doubleRdd, 2L) actual <- collectRDD(grouped) @@ -59,8 +55,6 @@ test_that("groupByKey for doubles", { }) test_that("reduceByKey for ints", { - skip_on_cran() - reduced <- reduceByKey(intRdd, "+", 2L) actual <- collectRDD(reduced) @@ -70,8 +64,6 @@ test_that("reduceByKey for ints", { }) test_that("reduceByKey for doubles", { - skip_on_cran() - reduced <- reduceByKey(doubleRdd, "+", 2L) actual <- collectRDD(reduced) @@ -80,8 +72,6 @@ test_that("reduceByKey for doubles", { }) test_that("combineByKey for ints", { - skip_on_cran() - reduced <- combineByKey(intRdd, function(x) { x }, "+", "+", 2L) actual <- collectRDD(reduced) @@ -91,8 +81,6 @@ test_that("combineByKey for ints", { }) test_that("combineByKey for doubles", { - skip_on_cran() - reduced <- combineByKey(doubleRdd, function(x) { x }, "+", "+", 2L) actual <- collectRDD(reduced) @@ -101,8 +89,6 @@ test_that("combineByKey for doubles", { }) test_that("combineByKey for characters", { - skip_on_cran() - stringKeyRDD <- parallelize(sc, list(list("max", 1L), list("min", 2L), list("other", 3L), list("max", 4L)), 2L) @@ -115,8 +101,6 @@ test_that("combineByKey for characters", { }) test_that("aggregateByKey", { - skip_on_cran() - # test aggregateByKey for int keys rdd <- parallelize(sc, list(list(1, 1), list(1, 2), list(2, 3), list(2, 4))) @@ -145,8 +129,6 @@ test_that("aggregateByKey", { }) test_that("foldByKey", { - skip_on_cran() - # test foldByKey for int keys folded <- foldByKey(intRdd, 0, "+", 2L) @@ -190,8 +172,6 @@ test_that("foldByKey", { }) test_that("partitionBy() partitions data correctly", { - skip_on_cran() - # Partition by magnitude partitionByMagnitude <- function(key) { if (key >= 3) 1 else 0 } @@ -207,8 +187,6 @@ test_that("partitionBy() partitions data correctly", { }) test_that("partitionBy works with dependencies", { - skip_on_cran() - kOne <- 1 partitionByParity <- function(key) { if (key %% 2 == kOne) 7 else 4 } @@ -227,8 +205,6 @@ test_that("partitionBy works with dependencies", { }) test_that("test partitionBy with string keys", { - skip_on_cran() - words <- flatMap(strListRDD, function(line) { strsplit(line, " ")[[1]] }) wordCount <- lapply(words, function(word) { list(word, 1L) }) diff --git a/R/pkg/tests/fulltests/test_sparkR.R 
b/R/pkg/tests/fulltests/test_sparkR.R index a40981c188f7a..f73fc6baeccef 100644 --- a/R/pkg/tests/fulltests/test_sparkR.R +++ b/R/pkg/tests/fulltests/test_sparkR.R @@ -18,8 +18,6 @@ context("functions in sparkR.R") test_that("sparkCheckInstall", { - skip_on_cran() - # "local, yarn-client, mesos-client" mode, SPARK_HOME was set correctly, # and the SparkR job was submitted by "spark-submit" sparkHome <- paste0(tempdir(), "/", "sparkHome") diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index c790d02b107be..af529067f43e0 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -61,7 +61,7 @@ unsetHiveContext <- function() { # Tests for SparkSQL functions in SparkR filesBefore <- list.files(path = sparkRDir, all.files = TRUE) -sparkSession <- if (not_cran_or_windows_with_hadoop()) { +sparkSession <- if (windows_with_hadoop()) { sparkR.session(master = sparkRTestMaster) } else { sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) @@ -100,26 +100,20 @@ mockLinesMapType <- c("{\"name\":\"Bob\",\"info\":{\"age\":16,\"height\":176.5}} mapTypeJsonPath <- tempfile(pattern = "sparkr-test", fileext = ".tmp") writeLines(mockLinesMapType, mapTypeJsonPath) -if (.Platform$OS.type == "windows") { +if (is_windows()) { Sys.setenv(TZ = "GMT") } test_that("calling sparkRSQL.init returns existing SQL context", { - skip_on_cran() - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) expect_equal(suppressWarnings(sparkRSQL.init(sc)), sqlContext) }) test_that("calling sparkRSQL.init returns existing SparkSession", { - skip_on_cran() - expect_equal(suppressWarnings(sparkRSQL.init(sc)), sparkSession) }) test_that("calling sparkR.session returns existing SparkSession", { - skip_on_cran() - expect_equal(sparkR.session(), sparkSession) }) @@ -217,8 +211,6 @@ test_that("structField type strings", { }) test_that("create DataFrame from RDD", { - skip_on_cran() - rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) df <- createDataFrame(rdd, list("a", "b")) dfAsDF <- as.DataFrame(rdd, list("a", "b")) @@ -316,8 +308,6 @@ test_that("create DataFrame from RDD", { }) test_that("createDataFrame uses files for large objects", { - skip_on_cran() - # To simulate a large file scenario, we set spark.r.maxAllocationLimit to a smaller value conf <- callJMethod(sparkSession, "conf") callJMethod(conf, "set", "spark.r.maxAllocationLimit", "100") @@ -330,7 +320,7 @@ test_that("createDataFrame uses files for large objects", { }) test_that("read/write csv as DataFrame", { - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv") mockLinesCsv <- c("year,make,model,comment,blank", "\"2012\",\"Tesla\",\"S\",\"No comment\",", @@ -380,8 +370,6 @@ test_that("read/write csv as DataFrame", { }) test_that("Support other types for options", { - skip_on_cran() - csvPath <- tempfile(pattern = "sparkr-test", fileext = ".csv") mockLinesCsv <- c("year,make,model,comment,blank", "\"2012\",\"Tesla\",\"S\",\"No comment\",", @@ -436,8 +424,6 @@ test_that("convert NAs to null type in DataFrames", { }) test_that("toDF", { - skip_on_cran() - rdd <- lapply(parallelize(sc, 1:10), function(x) { list(x, as.character(x)) }) df <- toDF(rdd, list("a", "b")) expect_is(df, "SparkDataFrame") @@ -549,8 +535,6 @@ test_that("create DataFrame with complex types", { }) test_that("create DataFrame from a data.frame with complex types", { - skip_on_cran() - ldf <- 
data.frame(row.names = 1:2) ldf$a_list <- list(list(1, 2), list(3, 4)) ldf$an_envir <- c(as.environment(list(a = 1, b = 2)), as.environment(list(c = 3))) @@ -563,8 +547,6 @@ test_that("create DataFrame from a data.frame with complex types", { }) test_that("Collect DataFrame with complex types", { - skip_on_cran() - # ArrayType df <- read.json(complexTypeJsonPath) ldf <- collect(df) @@ -607,7 +589,7 @@ test_that("Collect DataFrame with complex types", { }) test_that("read/write json files", { - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { # Test read.df df <- read.df(jsonPath, "json") expect_is(df, "SparkDataFrame") @@ -654,8 +636,6 @@ test_that("read/write json files", { }) test_that("read/write json files - compression option", { - skip_on_cran() - df <- read.df(jsonPath, "json") jsonPath <- tempfile(pattern = "jsonPath", fileext = ".json") @@ -669,8 +649,6 @@ test_that("read/write json files - compression option", { }) test_that("jsonRDD() on a RDD with json string", { - skip_on_cran() - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) rdd <- parallelize(sc, mockLines) expect_equal(countRDD(rdd), 3) @@ -730,8 +708,6 @@ test_that( }) test_that("test cache, uncache and clearCache", { - skip_on_cran() - df <- read.json(jsonPath) createOrReplaceTempView(df, "table1") cacheTable("table1") @@ -744,7 +720,7 @@ test_that("test cache, uncache and clearCache", { }) test_that("insertInto() on a registered table", { - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { df <- read.df(jsonPath, "json") write.df(df, parquetPath, "parquet", "overwrite") dfParquet <- read.df(parquetPath, "parquet") @@ -787,8 +763,6 @@ test_that("tableToDF() returns a new DataFrame", { }) test_that("toRDD() returns an RRDD", { - skip_on_cran() - df <- read.json(jsonPath) testRDD <- toRDD(df) expect_is(testRDD, "RDD") @@ -796,8 +770,6 @@ test_that("toRDD() returns an RRDD", { }) test_that("union on two RDDs created from DataFrames returns an RRDD", { - skip_on_cran() - df <- read.json(jsonPath) RDD1 <- toRDD(df) RDD2 <- toRDD(df) @@ -808,8 +780,6 @@ test_that("union on two RDDs created from DataFrames returns an RRDD", { }) test_that("union on mixed serialization types correctly returns a byte RRDD", { - skip_on_cran() - # Byte RDD nums <- 1:10 rdd <- parallelize(sc, nums, 2L) @@ -839,8 +809,6 @@ test_that("union on mixed serialization types correctly returns a byte RRDD", { }) test_that("objectFile() works with row serialization", { - skip_on_cran() - objectPath <- tempfile(pattern = "spark-test", fileext = ".tmp") df <- read.json(jsonPath) dfRDD <- toRDD(df) @@ -853,8 +821,6 @@ test_that("objectFile() works with row serialization", { }) test_that("lapply() on a DataFrame returns an RDD with the correct columns", { - skip_on_cran() - df <- read.json(jsonPath) testRDD <- lapply(df, function(row) { row$newCol <- row$age + 5 @@ -923,8 +889,6 @@ test_that("collect() support Unicode characters", { }) test_that("multiple pipeline transformations result in an RDD with the correct values", { - skip_on_cran() - df <- read.json(jsonPath) first <- lapply(df, function(row) { row$age <- row$age + 5 @@ -964,7 +928,7 @@ test_that("cache(), storageLevel(), persist(), and unpersist() on a DataFrame", }) test_that("setCheckpointDir(), checkpoint() on a DataFrame", { - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { checkpointDir <- file.path(tempdir(), "cproot") expect_true(length(list.files(path = checkpointDir, all.files = TRUE)) == 0) @@ -1341,7 +1305,7 @@ 
test_that("column calculation", { }) test_that("test HiveContext", { - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { setHiveContext(sc) schema <- structType(structField("name", "string"), structField("age", "integer"), @@ -1395,8 +1359,6 @@ test_that("column operators", { }) test_that("column functions", { - skip_on_cran() - c <- column("a") c1 <- abs(c) + acos(c) + approxCountDistinct(c) + ascii(c) + asin(c) + atan(c) c2 <- avg(c) + base64(c) + bin(c) + bitwiseNOT(c) + cbrt(c) + ceil(c) + cos(c) @@ -1782,8 +1744,6 @@ test_that("when(), otherwise() and ifelse() with column on a DataFrame", { }) test_that("group by, agg functions", { - skip_on_cran() - df <- read.json(jsonPath) df1 <- agg(df, name = "max", age = "sum") expect_equal(1, count(df1)) @@ -2125,8 +2085,6 @@ test_that("filter() on a DataFrame", { }) test_that("join(), crossJoin() and merge() on a DataFrame", { - skip_on_cran() - df <- read.json(jsonPath) mockLines2 <- c("{\"name\":\"Michael\", \"test\": \"yes\"}", @@ -2400,8 +2358,6 @@ test_that("mutate(), transform(), rename() and names()", { }) test_that("read/write ORC files", { - skip_on_cran() - setHiveContext(sc) df <- read.df(jsonPath, "json") @@ -2423,8 +2379,6 @@ test_that("read/write ORC files", { }) test_that("read/write ORC files - compression option", { - skip_on_cran() - setHiveContext(sc) df <- read.df(jsonPath, "json") @@ -2440,7 +2394,7 @@ test_that("read/write ORC files - compression option", { }) test_that("read/write Parquet files", { - if (not_cran_or_windows_with_hadoop()) { + if (windows_with_hadoop()) { df <- read.df(jsonPath, "json") # Test write.df and read.df write.df(df, parquetPath, "parquet", mode = "overwrite") @@ -2473,8 +2427,6 @@ test_that("read/write Parquet files", { }) test_that("read/write Parquet files - compression option/mode", { - skip_on_cran() - df <- read.df(jsonPath, "json") tempPath <- tempfile(pattern = "tempPath", fileext = ".parquet") @@ -2492,8 +2444,6 @@ test_that("read/write Parquet files - compression option/mode", { }) test_that("read/write text files", { - skip_on_cran() - # Test write.df and read.df df <- read.df(jsonPath, "text") expect_is(df, "SparkDataFrame") @@ -2515,8 +2465,6 @@ test_that("read/write text files", { }) test_that("read/write text files - compression option", { - skip_on_cran() - df <- read.df(jsonPath, "text") textPath <- tempfile(pattern = "textPath", fileext = ".txt") @@ -2750,8 +2698,6 @@ test_that("approxQuantile() on a DataFrame", { }) test_that("SQL error message is returned from JVM", { - skip_on_cran() - retError <- tryCatch(sql("select * from blah"), error = function(e) e) expect_equal(grepl("Table or view not found", retError), TRUE) expect_equal(grepl("blah", retError), TRUE) @@ -2760,8 +2706,6 @@ test_that("SQL error message is returned from JVM", { irisDF <- suppressWarnings(createDataFrame(iris)) test_that("Method as.data.frame as a synonym for collect()", { - skip_on_cran() - expect_equal(as.data.frame(irisDF), collect(irisDF)) irisDF2 <- irisDF[irisDF$Species == "setosa", ] expect_equal(as.data.frame(irisDF2), collect(irisDF2)) @@ -2984,8 +2928,6 @@ test_that("dapply() and dapplyCollect() on a DataFrame", { }) test_that("dapplyCollect() on DataFrame with a binary column", { - skip_on_cran() - df <- data.frame(key = 1:3) df$bytes <- lapply(df$key, serialize, connection = NULL) @@ -3006,8 +2948,6 @@ test_that("dapplyCollect() on DataFrame with a binary column", { }) test_that("repartition by columns on DataFrame", { - skip_on_cran() - df <- createDataFrame( 
list(list(1L, 1, "1", 0.1), list(1L, 2, "2", 0.2), list(3L, 3, "3", 0.3)), c("a", "b", "c", "d")) @@ -3046,8 +2986,6 @@ test_that("repartition by columns on DataFrame", { }) test_that("coalesce, repartition, numPartitions", { - skip_on_cran() - df <- as.DataFrame(cars, numPartitions = 5) expect_equal(getNumPartitions(df), 5) expect_equal(getNumPartitions(coalesce(df, 3)), 3) @@ -3067,8 +3005,6 @@ test_that("coalesce, repartition, numPartitions", { }) test_that("gapply() and gapplyCollect() on a DataFrame", { - skip_on_cran() - df <- createDataFrame ( list(list(1L, 1, "1", 0.1), list(1L, 2, "1", 0.2), list(3L, 3, "3", 0.3)), c("a", "b", "c", "d")) @@ -3186,8 +3122,6 @@ test_that("Window functions on a DataFrame", { }) test_that("createDataFrame sqlContext parameter backward compatibility", { - skip_on_cran() - sqlContext <- suppressWarnings(sparkRSQL.init(sc)) a <- 1:3 b <- c("a", "b", "c") @@ -3221,8 +3155,6 @@ test_that("createDataFrame sqlContext parameter backward compatibility", { }) test_that("randomSplit", { - skip_on_cran() - num <- 4000 df <- createDataFrame(data.frame(id = 1:num)) weights <- c(2, 3, 5) @@ -3269,8 +3201,6 @@ test_that("Setting and getting config on SparkSession, sparkR.conf(), sparkR.uiW }) test_that("enableHiveSupport on SparkSession", { - skip_on_cran() - setHiveContext(sc) unsetHiveContext() # if we are still here, it must be built with hive @@ -3286,8 +3216,6 @@ test_that("Spark version from SparkSession", { }) test_that("Call DataFrameWriter.save() API in Java without path and check argument types", { - skip_on_cran() - df <- read.df(jsonPath, "json") # This tests if the exception is thrown from JVM not from SparkR side. # It makes sure that we can omit path argument in write.df API and then it calls @@ -3314,8 +3242,6 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume }) test_that("Call DataFrameWriter.load() API in Java without path and check argument types", { - skip_on_cran() - # This tests if the exception is thrown from JVM not from SparkR side. # It makes sure that we can omit path argument in read.df API and then it calls # DataFrameWriter.load() without path. @@ -3440,8 +3366,6 @@ compare_list <- function(list1, list2) { # This should always be the **very last test** in this test file. test_that("No extra files are created in SPARK_HOME by starting session and making calls", { - skip_on_cran() # skip because when run from R CMD check SPARK_HOME is not the current directory - # Check that it is not creating any extra file. # Does not check the tempdir which would be cleaned up after. 
filesAfter <- list.files(path = sparkRDir, all.files = TRUE) diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R index b20b4312fbaae..d691de7cd725d 100644 --- a/R/pkg/tests/fulltests/test_streaming.R +++ b/R/pkg/tests/fulltests/test_streaming.R @@ -24,7 +24,7 @@ context("Structured Streaming") sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FALSE) jsonSubDir <- file.path("sparkr-test", "json", "") -if (.Platform$OS.type == "windows") { +if (is_windows()) { # file.path removes the empty separator on Windows, adds it back jsonSubDir <- paste0(jsonSubDir, .Platform$file.sep) } @@ -47,8 +47,6 @@ schema <- structType(structField("name", "string"), structField("count", "double")) test_that("read.stream, write.stream, awaitTermination, stopQuery", { - skip_on_cran() - df <- read.stream("json", path = jsonDir, schema = schema, maxFilesPerTrigger = 1) expect_true(isStreaming(df)) counts <- count(group_by(df, "name")) @@ -69,8 +67,6 @@ test_that("read.stream, write.stream, awaitTermination, stopQuery", { }) test_that("print from explain, lastProgress, status, isActive", { - skip_on_cran() - df <- read.stream("json", path = jsonDir, schema = schema) expect_true(isStreaming(df)) counts <- count(group_by(df, "name")) @@ -90,8 +86,6 @@ test_that("print from explain, lastProgress, status, isActive", { }) test_that("Stream other format", { - skip_on_cran() - parquetPath <- tempfile(pattern = "sparkr-test", fileext = ".parquet") df <- read.df(jsonPath, "json", schema) write.df(df, parquetPath, "parquet", "overwrite") @@ -118,8 +112,6 @@ test_that("Stream other format", { }) test_that("Non-streaming DataFrame", { - skip_on_cran() - c <- as.DataFrame(cars) expect_false(isStreaming(c)) @@ -129,8 +121,6 @@ test_that("Non-streaming DataFrame", { }) test_that("Unsupported operation", { - skip_on_cran() - # memory sink without aggregation df <- read.stream("json", path = jsonDir, schema = schema, maxFilesPerTrigger = 1) expect_error(write.stream(df, "memory", queryName = "people", outputMode = "complete"), @@ -139,8 +129,6 @@ test_that("Unsupported operation", { }) test_that("Terminated by error", { - skip_on_cran() - df <- read.stream("json", path = jsonDir, schema = schema, maxFilesPerTrigger = -1) counts <- count(group_by(df, "name")) # This would not fail before returning with a StreamingQuery, diff --git a/R/pkg/tests/fulltests/test_take.R b/R/pkg/tests/fulltests/test_take.R index c00723ba31f4c..8936cc57da227 100644 --- a/R/pkg/tests/fulltests/test_take.R +++ b/R/pkg/tests/fulltests/test_take.R @@ -34,8 +34,6 @@ sparkSession <- sparkR.session(master = sparkRTestMaster, enableHiveSupport = FA sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", sparkSession) test_that("take() gives back the original elements in correct count and order", { - skip_on_cran() - numVectorRDD <- parallelize(sc, numVector, 10) # case: number of elements to take is less than the size of the first partition expect_equal(takeRDD(numVectorRDD, 1), as.list(head(numVector, n = 1))) diff --git a/R/pkg/tests/fulltests/test_textFile.R b/R/pkg/tests/fulltests/test_textFile.R index e8a961cb3e870..be2d2711ff88e 100644 --- a/R/pkg/tests/fulltests/test_textFile.R +++ b/R/pkg/tests/fulltests/test_textFile.R @@ -24,8 +24,6 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", mockFile <- c("Spark is pretty.", "Spark is awesome.") test_that("textFile() on a local file returns an RDD", { - skip_on_cran() - fileName <- 
tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) @@ -38,8 +36,6 @@ test_that("textFile() on a local file returns an RDD", { }) test_that("textFile() followed by a collect() returns the same content", { - skip_on_cran() - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) @@ -50,8 +46,6 @@ test_that("textFile() followed by a collect() returns the same content", { }) test_that("textFile() word count works as expected", { - skip_on_cran() - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) @@ -70,8 +64,6 @@ test_that("textFile() word count works as expected", { }) test_that("several transformations on RDD created by textFile()", { - skip_on_cran() - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) @@ -86,8 +78,6 @@ test_that("several transformations on RDD created by textFile()", { }) test_that("textFile() followed by a saveAsTextFile() returns the same content", { - skip_on_cran() - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) @@ -102,8 +92,6 @@ test_that("textFile() followed by a saveAsTextFile() returns the same content", }) test_that("saveAsTextFile() on a parallelized list works as expected", { - skip_on_cran() - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") l <- list(1, 2, 3) rdd <- parallelize(sc, l, 1L) @@ -115,8 +103,6 @@ test_that("saveAsTextFile() on a parallelized list works as expected", { }) test_that("textFile() and saveAsTextFile() word count works as expected", { - skip_on_cran() - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName1) @@ -142,8 +128,6 @@ test_that("textFile() and saveAsTextFile() word count works as expected", { }) test_that("textFile() on multiple paths", { - skip_on_cran() - fileName1 <- tempfile(pattern = "spark-test", fileext = ".tmp") fileName2 <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines("Spark is pretty.", fileName1) @@ -157,8 +141,6 @@ test_that("textFile() on multiple paths", { }) test_that("Pipelined operations on RDDs created using textFile", { - skip_on_cran() - fileName <- tempfile(pattern = "spark-test", fileext = ".tmp") writeLines(mockFile, fileName) diff --git a/R/pkg/tests/fulltests/test_utils.R b/R/pkg/tests/fulltests/test_utils.R index 6197ae7569879..af81423aa8dd0 100644 --- a/R/pkg/tests/fulltests/test_utils.R +++ b/R/pkg/tests/fulltests/test_utils.R @@ -23,7 +23,6 @@ sc <- callJStatic("org.apache.spark.sql.api.r.SQLUtils", "getJavaSparkContext", test_that("convertJListToRList() gives back (deserializes) the original JLists of strings and integers", { - skip_on_cran() # It's hard to manually create a Java List using rJava, since it does not # support generics well. Instead, we rely on collectRDD() returning a # JList. 
@@ -41,7 +40,6 @@ test_that("convertJListToRList() gives back (deserializes) the original JLists
 })
 
 test_that("serializeToBytes on RDD", {
-  skip_on_cran()
   # File content
   mockFile <- c("Spark is pretty.", "Spark is awesome.")
   fileName <- tempfile(pattern = "spark-test", fileext = ".tmp")
@@ -169,8 +167,6 @@ test_that("convertToJSaveMode", {
 })
 
 test_that("captureJVMException", {
-  skip_on_cran()
-
   method <- "createStructField"
   expect_error(tryCatch(callJStatic("org.apache.spark.sql.api.r.SQLUtils", method,
                                     "col", "unknown", TRUE),
@@ -181,8 +177,6 @@ test_that("captureJVMException", {
 })
 
 test_that("hashCode", {
-  skip_on_cran()
-
   expect_error(hashCode("bc53d3605e8a5b7de1e8e271c2317645"), NA)
 })
 
@@ -243,6 +237,3 @@ test_that("basenameSansExtFromUrl", {
 })
 
 sparkR.session.stop()
-
-message("--- End test (utils) ", as.POSIXct(Sys.time(), tz = "GMT"))
-message("elapsed ", (proc.time() - timer_ptm)[3])
diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R
index d48e36c880c13..f00a610679752 100644
--- a/R/pkg/tests/run-all.R
+++ b/R/pkg/tests/run-all.R
@@ -24,8 +24,6 @@ options("warn" = 2)
 if (.Platform$OS.type == "windows") {
   Sys.setenv(TZ = "GMT")
 }
-message("--- Start test ", as.POSIXct(Sys.time(), tz = "GMT"))
-timer_ptm <- proc.time()
 
 # Setup global test environment
 # Install Spark first to set SPARK_HOME
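
For context on the hunks above: the helper predicates changed in R/pkg/R/utils.R (top of this diff) are what the test files now use instead of per-test skip_on_cran() calls, since tests/fulltests/ is kept out of the CRAN build by the new .Rbuildignore entry. Below is a minimal standalone R sketch of how those predicates combine; the helper bodies are copied from the utils.R hunk, while the modelPath block is a hypothetical illustration of the windows_with_hadoop() guards placed around model save/load in the MLlib tests.

is_windows <- function() {
  .Platform$OS.type == "windows"
}

hadoop_home_set <- function() {
  !identical(Sys.getenv("HADOOP_HOME"), "")
}

# TRUE unless running on Windows without HADOOP_HOME set.
windows_with_hadoop <- function() {
  !is_windows() || hadoop_home_set()
}

# Hypothetical guard, mirroring the save/load blocks in the tests above:
if (windows_with_hadoop()) {
  modelPath <- tempfile(pattern = "spark-example", fileext = ".tmp")
  # write.ml(model, modelPath); model2 <- read.ml(modelPath); unlink(modelPath)
}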