
Commit

Merge branch 'master' into configTimeout-6980
BryanCutler committed Jun 24, 2015
2 parents 039afed + f04b567 commit be11c4e
Showing 672 changed files with 20,691 additions and 10,548 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -66,6 +66,7 @@ scalastyle-output.xml
R-unit-tests.log
R/unit-tests.out
python/lib/pyspark.zip
lint-r-report.log

# For Hive
metastore_db/
4 changes: 4 additions & 0 deletions .rat-excludes
@@ -28,6 +28,7 @@ spark-env.sh
spark-env.cmd
spark-env.sh.template
log4j-defaults.properties
log4j-defaults-repl.properties
bootstrap-tooltip.js
jquery-1.11.1.min.js
d3.min.js
@@ -80,6 +81,9 @@ local-1425081759269/*
local-1426533911241/*
local-1426633911242/*
local-1430917381534/*
local-1430917381535_1
local-1430917381535_2
DESCRIPTION
NAMESPACE
test_support/*
.lintr
1 change: 1 addition & 0 deletions LICENSE
@@ -950,3 +950,4 @@ The following components are provided under the MIT License. See project link fo
(MIT License) scopt (com.github.scopt:scopt_2.10:3.2.0 - https://github.com/scopt/scopt)
(The MIT License) Mockito (org.mockito:mockito-all:1.8.5 - http://www.mockito.org)
(MIT License) jquery (https://jquery.org/license/)
(MIT License) AnchorJS (https://github.com/bryanbraun/anchorjs)
8 changes: 4 additions & 4 deletions R/create-docs.sh
@@ -23,14 +23,14 @@
# After running this script the html docs can be found in
# $SPARK_HOME/R/pkg/html

set -o pipefail
set -e

# Figure out where the script is
export FWDIR="$(cd "`dirname "$0"`"; pwd)"
pushd $FWDIR

# Generate Rd file
Rscript -e 'library(devtools); devtools::document(pkg="./pkg", roclets=c("rd"))'

# Install the package
# Install the package (this will also generate the Rd files)
./install-dev.sh

# Now create HTML files
11 changes: 10 additions & 1 deletion R/install-dev.sh
@@ -26,11 +26,20 @@
# NOTE(shivaram): Right now we use $SPARK_HOME/R/lib to be the installation directory
# to load the SparkR package on the worker nodes.

set -o pipefail
set -e

FWDIR="$(cd `dirname $0`; pwd)"
LIB_DIR="$FWDIR/lib"

mkdir -p $LIB_DIR

# Install R
pushd $FWDIR

# Generate Rd files if devtools is installed
Rscript -e ' if("devtools" %in% rownames(installed.packages())) { library(devtools); devtools::document(pkg="./pkg", roclets=c("rd")) }'

# Install SparkR to $LIB_DIR
R CMD INSTALL --library=$LIB_DIR $FWDIR/pkg/

popd
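
The new Rscript one-liner in install-dev.sh is dense, so here is the same guarded devtools call written out for readability (an illustrative expansion, not part of the patch). Guarding on installed.packages() keeps devtools optional: a plain install still works without it, and the Rd files are only regenerated when devtools is present.

# Expanded form of the one-line guard added to install-dev.sh above:
# regenerate the Rd files only when the devtools package is installed.
if ("devtools" %in% rownames(installed.packages())) {
  library(devtools)
  devtools::document(pkg = "./pkg", roclets = c("rd"))
}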
2 changes: 1 addition & 1 deletion R/log4j.properties
@@ -19,7 +19,7 @@
log4j.rootCategory=INFO, file
log4j.appender.file=org.apache.log4j.FileAppender
log4j.appender.file.append=true
log4j.appender.file.file=R-unit-tests.log
log4j.appender.file.file=R/target/unit-tests.log
log4j.appender.file.layout=org.apache.log4j.PatternLayout
log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n

2 changes: 2 additions & 0 deletions R/pkg/.lintr
@@ -0,0 +1,2 @@
linters: with_defaults(line_length_linter(100), camel_case_linter = NULL, open_curly_linter(allow_single_line = TRUE), closed_curly_linter(allow_single_line = TRUE))
exclusions: list("inst/profile/general.R" = 1, "inst/profile/shell.R")
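
For context, this new .lintr file is picked up automatically when the package is linted: line length is capped at 100, the camel-case linter is disabled (SparkR's API uses camelCase names), and single-line curly braces are allowed. A minimal sketch of running it, assuming the lintr package is installed; the invocation and path below are illustrative, not part of this commit.

# Lint the SparkR sources; lint_package() reads R/pkg/.lintr, so the
# line_length_linter(100) and curly-brace settings above apply automatically.
library(lintr)
results <- lint_package("R/pkg")   # path assumes the Spark source root
print(results)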
5 changes: 5 additions & 0 deletions R/pkg/NAMESPACE
@@ -10,6 +10,11 @@ export("sparkR.init")
export("sparkR.stop")
export("print.jobj")

# Job group lifecycle management methods
export("setJobGroup",
"clearJobGroup",
"cancelJobGroup")

exportClasses("DataFrame")

exportMethods("arrange",
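
The newly exported job-group functions cover Spark's job-group lifecycle: tag subsequently submitted jobs with a group id, then cancel or clear the group. A hypothetical usage sketch, assuming the Spark 1.x SparkR signatures in which the SparkContext from sparkR.init() is passed explicitly; the group id and description are made up for illustration.

library(SparkR)
sc <- sparkR.init()

# Tag jobs submitted after this call with a group id and description.
setJobGroup(sc, "nightly-etl", "nightly ETL jobs", interruptOnCancel = TRUE)

# ... run SparkR actions here; they belong to the "nightly-etl" group ...

cancelJobGroup(sc, "nightly-etl")  # cancel all active jobs in the group
clearJobGroup(sc)                  # later jobs are no longer grouped

sparkR.stop()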
96 changes: 48 additions & 48 deletions R/pkg/R/DataFrame.R
@@ -38,7 +38,7 @@ setClass("DataFrame",
setMethod("initialize", "DataFrame", function(.Object, sdf, isCached) {
.Object@env <- new.env()
.Object@env$isCached <- isCached

.Object@sdf <- sdf
.Object
})
@@ -55,11 +55,11 @@ dataFrame <- function(sdf, isCached = FALSE) {
############################ DataFrame Methods ##############################################

#' Print Schema of a DataFrame
#'
#'
#' Prints out the schema in tree format
#'
#'
#' @param x A SparkSQL DataFrame
#'
#'
#' @rdname printSchema
#' @export
#' @examples
@@ -78,11 +78,11 @@ setMethod("printSchema",
})

#' Get schema object
#'
#'
#' Returns the schema of this DataFrame as a structType object.
#'
#'
#' @param x A SparkSQL DataFrame
#'
#'
#' @rdname schema
#' @export
#' @examples
@@ -100,9 +100,9 @@ setMethod("schema",
})

#' Explain
#'
#'
#' Print the logical and physical Catalyst plans to the console for debugging.
#'
#'
#' @param x A SparkSQL DataFrame
#' @param extended Logical. If extended is False, explain() only prints the physical plan.
#' @rdname explain
@@ -200,11 +200,11 @@ setMethod("show", "DataFrame",
})

#' DataTypes
#'
#'
#' Return all column names and their data types as a list
#'
#'
#' @param x A SparkSQL DataFrame
#'
#'
#' @rdname dtypes
#' @export
#' @examples
@@ -224,11 +224,11 @@ setMethod("dtypes",
})

#' Column names
#'
#'
#' Return all column names as a list
#'
#'
#' @param x A SparkSQL DataFrame
#'
#'
#' @rdname columns
#' @export
#' @examples
@@ -256,12 +256,12 @@ setMethod("names",
})

#' Register Temporary Table
#'
#'
#' Registers a DataFrame as a Temporary Table in the SQLContext
#'
#'
#' @param x A SparkSQL DataFrame
#' @param tableName A character vector containing the name of the table
#'
#'
#' @rdname registerTempTable
#' @export
#' @examples
@@ -306,11 +306,11 @@ setMethod("insertInto",
})

#' Cache
#'
#'
#' Persist with the default storage level (MEMORY_ONLY).
#'
#'
#' @param x A SparkSQL DataFrame
#'
#'
#' @rdname cache-methods
#' @export
#' @examples
@@ -400,7 +400,7 @@ setMethod("repartition",
signature(x = "DataFrame", numPartitions = "numeric"),
function(x, numPartitions) {
sdf <- callJMethod(x@sdf, "repartition", numToInt(numPartitions))
dataFrame(sdf)
dataFrame(sdf)
})

# toJSON
@@ -489,7 +489,7 @@ setMethod("distinct",
#' sqlContext <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' collect(sample(df, FALSE, 0.5))
#' collect(sample(df, FALSE, 0.5))
#' collect(sample(df, TRUE, 0.5))
#'}
setMethod("sample",
@@ -513,11 +513,11 @@ setMethod("sample_frac",
})

#' Count
#'
#'
#' Returns the number of rows in a DataFrame
#'
#'
#' @param x A SparkSQL DataFrame
#'
#'
#' @rdname count
#' @export
#' @examples
@@ -568,13 +568,13 @@ setMethod("collect",
})

#' Limit
#'
#'
#' Limit the resulting DataFrame to the number of rows specified.
#'
#'
#' @param x A SparkSQL DataFrame
#' @param num The number of rows to return
#' @return A new DataFrame containing the number of rows specified.
#'
#'
#' @rdname limit
#' @export
#' @examples
@@ -593,7 +593,7 @@ setMethod("limit",
})

#' Take the first NUM rows of a DataFrame and return the results as a data.frame
#'
#'
#' @rdname take
#' @export
#' @examples
@@ -613,8 +613,8 @@ setMethod("take",

#' Head
#'
#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
#' then head() returns the first 6 rows in keeping with the current data.frame
#' Return the first NUM rows of a DataFrame as a data.frame. If NUM is NULL,
#' then head() returns the first 6 rows in keeping with the current data.frame
#' convention in R.
#'
#' @param x A SparkSQL DataFrame
@@ -659,11 +659,11 @@ setMethod("first",
})

# toRDD()
#
#
# Converts a Spark DataFrame to an RDD while preserving column names.
#
#
# @param x A Spark DataFrame
#
#
# @rdname DataFrame
# @export
# @examples
@@ -1167,7 +1167,7 @@ setMethod("where",
#'
#' @param x A Spark DataFrame
#' @param y A Spark DataFrame
#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
#' @param joinExpr (Optional) The expression used to perform the join. joinExpr must be a
#' Column expression. If joinExpr is omitted, join() will perform a Cartesian join
#' @param joinType The type of join to perform. The following join types are available:
#' 'inner', 'outer', 'left_outer', 'right_outer', 'semijoin'. The default joinType is "inner".
@@ -1303,7 +1303,7 @@ setMethod("except",
#' @param source A name for external data source
#' @param mode One of 'append', 'overwrite', 'error', 'ignore'
#'
#' @rdname write.df
#' @rdname write.df
#' @export
#' @examples
#'\dontrun{
@@ -1401,7 +1401,7 @@ setMethod("saveAsTable",
#' @param col A string of name
#' @param ... Additional expressions
#' @return A DataFrame
#' @rdname describe
#' @rdname describe
#' @export
#' @examples
#'\dontrun{
@@ -1444,7 +1444,7 @@ setMethod("describe",
#' This overwrites the how parameter.
#' @param cols Optional list of column names to consider.
#' @return A DataFrame
#'
#'
#' @rdname nafunctions
#' @export
#' @examples
@@ -1465,7 +1465,7 @@ setMethod("dropna",
if (is.null(minNonNulls)) {
minNonNulls <- if (how == "any") { length(cols) } else { 1 }
}

naFunctions <- callJMethod(x@sdf, "na")
sdf <- callJMethod(naFunctions, "drop",
as.integer(minNonNulls), listToSeq(as.list(cols)))
@@ -1488,16 +1488,16 @@ setMethod("na.omit",
#' @param value Value to replace null values with.
#' Should be an integer, numeric, character or named list.
#' If the value is a named list, then cols is ignored and
#' value must be a mapping from column name (character) to
#' value must be a mapping from column name (character) to
#' replacement value. The replacement value must be an
#' integer, numeric or character.
#' @param cols optional list of column names to consider.
#' Columns specified in cols that do not have matching data
#' type are ignored. For example, if value is a character, and
#' type are ignored. For example, if value is a character, and
#' subset contains a non-character column, then the non-character
#' column is simply ignored.
#' @return A DataFrame
#'
#'
#' @rdname nafunctions
#' @export
#' @examples
@@ -1515,14 +1515,14 @@ setMethod("fillna",
if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
stop("value should be an integer, numeric, charactor or named list.")
}

if (class(value) == "list") {
# Check column names in the named list
colNames <- names(value)
if (length(colNames) == 0 || !all(colNames != "")) {
stop("value should be an a named list with each name being a column name.")
}

# Convert to the named list to an environment to be passed to JVM
valueMap <- new.env()
for (col in colNames) {
@@ -1533,19 +1533,19 @@
}
valueMap[[col]] <- v
}

# When value is a named list, caller is expected not to pass in cols
if (!is.null(cols)) {
warning("When value is a named list, cols is ignored!")
cols <- NULL
}

value <- valueMap
} else if (is.integer(value)) {
# Cast an integer to a numeric
value <- as.numeric(value)
}

naFunctions <- callJMethod(x@sdf, "na")
sdf <- if (length(cols) == 0) {
callJMethod(naFunctions, "fill", value)
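
The dropna()/fillna() hunks above document the NA-handling semantics: how and minNonNulls control which rows are dropped, and when value is a named list it maps column names to replacement values and cols is ignored with a warning. A short usage sketch consistent with those docs; the JSON path and column names are hypothetical.

sc <- sparkR.init()
sqlContext <- sparkRSQL.init(sc)
df <- jsonFile(sqlContext, "path/to/file.json")   # hypothetical input

# Drop rows with fewer than 2 non-null values among the listed columns.
cleaned <- dropna(df, minNonNulls = 2, cols = c("age", "height"))

# Named-list fillna(): column name -> replacement value; cols is ignored
# (with a warning) when value is a named list, per the implementation above.
filled <- fillna(df, value = list(age = 0, name = "unknown"))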