-
Notifications
You must be signed in to change notification settings - Fork 28.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-24187][R][SQL] Add array_join function to SparkR #21313
Changes from 3 commits
0c6ca7d
01858f2
e05e701
92c41c5
901ff32
b0b1415
55b4518
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -221,7 +221,9 @@ NULL | |
#' head(select(tmp3, element_at(tmp3$v3, "Valiant"))) | ||
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp)) | ||
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5))) | ||
#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))} | ||
#' head(select(tmp, concat(df$mpg, df$cyl, df$hp))) | ||
#' tmp5 <- mutate(df, v6 = create_array(df$model, df$model)) | ||
#' head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL")))} | ||
NULL | ||
|
||
#' Window functions for Column operations | ||
|
@@ -3006,6 +3008,28 @@ setMethod("array_contains", | |
column(jc) | ||
}) | ||
|
||
#' @details | ||
#' \code{array_join}: Concatenates the elements of column using the delimiter. | ||
#' Null values are replaced with nullReplacement if set, otherwise they are ignored. | ||
#' | ||
#' @param delimiter character(s) to use to concatenate the elements of column. | ||
#' @param nullReplacement character(s) to use to replace the Null values. | ||
#' @rdname column_collection_functions | ||
#' @aliases array_join array_join,Column-method | ||
#' @note array_join since 2.4.0 | ||
setMethod("array_join", | ||
signature(x = "Column"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @felixcheung
|
||
function(x, delimiter, nullReplacement = NA) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wait.. why is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @felixcheung |
||
jc <- if (is.na(nullReplacement)) { | ||
callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, delimiter) | ||
} | ||
else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: |
||
callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, delimiter, | ||
nullReplacement) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. re #21313 (comment) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's |
||
} | ||
column(jc) | ||
}) | ||
|
||
#' @details | ||
#' \code{array_max}: Returns the maximum value of the array. | ||
#' | ||
|
@@ -3197,8 +3221,8 @@ setMethod("size", | |
#' (or starting from the end if start is negative) with the specified length. | ||
#' | ||
#' @rdname column_collection_functions | ||
#' @param start an index indicating the first element occuring in the result. | ||
#' @param length a number of consecutive elements choosen to the result. | ||
#' @param start an index indicating the first element occurring in the result. | ||
#' @param length a number of consecutive elements chosen to the result. | ||
#' @aliases slice slice,Column-method | ||
#' @note slice since 2.4.0 | ||
setMethod("slice", | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1518,6 +1518,16 @@ test_that("column functions", { | |
result <- collect(select(df, arrays_overlap(df[[1]], df[[2]])))[[1]] | ||
expect_equal(result, c(TRUE, FALSE, NA)) | ||
|
||
# Test array_join() | ||
df <- createDataFrame(list(list(list("Hello", "World!")))) | ||
result <- collect(select(df, array_join(df[[1]], "#")))[[1]] | ||
expect_equal(result, "Hello#World!") | ||
df2 <- createDataFrame(list(list(list("Hello", NA, "World!")))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does it work with NULL? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @HyukjinKwon Thank you very much for your review. I will add a test for NULL and also change the }. |
||
result <- collect(select(df2, array_join(df2[[1]], "#", "Beautiful")))[[1]] | ||
expect_equal(result, "Hello#Beautiful#World!") | ||
result <- collect(select(df2, array_join(df2[[1]], "#")))[[1]] | ||
expect_equal(result, "Hello#World!") | ||
|
||
# Test array_sort() and sort_array() | ||
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L)))) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't check Scala - what does the "(s)" in "character(s)" mean? I ask because "character" refers to the type in R.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@felixcheung Scala doesn't have a doc for the param delimiter. I added this myself. What I am trying to say is "one or more characters". I will change it to "a character string" so it will be
Does this look ok to you?