Skip to content

Commit

Permalink
ARROW-12696: [R] Improve testing of error messages converted to warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
nealrichardson committed May 10, 2021
1 parent 34dc1e6 commit a0914f6
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 88 deletions.
16 changes: 14 additions & 2 deletions r/R/dplyr-eval.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ arrow_eval <- function (expr, mask) {
if (grepl(patterns, msg)) {
stop(e)
}
invisible(structure(msg, class = "try-error", condition = e))

out <- structure(msg, class = "try-error", condition = e)
if (grepl("not supported.*Arrow", msg)) {
# One of ours. Mark it so that consumers can handle it differently
class(out) <- c("arrow-try-error", class(out))
}
invisible(out)
})
}

Expand All @@ -51,6 +57,12 @@ i18ize_error_messages <- function() {
paste(map(out, ~sub("X_____X", ".*", .)), collapse = "|")
}

# Helper to raise a common error
arrow_not_supported <- function(msg) {
# TODO: raise a classed error?
stop(paste(msg, "not supported by Arrow"), call. = FALSE)
}

# Create a data mask for evaluating a dplyr expression
arrow_mask <- function(.data) {
f_env <- new_environment(.cache$functions)
Expand All @@ -73,4 +85,4 @@ arrow_mask <- function(.data) {
# (because if we do we get `Error: Can't modify the data pronoun` in mutate())
out$.data <- .data$selected_columns
out
}
}
25 changes: 11 additions & 14 deletions r/R/dplyr-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,14 @@ nse_funcs$as.numeric = function(x) {
# String functions
nse_funcs$nchar = function(x, type = "chars", allowNA = FALSE, keepNA = NA) {
if (allowNA) {
stop("allowNA = TRUE not supported for Arrow", call. = FALSE)
arrow_not_supported("allowNA = TRUE")
}
if (is.na(keepNA)) {
keepNA <- !identical(type, "width")
}
if (!keepNA) {
# TODO: I think there is a fill_null kernel we could use, set null to 2
stop("keepNA = TRUE not supported for Arrow", call. = FALSE)
arrow_not_supported("keepNA = TRUE")
}
if (identical(type, "bytes")) {
Expression$create("binary_length", x)
Expand Down Expand Up @@ -210,7 +210,7 @@ nse_funcs$strsplit <- function(x,
# to see if it is a regex (if it contains any regex metacharacters). If not,
# then allow to proceed.
if (!fixed && contains_regex(split)) {
stop("Regular expression matching not supported in strsplit for Arrow", call. = FALSE)
arrow_not_supported("Regular expression matching in strsplit()")
}
# warn when the user specifies both fixed = TRUE and perl = TRUE, for
# consistency with the behavior of base::strsplit()
Expand All @@ -230,13 +230,13 @@ nse_funcs$strsplit <- function(x,
nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
opts <- get_stringr_pattern_options(enexpr(pattern))
if (!opts$fixed && contains_regex(opts$pattern)) {
stop("Regular expression matching not supported in str_split() for Arrow", call. = FALSE)
arrow_not_supported("Regular expression matching in str_split()")
}
if (opts$ignore_case) {
stop("Case-insensitive string splitting not supported in Arrow", call. = FALSE)
arrow_not_supported("Case-insensitive string splitting")
}
if (n == 0) {
stop("Splitting strings into zero parts not supported in Arrow" , call. = FALSE)
arrow_not_supported("Splitting strings into zero parts")
}
if (identical(n, Inf)) {
n <- 0L
Expand All @@ -260,7 +260,6 @@ nse_funcs$str_split <- function(string, pattern, n = Inf, simplify = FALSE) {
)
}


# String function helpers

# format `pattern` as needed for case insensitivity and literal matching by RE2
Expand Down Expand Up @@ -316,13 +315,11 @@ get_stringr_pattern_options <- function(pattern) {
check_dots(...)
list(pattern = pattern, fixed = FALSE, ignore_case = ignore_case)
}
coll <- boundary <- function(...) {
stop(
"Pattern modifier `",
match.call()[[1]],
"()` is not supported in Arrow",
call. = FALSE
)
coll <- function(...) {
arrow_not_supported("Pattern modifier `coll()`")
}
boundary <- function(...) {
arrow_not_supported("Pattern modifier `boundary()`")
}
check_dots <- function(...) {
dots <- list(...)
Expand Down
10 changes: 9 additions & 1 deletion r/R/dplyr-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,15 @@ mutate.arrow_dplyr_query <- function(.data,
new_var <- names(exprs)[i]
results[[new_var]] <- arrow_eval(exprs[[i]], mask)
if (inherits(results[[new_var]], "try-error")) {
msg <- paste('Expression', as_label(exprs[[i]]), 'not supported in Arrow')
expr_lab <- as_label(exprs[[i]])
# Look for informative message from the Arrow function version
if (inherits(results[[new_var]], "arrow-try-error")) {
# Include it if found
msg <- paste0('In ', expr_lab, ', ', as.character(results[[new_var]]))
} else {
# Otherwise be opaque (the original error is probably not useful)
msg <- paste('Expression', expr_lab, 'not supported in Arrow')
}
return(abandon_ship(call, .data, msg))
} else if (!inherits(results[[new_var]], "Expression") &&
!is.null(results[[new_var]])) {
Expand Down
4 changes: 3 additions & 1 deletion r/tests/testthat/test-dplyr-mutate.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ test_that("nchar() arguments", {
collect(),
tbl
)
# This tests the whole abandon_ship() machinery
expect_warning(
expect_dplyr_equal(
input %>%
Expand All @@ -128,7 +129,8 @@ test_that("nchar() arguments", {
collect(),
tbl
),
"not supported"
'In nchar(verses, type = "bytes", allowNA = TRUE), allowNA = TRUE not supported by Arrow; pulling data into R',
fixed = TRUE
)
})

Expand Down
113 changes: 43 additions & 70 deletions r/tests/testthat/test-dplyr-string-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -342,99 +342,73 @@ test_that("arrow_*_split_whitespace functions", {
collect(),
tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux")))
)

})

test_that("errors and warnings in string splitting", {
df <- tibble(x = c("Foo and bar", "baz and qux and quux"))

# These conditions generate an error, but abandon_ship() catches the error,
# issues a warning, and pulls the data into R
expect_warning(
df %>%
Table$create() %>%
mutate(x = strsplit(x, "and.*", fixed = FALSE)) %>%
collect(),
regexp = "not supported"
# issues a warning, and pulls the data into R (if computing on InMemoryDataset)
# Elsewhere we test that abandon_ship() works,
# so here we can just call the functions directly

x <- Expression$field_ref("x")
expect_error(
nse_funcs$strsplit(x, "and.*", fixed = FALSE),
'Regular expression matching in strsplit() not supported by Arrow',
fixed = TRUE
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, "and.?")) %>%
collect()
expect_error(
nse_funcs$str_split(x, "and.?"),
'Regular expression matching in str_split() not supported by Arrow',
fixed = TRUE
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, regex("and.?"), n = 2)) %>%
collect(),
regexp = "not supported"
expect_error(
nse_funcs$str_split(x, regex("and.*")),
'Regular expression matching in str_split() not supported by Arrow',
fixed = TRUE
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, fixed("and", ignore_case = TRUE))) %>%
collect(),
"not supported"
expect_error(
nse_funcs$str_split(x, fixed("and", ignore_case = TRUE)),
"Case-insensitive string splitting not supported by Arrow"
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, coll("and.?"))) %>%
collect(),
regexp = "not supported"
expect_error(
nse_funcs$str_split(x, coll("and.?")),
"Pattern modifier `coll()` not supported by Arrow",
fixed = TRUE
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, boundary(type = "word"))) %>%
collect(),
regexp = "not supported"
expect_error(
nse_funcs$str_split(x, boundary(type = "word")),
"Pattern modifier `boundary()` not supported by Arrow",
fixed = TRUE
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, "and", n = 0)) %>%
collect(),
regexp = "not supported"
expect_error(
nse_funcs$str_split(x, "and", n = 0),
"Splitting strings into zero parts not supported by Arrow"
)

# This condition generates a warning
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_split(x, fixed("and"), simplify = TRUE)) %>%
collect(),
"ignored"
nse_funcs$str_split(x, fixed("and"), simplify = TRUE),
"Argument 'simplify = TRUE' will be ignored"
)

})

test_that("errors and warnings in string detection and replacement", {
df <- tibble(x = c("Foo", "bar"))
x <- Expression$field_ref("x")

# These conditions generate an error, but abandon_ship() catches the error,
# issues a warning, and pulls the data into R
expect_warning(
df %>%
Table$create() %>%
filter(str_detect(x, boundary(type = "character"))) %>%
collect(),
regexp = "not implemented"
expect_error(
nse_funcs$str_detect(x, boundary(type = "character")),
"Pattern modifier `boundary()` not supported by Arrow",
fixed = TRUE
)
expect_warning(
df %>%
Table$create() %>%
mutate(x = str_replace_all(x, coll("o", locale = "en"), "ó")) %>%
collect(),
regexp = "not supported"
expect_error(
nse_funcs$str_replace_all(x, coll("o", locale = "en"), "ó"),
"Pattern modifier `coll()` not supported by Arrow",
fixed = TRUE
)

# This condition generates a warning
expect_warning(
df %>%
Table$create() %>%
transmute(x = str_replace_all(x, regex("o", multiline = TRUE), "u")),
nse_funcs$str_replace_all(x, regex("o", multiline = TRUE), "u"),
"Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
)

Expand Down Expand Up @@ -521,5 +495,4 @@ test_that("edge cases in string detection and replacement", {
collect(),
tibble(x = c("ABC"))
)

})

0 comments on commit a0914f6

Please sign in to comment.