Skip to content

Commit

Permalink
ARROW-17355: [R] Refactor the handle_* utility functions for a better…
Browse files Browse the repository at this point in the history
… dev experience (#14030)

For context; these `handle_*` functions originally caught an error and if it contained a particular string raised an augmented error message with extra guidance for the user (and if not, raised the original error message).

This became problematic in a later PR where we wanted to test multiple conditions and only raise the original error if none of the conditions were met - the temporary approach was to move the responsibility for the raising of the original error to outside of the `handle_*` functions.  The issue here is that this makes it easy for developers to forget to add in this line of code.

The proposed solution here implements a generic error augmentation function `augment_io_error_msg()`, which tests all conditions, raises an error with an augmented message if any conditions are met, or raises the original error if not.

Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
  • Loading branch information
thisisnic authored Oct 1, 2022
1 parent d60d8c6 commit 4dfa617
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 28 deletions.
4 changes: 2 additions & 2 deletions r/R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,9 @@ read_delim_arrow <- function(file,
tryCatch(
tab <- reader$Read(),
# n = 4 because we want the error to show up as being from read_delim_arrow()
# and not handle_csv_read_error()
# and not augment_io_error_msg()
error = function(e, call = caller_env(n = 4)) {
handle_csv_read_error(e, schema, call)
augment_io_error_msg(e, call, schema = schema)
}
)

Expand Down
5 changes: 2 additions & 3 deletions r/R/dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,9 @@ open_dataset <- function(sources,
# Default is _not_ to inspect/unify schemas
factory$Finish(schema, isTRUE(unify_schemas)),
# n = 4 because we want the error to show up as being from open_dataset()
# and not handle_parquet_io_error()
# and not augment_io_error_msg()
error = function(e, call = caller_env(n = 4)) {
handle_parquet_io_error(e, format, call)
abort(conditionMessage(e), call = call)
augment_io_error_msg(e, call, format = format)
}
)
}
Expand Down
6 changes: 2 additions & 4 deletions r/R/dplyr-collect.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
tryCatch(
out <- as_arrow_table(x),
# n = 4 because we want the error to show up as being from collect()
# and not handle_csv_read_error()
# and not augment_io_error_msg()
error = function(e, call = caller_env(n = 4)) {
handle_csv_read_error(e, x$.data$schema, call)
handle_augmented_field_misuse(e, call)
abort(conditionMessage(e), call = call)
augment_io_error_msg(e, call, schema = x$.data$schema)
}
)

Expand Down
38 changes: 19 additions & 19 deletions r/R/util.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,7 @@ read_compressed_error <- function(e) {
stop(e)
}

# This function was refactored in ARROW-15260 to only raise an error if
# the appropriate string was found and so errors must be raised manually after
# calling this if matching error not found
# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors
handle_parquet_io_error <- function(e, format, call) {
msg <- conditionMessage(e)
handle_parquet_io_error <- function(msg, call, format) {
if (grepl("Parquet magic bytes not found in footer", msg) && length(format) > 1 && is_character(format)) {
# If length(format) > 1, that means it is (almost certainly) the default/not specified value
# so let the user know that they should specify the actual (not parquet) format
Expand Down Expand Up @@ -209,13 +204,7 @@ repeat_value_as_array <- function(object, n) {
return(Scalar$create(object)$as_array(n))
}

# This function was refactored in ARROW-15260 to only raise an error if
# the appropriate string was found and so errors must be raised manually after
# calling this if matching error not found
# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors
handle_csv_read_error <- function(e, schema, call) {
msg <- conditionMessage(e)

handle_csv_read_error <- function(msg, call, schema) {
if (grepl("conversion error", msg) && inherits(schema, "Schema")) {
msg <- c(
msg,
Expand All @@ -229,12 +218,7 @@ handle_csv_read_error <- function(e, schema, call) {
}
}

# This function only raises an error if
# the appropriate string was found and so errors must be raised manually after
# calling this if matching error not found
# TODO: Refactor as part of ARROW-17355 to prevent potential missed errors
handle_augmented_field_misuse <- function(e, call) {
msg <- conditionMessage(e)
handle_augmented_field_misuse <- function(msg, call) {
if (grepl("No match for FieldRef.Name(__filename)", msg, fixed = TRUE)) {
msg <- c(
msg,
Expand All @@ -251,3 +235,19 @@ handle_augmented_field_misuse <- function(e, call) {
is_compressed <- function(compression) {
!identical(compression, "uncompressed")
}

# handler function which checks for a number of different read errors
augment_io_error_msg <- function(e, call, schema = NULL, format = NULL) {

msg <- conditionMessage(e)

if (!is.null(schema)) {
handle_csv_read_error(msg, call, schema)
}
if (!is.null(format)) {
handle_parquet_io_error(msg, call, format)
}

handle_augmented_field_misuse(msg, call)
abort(msg, call = call)
}

0 comments on commit 4dfa617

Please sign in to comment.