ARROW-16154: [R] Errors which pass through `handle_csv_read_error()` and `handle_parquet_io_error()` need better error tracing

As discussed on #12826

Not sure how (or whether) to write tests, but I tried running this locally using the CSV directory set up in `test-dataset-csv.R`, both with and without this change. Without the change, we get, e.g.

```
open_dataset(csv_dir)
# Error in `handle_parquet_io_error()` at r/R/dataset.R:221:6:
# ! Invalid: Error creating dataset. Could not read schema from '/tmp/RtmpuTyOD8/file5049dcf581a5/5/file1.csv': Could not open Parquet input source '/tmp/RtmpuTyOD8/file5049dcf581a5/5/file1.csv': Parquet magic bytes not found in footer. Either the file is corrupted or this is not a parquet file.
# /home/nic2/arrow/cpp/src/arrow/dataset/file_parquet.cc:323  GetReader(source, scan_options). Is this a 'parquet' file?
# /home/nic2/arrow/cpp/src/arrow/dataset/discovery.cc:40  InspectSchemas(std::move(options))
# /home/nic2/arrow/cpp/src/arrow/dataset/discovery.cc:262  Inspect(options.inspect_options)
# ℹ Did you mean to specify a 'format' other than the default (parquet)?
```

and with the change:

```
open_dataset(csv_dir)
# Error in `open_dataset()`:
# ! Invalid: Error creating dataset. Could not read schema from '/tmp/RtmpLbqZs6/file4e4ca14fb5795/5/file1.csv': Could not open Parquet input source '/tmp/RtmpLbqZs6/file4e4ca14fb5795/5/file1.csv': Parquet magic bytes not found in footer. Either the file is corrupted or this is not a parquet file.
# /home/nic2/arrow/cpp/src/arrow/dataset/file_parquet.cc:323  GetReader(source, scan_options). Is this a 'parquet' file?
# /home/nic2/arrow/cpp/src/arrow/dataset/discovery.cc:40  InspectSchemas(std::move(options))
# /home/nic2/arrow/cpp/src/arrow/dataset/discovery.cc:262  Inspect(options.inspect_options)
# ℹ Did you mean to specify a 'format' other than the default (parquet)?
```
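For reference, a minimal local reproduction along these lines (a sketch only: the temporary directory and file names here are made up rather than taken from the `test-dataset-csv.R` fixture) would be:

```r
library(arrow)

# Write a couple of CSV files into a directory, then open it as a dataset
# without specifying a format, so the default (parquet) reader is used
csv_dir <- file.path(tempdir(), "csv_dataset")
dir.create(csv_dir, showWarnings = FALSE)
write.csv(mtcars[1:16, ], file.path(csv_dir, "file1.csv"), row.names = FALSE)
write.csv(mtcars[17:32, ], file.path(csv_dir, "file2.csv"), row.names = FALSE)

open_dataset(csv_dir)                 # errors as shown above
open_dataset(csv_dir, format = "csv") # works once the format is specified
```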

Closes #12839 from thisisnic/ARROW-16154_error_trace

Authored-by: Nic Crane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
thisisnic committed Apr 13, 2022
1 parent 681ede6 commit 5d5cceb
Showing 4 changed files with 20 additions and 15 deletions.
r/R/csv.R (4 additions, 2 deletions)

```diff
@@ -200,8 +200,10 @@ read_delim_arrow <- function(file,
 
   tryCatch(
     tab <- reader$Read(),
-    error = function(e) {
-      handle_csv_read_error(e, schema)
+    # n = 4 because we want the error to show up as being from read_delim_arrow()
+    # and not handle_csv_read_error()
+    error = function(e, call = caller_env(n = 4)) {
+      handle_csv_read_error(e, schema, call)
     }
   )
 
```
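The same handler change is applied at each of the three call sites below: the handler gains a `call` argument defaulting to `caller_env(n = 4)`, and that environment is forwarded to the helper so `abort()` can attribute the error to the user-facing function. A standalone toy sketch of the mechanism, assuming rlang >= 1.0 (`user_facing_fn()` and `handle_toy_error()` are invented for illustration, and the printed output is approximate):

```r
library(rlang)

# An error helper that re-raises with `call` pointing at the user-facing
# function rather than at the helper (or the anonymous handler) itself
handle_toy_error <- function(e, call) {
  abort(c(conditionMessage(e), i = "A hint for the user."), call = call)
}

user_facing_fn <- function() {
  tryCatch(
    stop("something went wrong internally"),
    # n = 4 walks up past the handler and tryCatch's internal frames
    # (tryCatchOne, tryCatchList, tryCatch) to user_facing_fn()'s frame
    error = function(e, call = caller_env(n = 4)) {
      handle_toy_error(e, call)
    }
  )
}

user_facing_fn()
#> Error in `user_facing_fn()`:
#> ! something went wrong internally
#> ℹ A hint for the user.
```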
r/R/dataset.R (4 additions, 2 deletions)

```diff
@@ -217,8 +217,10 @@ open_dataset <- function(sources,
   tryCatch(
     # Default is _not_ to inspect/unify schemas
     factory$Finish(schema, isTRUE(unify_schemas)),
-    error = function(e) {
-      handle_parquet_io_error(e, format)
+    # n = 4 because we want the error to show up as being from open_dataset()
+    # and not handle_parquet_io_error()
+    error = function(e, call = caller_env(n = 4)) {
+      handle_parquet_io_error(e, format, call)
     }
   )
 }
```
r/R/dplyr-collect.R (4 additions, 2 deletions)

```diff
@@ -29,8 +29,10 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) {
   # See query-engine.R for ExecPlan/Nodes
   tryCatch(
     tab <- do_exec_plan(x),
-    error = function(e) {
-      handle_csv_read_error(e, x$.data$schema)
+    # n = 4 because we want the error to show up as being from collect()
+    # and not handle_csv_read_error()
+    error = function(e, call = caller_env(n = 4)) {
+      handle_csv_read_error(e, x$.data$schema, call)
     }
   )
 
```
r/R/util.R (8 additions, 9 deletions)

```diff
@@ -125,17 +125,17 @@ read_compressed_error <- function(e) {
   stop(e)
 }
 
-handle_parquet_io_error <- function(e, format) {
+handle_parquet_io_error <- function(e, format, call) {
   msg <- conditionMessage(e)
   if (grepl("Parquet magic bytes not found in footer", msg) && length(format) > 1 && is_character(format)) {
     # If length(format) > 1, that means it is (almost certainly) the default/not specified value
     # so let the user know that they should specify the actual (not parquet) format
-    abort(c(
+    msg <- c(
       msg,
       i = "Did you mean to specify a 'format' other than the default (parquet)?"
-    ))
+    )
   }
-  stop(e)
+  abort(msg, call = call)
 }
 
 is_writable_table <- function(x) {
@@ -198,19 +198,18 @@ repeat_value_as_array <- function(object, n) {
   return(Scalar$create(object)$as_array(n))
 }
 
-handle_csv_read_error <- function(e, schema) {
+handle_csv_read_error <- function(e, schema, call) {
   msg <- conditionMessage(e)
 
   if (grepl("conversion error", msg) && inherits(schema, "Schema")) {
-    abort(c(
+    msg <- c(
       msg,
       i = paste(
         "If you have supplied a schema and your data contains a header",
         "row, you should supply the argument `skip = 1` to prevent the",
         "header being read in as data."
       )
-    ))
+    )
   }
-
-  abort(msg)
+  abort(msg, call = call)
 }
```
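On the "not sure how to write tests" point, one possible shape for a regression check (hypothetical; the directory setup and expectations below are illustrative and not part of this commit) might be:

```r
library(testthat)
library(arrow)

test_that("open_dataset() errors on a CSV directory are attributed to open_dataset()", {
  # Build a small directory of CSV files, mimicking the test-dataset-csv.R setup
  csv_dir <- file.path(tempdir(), "csv_dataset_for_test")
  dir.create(csv_dir, showWarnings = FALSE)
  write.csv(mtcars, file.path(csv_dir, "file1.csv"), row.names = FALSE)

  # Capture the condition rather than only matching the message
  err <- tryCatch(open_dataset(csv_dir), error = function(e) e)

  # The hint added by handle_parquet_io_error() should be present...
  expect_match(conditionMessage(err), "Did you mean to specify a 'format'", fixed = TRUE)
  # ...and the call stored on the condition should point at open_dataset(),
  # not at the internal error helper
  expect_identical(conditionCall(err)[[1]], quote(open_dataset))
})
```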
