From e97656558973beed940bace7308a6017367788e2 Mon Sep 17 00:00:00 2001
From: Neal Richardson <neal.p.richardson@gmail.com>
Date: Sun, 7 Aug 2022 06:43:02 -0400
Subject: [PATCH] Apply #13773, drop badges, add placeholder write_arrow

---
 r/NAMESPACE                       |  1 +
 r/R/arrow-package.R               |  8 ++++++++
 r/R/compute.R                     |  9 ++++++++-
 r/R/table.R                       |  9 ++++++++-
 r/README.md                       |  8 ++------
 r/man/register_scalar_function.Rd |  2 +-
 r/man/write_arrow.Rd              | 15 +++++++++++++++
 r/tests/testthat/test-compute.R   | 24 ++++++++++++++++++++++--
 8 files changed, 65 insertions(+), 11 deletions(-)
 create mode 100644 r/man/write_arrow.Rd

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 17f404caa140d..61a3b7048f88b 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -374,6 +374,7 @@ export(utf8)
 export(value_counts)
 export(vctrs_extension_array)
 export(vctrs_extension_type)
+export(write_arrow)
 export(write_csv_arrow)
 export(write_dataset)
 export(write_feather)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index f3e0b817d5f42..943d2d73e19bc 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -138,3 +138,11 @@ option_use_threads <- function() {
 option_compress_metadata <- function() {
   !is_false(getOption("arrow.compress_metadata"))
 }
+
+# Restore this to suppress sparklyr warning
+
+#' Use `write_ipc_stream` or `write_to_raw` instead.
+#' @param ... Ignored arguments
+#' @export
+#' @keywords internal
+write_arrow <- function(...) stop("write_arrow has been removed")
diff --git a/r/R/compute.R b/r/R/compute.R
index 0985e73a5f2d3..636c9146ca37b 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -344,7 +344,7 @@ cast_options <- function(safe = TRUE, ...) {
 #' @return `NULL`, invisibly
 #' @export
 #'
-#' @examplesIf arrow_with_dataset()
+#' @examplesIf arrow_with_dataset() && identical(Sys.getenv("NOT_CRAN"), "true")
 #' library(dplyr, warn.conflicts = FALSE)
 #'
 #' some_model <- lm(mpg ~ disp + cyl, data = mtcars)
@@ -385,6 +385,13 @@ register_scalar_function <- function(name, fun, in_type, out_type,
     update_cache = TRUE
   )
 
+  # User-defined functions require some special handling
+  # in the query engine which currently require an opt-in using
+  # the R_ARROW_COLLECT_WITH_UDF environment variable while this
+  # behaviour is stabilized.
+  # TODO(ARROW-17178) remove the need for this!
+  Sys.setenv(R_ARROW_COLLECT_WITH_UDF = "true")
+
   invisible(NULL)
 }
 
diff --git a/r/R/table.R b/r/R/table.R
index 5579c676d5157..d7e276415c5cd 100644
--- a/r/R/table.R
+++ b/r/R/table.R
@@ -331,5 +331,12 @@ as_arrow_table.arrow_dplyr_query <- function(x, ...) {
   # See query-engine.R for ExecPlan/Nodes
   plan <- ExecPlan$create()
   final_node <- plan$Build(x)
-  plan$Run(final_node, as_table = TRUE)
+
+  run_with_event_loop <- identical(
+    Sys.getenv("R_ARROW_COLLECT_WITH_UDF", ""),
+    "true"
+  )
+
+  result <- plan$Run(final_node, as_table = run_with_event_loop)
+  as_arrow_table(result)
 }
diff --git a/r/README.md b/r/README.md
index 1509ae7793f6a..c939faf26dee8 100644
--- a/r/README.md
+++ b/r/README.md
@@ -1,9 +1,5 @@
 # arrow
 
-[![cran](https://www.r-pkg.org/badges/version-last-release/arrow)](https://cran.r-project.org/package=arrow)
-[![CI](https://github.com/apache/arrow/workflows/R/badge.svg?event=push)](https://github.com/apache/arrow/actions?query=workflow%3AR+branch%3Amaster+event%3Apush)
-[![conda-forge](https://img.shields.io/conda/vn/conda-forge/r-arrow.svg)](https://anaconda.org/conda-forge/r-arrow)
-
 **[Apache Arrow](https://arrow.apache.org/) is a cross-language
 development platform for in-memory data.** It specifies a standardized
 language-independent columnar memory format for flat and hierarchical
@@ -138,7 +134,7 @@ returns an R `data.frame`. To return an Arrow `Table`, set argument
 -   `read_json_arrow()`: read a JSON data file
 
 For writing data to single files, the `arrow` package provides the
-functions `write_parquet()`, `write_feather()`, and `write_csv_arrow()`. 
+functions `write_parquet()`, `write_feather()`, and `write_csv_arrow()`.
 These can be used with R `data.frame` and Arrow `Table` objects.
 
 For example, let’s write the Star Wars characters data that’s included
@@ -270,7 +266,7 @@ sw %>%
 ```
 
 Additionally, equality joins (e.g. `left_join()`, `inner_join()`) are supported
-for joining multiple tables. 
+for joining multiple tables.
 
 ```r
 jedi <- data.frame(
diff --git a/r/man/register_scalar_function.Rd b/r/man/register_scalar_function.Rd
index 4da8f54f645b0..324dd5fad1f58 100644
--- a/r/man/register_scalar_function.Rd
+++ b/r/man/register_scalar_function.Rd
@@ -48,7 +48,7 @@ stateless and return output with the same shape (i.e., the same number
 of rows) as the input.
 }
 \examples{
-\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
+\dontshow{if (arrow_with_dataset() && identical(Sys.getenv("NOT_CRAN"), "true")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
 library(dplyr, warn.conflicts = FALSE)
 
 some_model <- lm(mpg ~ disp + cyl, data = mtcars)
diff --git a/r/man/write_arrow.Rd b/r/man/write_arrow.Rd
new file mode 100644
index 0000000000000..a86588e1d4862
--- /dev/null
+++ b/r/man/write_arrow.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/arrow-package.R
+\name{write_arrow}
+\alias{write_arrow}
+\title{Use \code{write_ipc_stream} or \code{write_to_raw} instead.}
+\usage{
+write_arrow(...)
+}
+\arguments{
+\item{...}{Ignored arguments}
+}
+\description{
+Use \code{write_ipc_stream} or \code{write_to_raw} instead.
+}
+\keyword{internal}
diff --git a/r/tests/testthat/test-compute.R b/r/tests/testthat/test-compute.R
index 9e487169f4b15..bbc727de8fc3f 100644
--- a/r/tests/testthat/test-compute.R
+++ b/r/tests/testthat/test-compute.R
@@ -81,6 +81,9 @@ test_that("arrow_scalar_function() works with auto_convert = TRUE", {
 
 test_that("register_scalar_function() adds a compute function to the registry", {
   skip_if_not(CanRunWithCapturedR())
+  # TODO(ARROW-17178): User-defined function-friendly ExecPlan execution has
+  # occasional valgrind errors
+  skip_on_linux_devel()
 
   register_scalar_function(
     "times_32",
@@ -109,6 +112,8 @@ test_that("register_scalar_function() adds a compute function to the registry",
       dplyr::collect(),
     tibble::tibble(a = 1L, b = 32.0)
   )
+
+  Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")
 })
 
 test_that("arrow_scalar_function() with bad return type errors", {
@@ -143,6 +148,9 @@ test_that("arrow_scalar_function() with bad return type errors", {
     call_function("times_32_bad_return_type_scalar", Array$create(1L)),
     "Expected return Array or Scalar with type 'double'"
   )
+
+  # TODO(ARROW-17178) remove the need for this!
+  Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")
 })
 
 test_that("register_user_defined_function() can register multiple kernels", {
@@ -203,12 +211,18 @@ test_that("register_user_defined_function() errors for unsupported specification
     ),
     "Kernels for user-defined function must accept the same number of arguments"
   )
+
+  # TODO(ARROW-17178) remove the need for this!
+  Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")
 })
 
 test_that("user-defined functions work during multi-threaded execution", {
   skip_if_not(CanRunWithCapturedR())
   skip_if_not_available("dataset")
-  # Snappy has a UBSan issue: https://github.com/google/snappy/pull/148
+  # Skip on linux devel because:
+  # TODO(ARROW-17283): Snappy has a UBSan issue that is fixed in the dev version
+  # TODO(ARROW-17178): User-defined function-friendly ExecPlan execution has
+  # occasional valgrind errors
   skip_on_linux_devel()
 
   n_rows <- 10000
@@ -255,6 +269,9 @@ test_that("user-defined functions work during multi-threaded execution", {
     dplyr::collect()
 
   expect_identical(result2$fun_result, example_df$value * 32)
+
+  # TODO(ARROW-17178) remove the need for this!
+  Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")
 })
 
 test_that("user-defined error when called from an unsupported context", {
@@ -271,7 +288,7 @@ test_that("user-defined error when called from an unsupported context", {
   on.exit(unregister_binding("times_32", update_cache = TRUE))
 
   stream_plan_with_udf <- function() {
-   record_batch(a = 1:1000) %>%
+    record_batch(a = 1:1000) %>%
       dplyr::mutate(b = times_32(a)) %>%
       as_record_batch_reader() %>%
       as_arrow_table()
@@ -304,4 +321,7 @@ test_that("user-defined error when called from an unsupported context", {
       "Call to R \\(.*?\\) from a non-R thread from an unsupported context"
     )
   }
+
+  # TODO(ARROW-17178) remove the need for this!
+  Sys.unsetenv("R_ARROW_COLLECT_WITH_UDF")
 })