From 4f4363d4e595e255dcc0fb2c959902296e0494cd Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 21 May 2021 08:33:08 +0100 Subject: [PATCH 1/6] Add more examples --- r/R/compression.R | 2 ++ r/R/dataset-format.R | 12 ++++++++++++ r/R/dataset.R | 24 ++++++++++++++++++++++++ r/R/flight.R | 2 ++ r/R/ipc_stream.R | 4 ++++ r/man/FileFormat.Rd | 13 +++++++++++++ r/man/codec_is_available.Rd | 3 +++ r/man/load_flight_server.Rd | 3 +++ r/man/open_dataset.Rd | 25 +++++++++++++++++++++++++ r/man/write_ipc_stream.Rd | 5 +++++ 10 files changed, 93 insertions(+) diff --git a/r/R/compression.R b/r/R/compression.R index ebd4c54cd82b3..bb051b8d53563 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -63,6 +63,8 @@ Codec$create <- function(type = "gzip", compression_level = NA) { #' "zstd", "lz4", "lzo", or "bz2", case insensitive. #' @return Logical: is `type` available? #' @export +#' @examples +#' codec_is_available("gzip") codec_is_available <- function(type) { util___Codec__IsAvailable(compression_from_name(type)) } diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index 854672b66a2a8..77344cad008fb 100644 --- a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -53,6 +53,18 @@ #' It returns the appropriate subclass of `FileFormat` (e.g. 
`ParquetFileFormat`) #' @rdname FileFormat #' @name FileFormat +#' @examples +#' ## Semi-colon delimited files +#' # Set up directory for examples +#' tf <- tempfile() +#' dir.create(tf) +#' on.exit(unlink(tf)) +#' write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) +#' +#' # Create FileFormat object +#' format <- FileFormat$create(format = "text", delimiter = ";") +#' +#' open_dataset(tf, format = format) #' @export FileFormat <- R6Class("FileFormat", inherit = ArrowObject, active = list( diff --git a/r/R/dataset.R b/r/R/dataset.R index 745c39af068ea..b47ffde26c3a5 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -69,6 +69,30 @@ #' @export #' @seealso `vignette("dataset", package = "arrow")` #' @include arrow-package.R +#' @examples +#' # Set up directory for examples +#' tf <- tempfile() +#' dir.create(tf) +#' on.exit(unlink(tf)) +#' \dontrun{ +#' write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet")) +#' write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet")) +#' write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet")) +#' +#' # You can specify a directory containing the files for your dataset and +#' # open_dataset will scan all files in your directory. +#' open_dataset(tf) +#' +#' # You can also supply a vector of paths +#' open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet"))) +#' } +#' ## You must specify the file format if using a format other than parquet. 
+#' write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv")) +#' write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv")) +#' # This line will result in errors when you try to work with the data +#' \dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))} +#' # This is the correct way to open a dataset containing CSVs +#' open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") open_dataset <- function(sources, schema = NULL, partitioning = hive_partition(), diff --git a/r/R/flight.R b/r/R/flight.R index 486c59a9e12dc..b3b354e82f64c 100644 --- a/r/R/flight.R +++ b/r/R/flight.R @@ -21,6 +21,8 @@ #' @param path file system path where the Python module is found. Default is #' to look in the `inst/` directory for included modules. #' @export +#' @examples +#' \dontrun{load_flight_server("demo_flight_server")} load_flight_server <- function(name, path = system.file(package = "arrow")) { reticulate::import_from_path(name, path) } diff --git a/r/R/ipc_stream.R b/r/R/ipc_stream.R index 4f506f3332b89..f2d56864c8ed3 100644 --- a/r/R/ipc_stream.R +++ b/r/R/ipc_stream.R @@ -35,6 +35,10 @@ #' serialize data to a buffer. #' [RecordBatchWriter] for a lower-level interface. #' @export +#' @examples +#' tf <- tempfile() +#' on.exit(unlink(tf)) +#' write_ipc_stream(mtcars, tf) write_ipc_stream <- function(x, sink, ...) { x_out <- x # So we can return the data we got if (is.data.frame(x)) { diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index b8d4dc01badf0..fc95814b2a740 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,3 +51,16 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. 
\code{ParquetFileFormat}) } +\examples{ +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +} diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index 1b5e8278fa9b0..5cda813f41673 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -18,3 +18,6 @@ Support for compression libraries depends on the build-time settings of the Arrow C++ library. This function lets you know which are available for use. } +\examples{ +codec_is_available("gzip") +} diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd index 7e2000a9ca222..e521efa33282e 100644 --- a/r/man/load_flight_server.Rd +++ b/r/man/load_flight_server.Rd @@ -15,3 +15,6 @@ to look in the \verb{inst/} directory for included modules.} \description{ Load a Python Flight server } +\examples{ +\dontrun{load_flight_server("demo_flight_server")} +} diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index e3e361786274d..08125f9f1aaf3 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -74,6 +74,31 @@ can accelerate queries that only touch some partitions (files). Call \code{open_dataset()} to point to a directory of data files and return a \code{Dataset}, then use \code{dplyr} methods to query it. } +\examples{ +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +\dontrun{ +write_parquet(mtcars[1:10,], file.path(tf, "file1.parquet")) +write_parquet(mtcars[11:20,], file.path(tf, "file2.parquet")) +write_parquet(mtcars[21:32,], file.path(tf, "file3.parquet")) + +# You can specify a directory containing the files for your dataset and +# open_dataset will scan all files in your directory. 
+open_dataset(tf) + +# You can also supply a vector of paths +open_dataset(c(file.path(tf, "file3.parquet"), file.path(tf, "file2.parquet"))) +} +## You must specify the file format if using a format other than parquet. +write_csv_arrow(mtcars[1:10,], file.path(tf, "file1.csv")) +write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv")) +# This line will result in errors when you try to work with the data +\dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))} +# This is the correct way to open a dataset containing CSVs +open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") +} \seealso{ \code{vignette("dataset", package = "arrow")} } diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 4f742ce917864..a504a31c3048c 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -31,6 +31,11 @@ with some nonstandard behavior, is deprecated. You should explicitly choose the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. } +\examples{ +tf <- tempfile() +on.exit(unlink(tf)) +write_ipc_stream(mtcars, tf) +} \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to serialize data to a buffer. From 90ee41b4d3d76b046802bcef8dc460fd71c9c4b1 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 21 May 2021 08:34:19 +0100 Subject: [PATCH 2/6] Add more details to scalar docs --- r/R/scalar.R | 24 ++++++++++++++++++++++++ r/man/Scalar.Rd | 27 +++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/r/R/scalar.R b/r/R/scalar.R index cbda5964a2cbe..31a00f9d527fd 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -23,9 +23,33 @@ #' @docType class #' #' @description A `Scalar` holds a single value of an Arrow type. 
+#' +#' @section Methods: +#' `$ToString()`: convert to a string +#' `$as_vector()`: convert to an R vector +#' `$as_array()`: convert to an Arrow `Array` +#' `$Equals(other)`: is this Scalar equal to `other` +#' `$ApproxEquals(other)`: is this Scalar approximately equal to `other` +#' `$is_valid`: is this Scalar valid +#' `$null_count`: number of invalid values - 1 or 0 +#' `$type`: Scalar type #' #' @name Scalar #' @rdname Scalar +#' @examples +#' Scalar$create(pi) +#' Scalar$create(404) +#' # If you pass a vector into Scalar$create, you get a list containing your items +#' Scalar$create(c(1, 2, 3)) +#' +#' # Comparisons +#' my_scalar <- Scalar$create(99) +#' my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +#' my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +#' my_scalar$Equals(Scalar$create(99.000009)) # FALSE +#' my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) +#' +#' my_scalar$ToString() #' @export Scalar <- R6Class("Scalar", inherit = ArrowDatum, diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index 2ef5b02ccbe50..1c115b7c19983 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -7,3 +7,30 @@ \description{ A \code{Scalar} holds a single value of an Arrow type. 
} +\section{Methods}{ + +\verb{$ToString()}: convert to a string +\verb{$as_vector()}: convert to an R vector +\verb{$as_array()}: convert to an Arrow \code{Array} +\verb{$Equals(other)}: is this Scalar equal to \code{other} +\verb{$ApproxEquals(other)}: is this Scalar approximately equal to \code{other} +\verb{$is_valid}: is this Scalar valid +\verb{$null_count}: number of invalid values - 1 or 0 +\verb{$type}: Scalar type +} + +\examples{ +Scalar$create(pi) +Scalar$create(404) +# If you pass a vector into Scalar$create, you get a list containing your items +Scalar$create(c(1, 2, 3)) + +# Comparisons +my_scalar <- Scalar$create(99) +my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +my_scalar$Equals(Scalar$create(99.000009)) # FALSE +my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) + +my_scalar$ToString() +} From 6debbe6c61905a7c051a9f71f92c9bce5df2b960 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 21 May 2021 11:42:23 +0100 Subject: [PATCH 3/6] Use examplesIf and bump roxygen version to one that supports this --- r/DESCRIPTION | 2 +- r/R/dataset-format.R | 2 +- r/man/FileFormat.Rd | 13 ------------- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 82ca6fed617f1..451ac6c05f776 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -36,7 +36,7 @@ Imports: utils, vctrs Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source") -RoxygenNote: 7.1.1 +RoxygenNote: 7.1.1.9001 VignetteBuilder: knitr Suggests: decor, diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index 77344cad008fb..17b01d92d81ad 100644 --- a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -53,7 +53,7 @@ #' It returns the appropriate subclass of `FileFormat` (e.g. 
`ParquetFileFormat`) #' @rdname FileFormat #' @name FileFormat -#' @examples +#' @examplesIf arrow_available() #' ## Semi-colon delimited files #' # Set up directory for examples #' tf <- tempfile() diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index fc95814b2a740..b8d4dc01badf0 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,16 +51,3 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}) } -\examples{ -## Semi-colon delimited files -# Set up directory for examples -tf <- tempfile() -dir.create(tf) -on.exit(unlink(tf)) -write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) - -# Create FileFormat object -format <- FileFormat$create(format = "text", delimiter = ";") - -open_dataset(tf, format = format) -} From b17de92285550889717db70a4093be834c44d049 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 21 May 2021 13:52:38 +0100 Subject: [PATCH 4/6] Use examplesIf --- r/R/dataset.R | 2 +- r/man/FileFormat.Rd | 15 +++++++++++++++ r/man/open_dataset.Rd | 2 ++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/r/R/dataset.R b/r/R/dataset.R index b47ffde26c3a5..4c0f9c04e3f16 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -69,7 +69,7 @@ #' @export #' @seealso `vignette("dataset", package = "arrow")` #' @include arrow-package.R -#' @examples +#' @examplesIf arrow_available() #' # Set up directory for examples #' tf <- tempfile() #' dir.create(tf) diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index b8d4dc01badf0..127fb0364d8a1 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. 
\code{ParquetFileFormat}) } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index 08125f9f1aaf3..3cc89d81af3ea 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -75,6 +75,7 @@ can accelerate queries that only touch some partitions (files). Call \code{Dataset}, then use \code{dplyr} methods to query it. } \examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # Set up directory for examples tf <- tempfile() dir.create(tf) @@ -98,6 +99,7 @@ write_csv_arrow(mtcars[11:20,], file.path(tf, "file2.csv")) \dontrun{open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")))} # This is the correct way to open a dataset containing CSVs open_dataset(c(file.path(tf, "file1.csv"), file.path(tf, "file2.csv")), format = "csv") +\dontshow{\}) # examplesIf} } \seealso{ \code{vignette("dataset", package = "arrow")} From 6ade1fbb8abb34f5f5270990b79bccd724a963cc Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 21 May 2021 14:44:08 +0100 Subject: [PATCH 5/6] arrow_available -> arrow_dataset --- r/R/dataset.R | 2 +- r/man/open_dataset.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dataset.R b/r/R/dataset.R index 4c0f9c04e3f16..8716ef0d5c5a8 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -69,7 +69,7 @@ #' @export #' @seealso `vignette("dataset", package = "arrow")` #' @include arrow-package.R -#' @examplesIf arrow_available() +#' @examplesIf arrow_with_dataset() #' # Set up directory 
for examples #' tf <- tempfile() #' dir.create(tf) diff --git a/r/man/open_dataset.Rd b/r/man/open_dataset.Rd index 3cc89d81af3ea..7175bb132eaf0 100644 --- a/r/man/open_dataset.Rd +++ b/r/man/open_dataset.Rd @@ -75,7 +75,7 @@ can accelerate queries that only touch some partitions (files). Call \code{Dataset}, then use \code{dplyr} methods to query it. } \examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # Set up directory for examples tf <- tempfile() dir.create(tf) From 702a28f32fa6fd15c28c8c97c17bd568bb70d0b0 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Fri, 21 May 2021 17:35:06 +0100 Subject: [PATCH 6/6] arrow_available -> arrow_with_dataset --- r/R/dataset-format.R | 2 +- r/man/FileFormat.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index 17b01d92d81ad..3259ff1077c9c 100644 --- a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -53,7 +53,7 @@ #' It returns the appropriate subclass of `FileFormat` (e.g. `ParquetFileFormat`) #' @rdname FileFormat #' @name FileFormat -#' @examplesIf arrow_available() +#' @examplesIf arrow_with_dataset() #' ## Semi-colon delimited files #' # Set up directory for examples #' tf <- tempfile() diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index 127fb0364d8a1..5bc9475b40860 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -52,7 +52,7 @@ It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFile } \examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} ## Semi-colon delimited files # Set up directory for examples tf <- tempfile()