From 09ea0ad291614a91cd738809b875e710453146d2 Mon Sep 17 00:00:00 2001
From: Romain Francois
Date: Tue, 24 Sep 2019 09:43:01 +0200
Subject: [PATCH] + ParquetWriterProperties$create() and associated
 ParquetWriterProperties_Builder class skeleton

---
Review notes (commentary only, placed after the "---" separator; nothing
here changes the diff content below):

* r/R/arrowExports.R: adds two R wrappers,
  parquet___WriterProperties___Builder__create() and
  parquet___WriterProperties___Builder__build(builder), each forwarding to
  the `_arrow_`-prefixed native symbol of the same name through .Call().
* r/R/parquet.R: adds an R6 class ParquetWriterProperties_Builder that
  inherits from Object and has an empty `public` list, plus
  ParquetWriterProperties$create(), which creates a builder and immediately
  builds a ParquetWriterProperties from it. The `chunk_size` default of
  write_parquet() and of its OutputStream and character methods changes
  from `table$num_rows` to NULL; the OutputStream method now resolves it
  with `chunk_size %||% table$num_rows` when calling writer$WriteTable().
* r/src/arrowExports.cpp: adds the two matching RcppExport shims (with
  Rf_error() stubs for builds where ARROW_R_WITH_ARROW is not defined) and
  lists them in CallEntries with argument counts 0 and 1.
* r/src/parquet.cpp: adds the two `// [[arrow::export]]` functions; the
  build function returns builder->build().

NOTE(review): in this copy the C++ template arguments look stripped (for
example `std::make_shared();`, `std::shared_ptr& builder` and
`input_parameter&>::type`), and the From: header carries no e-mail
address. Line breaks and indentation were re-laid-out from a
whitespace-collapsed copy. TODO: confirm all three against the upstream
commit before applying this patch.

 r/R/arrowExports.R     |  8 ++++++++
 r/R/parquet.R          | 21 ++++++++++++++++-----
 r/src/arrowExports.cpp | 31 +++++++++++++++++++++++++++++++
 r/src/parquet.cpp      | 10 ++++++++++
 4 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index d0a9266dfc394..8a8deed8a0c68 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -920,6 +920,14 @@ parquet___default_writer_properties <- function(){
     .Call(`_arrow_parquet___default_writer_properties` )
 }
 
+parquet___WriterProperties___Builder__create <- function(){
+    .Call(`_arrow_parquet___WriterProperties___Builder__create` )
+}
+
+parquet___WriterProperties___Builder__build <- function(builder){
+    .Call(`_arrow_parquet___WriterProperties___Builder__build` , builder)
+}
+
 parquet___arrow___ParquetFileWriter__Open <- function(schema, sink, properties, arrow_properties){
     .Call(`_arrow_parquet___arrow___ParquetFileWriter__Open` , schema, sink, properties, arrow_properties)
 }
diff --git a/r/R/parquet.R b/r/R/parquet.R
index 5b9af2189c67e..48e67818b2d31 100644
--- a/r/R/parquet.R
+++ b/r/R/parquet.R
@@ -73,6 +73,17 @@ ParquetWriterProperties$default <- function() {
   shared_ptr(ParquetWriterProperties, parquet___default_writer_properties())
 }
 
+ParquetWriterProperties_Builder <- R6Class("ParquetWriterProperties_Builder", inherit = Object,
+  public = list(
+
+  )
+)
+
+ParquetWriterProperties$create <- function() {
+  builder <- shared_ptr(ParquetWriterProperties_Builder, parquet___WriterProperties___Builder__create())
+  shared_ptr(ParquetWriterProperties, parquet___WriterProperties___Builder__build(builder))
+}
+
 ParquetFileWriter <- R6Class("ParquetFileWriter", inherit = Object,
   public = list(
     WriteTable = function(table, chunk_size) {
@@ -103,7 +114,7 @@ ParquetFileWriter$create <- function(
 #'
 #' @param table An [arrow::Table][Table], or an object convertible to it with [to_arrow()]
 #' @param sink an [arrow::io::OutputStream][OutputStream] or a string which is interpreted as a file path
-#' @param chunk_size chunk size
+#' @param chunk_size chunk size. If NULL, the number of rows of the table is used
 #'
 #' @examples
 #' \donttest{
@@ -116,23 +127,23 @@ ParquetFileWriter$create <- function(
 #'
 #' }
 #' @export
-write_parquet <- function(table, sink, chunk_size = table$num_rows) {
+write_parquet <- function(table, sink, chunk_size = NULL) {
   UseMethod("write_parquet", sink)
 }
 
 #' @export
-write_parquet.OutputStream <- function(table, sink, chunk_size = table$num_rows) {
+write_parquet.OutputStream <- function(table, sink, chunk_size = NULL) {
   table <- to_arrow(table)
   schema <- table$schema
   properties <- ParquetWriterProperties$default()
   arrow_properties <- ParquetArrowWriterProperties$default()
   writer <- ParquetFileWriter$create(schema, sink, properties = properties, arrow_properties = arrow_properties)
-  writer$WriteTable(table, chunk_size = chunk_size)
+  writer$WriteTable(table, chunk_size = chunk_size %||% table$num_rows)
   writer$Close()
 }
 
 #' @export
-write_parquet.character <- function(table, sink, chunk_size = table$num_rows) {
+write_parquet.character <- function(table, sink, chunk_size = NULL) {
   table <- to_arrow(table)
   file_sink <- FileOutputStream$create(sink)
   on.exit(file_sink$close())
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 645e03c90a6be..503a6204f718e 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -3568,6 +3568,35 @@ RcppExport SEXP _arrow_parquet___default_writer_properties(){
 }
 #endif
 
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr parquet___WriterProperties___Builder__create();
+RcppExport SEXP _arrow_parquet___WriterProperties___Builder__create(){
+BEGIN_RCPP
+    return Rcpp::wrap(parquet___WriterProperties___Builder__create());
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___WriterProperties___Builder__create(){
+    Rf_error("Cannot call parquet___WriterProperties___Builder__create(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
+// parquet.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr parquet___WriterProperties___Builder__build(const std::shared_ptr& builder);
+RcppExport SEXP _arrow_parquet___WriterProperties___Builder__build(SEXP builder_sexp){
+BEGIN_RCPP
+    Rcpp::traits::input_parameter&>::type builder(builder_sexp);
+    return Rcpp::wrap(parquet___WriterProperties___Builder__build(builder));
+END_RCPP
+}
+#else
+RcppExport SEXP _arrow_parquet___WriterProperties___Builder__build(SEXP builder_sexp){
+    Rf_error("Cannot call parquet___WriterProperties___Builder__build(). Please use arrow::install_arrow() to install required runtime libraries. ");
+}
+#endif
+
 // parquet.cpp
 #if defined(ARROW_R_WITH_ARROW)
 std::unique_ptr parquet___arrow___ParquetFileWriter__Open(const std::shared_ptr& schema, const std::shared_ptr& sink, const std::shared_ptr& properties, const std::shared_ptr& arrow_properties);
@@ -4754,6 +4783,8 @@ static const R_CallMethodDef CallEntries[] = {
     { "_arrow_parquet___ArrowWriterProperties___Builder__disallow_truncated_timestamps", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__disallow_truncated_timestamps, 1},
     { "_arrow_parquet___ArrowWriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__build, 1},
     { "_arrow_parquet___default_writer_properties", (DL_FUNC) &_arrow_parquet___default_writer_properties, 0},
+    { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0},
+    { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__build, 1},
     { "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4},
     { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3},
     { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1},
diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp
index 1a0407686093e..2dfba18b3a4aa 100644
--- a/r/src/parquet.cpp
+++ b/r/src/parquet.cpp
@@ -132,6 +132,16 @@ std::shared_ptr parquet___default_writer_properties()
   return parquet::default_writer_properties();
 }
 
+// [[arrow::export]]
+std::shared_ptr parquet___WriterProperties___Builder__create() {
+  return std::make_shared();
+}
+
+// [[arrow::export]]
+std::shared_ptr parquet___WriterProperties___Builder__build(const std::shared_ptr& builder) {
+  return builder->build();
+}
+
 // [[arrow::export]]
 std::unique_ptr parquet___arrow___ParquetFileWriter__Open(
     const std::shared_ptr& schema,