From 874b9fe70e60b459aa8db320069260ac0169c881 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 28 Apr 2021 16:26:05 -0400 Subject: [PATCH] repair bindings --- cpp/src/arrow/compute/exec/CMakeLists.txt | 2 + cpp/src/arrow/dataset/scanner_internal.h | 10 +-- cpp/src/arrow/dataset/scanner_test.cc | 2 +- python/pyarrow/includes/libarrow_dataset.pxd | 26 ++++--- r/NAMESPACE | 1 + r/R/arrowExports.R | 20 ++--- r/R/expression.R | 10 +-- r/man/FileFormat.Rd | 5 +- r/man/FragmentScanOptions.Rd | 11 +++ r/man/arrow-package.Rd | 2 + r/src/arrowExports.cpp | 82 ++++++++++---------- r/src/arrow_types.h | 1 + r/src/dataset.cpp | 7 +- r/src/expression.cpp | 56 ++++++------- 14 files changed, 127 insertions(+), 108 deletions(-) diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index 6b48fe17cd28a..1aba01e51c17d 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +arrow_install_all_headers("arrow/compute/exec") + add_arrow_compute_test(expression_test PREFIX "arrow-compute") add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/dataset/scanner_internal.h b/cpp/src/arrow/dataset/scanner_internal.h index 9667058e8bbc9..56065d9983e8a 100644 --- a/cpp/src/arrow/dataset/scanner_internal.h +++ b/cpp/src/arrow/dataset/scanner_internal.h @@ -70,7 +70,7 @@ inline RecordBatchIterator FilterRecordBatch(RecordBatchIterator it, } inline Result> ProjectSingleBatch( - const std::shared_ptr& in, const Expression& projection, + const std::shared_ptr& in, const compute::Expression& projection, MemoryPool* pool) { compute::ExecContext exec_context{pool}; ARROW_ASSIGN_OR_RAISE(Datum projected, @@ -126,10 +126,10 @@ class FilterAndProjectScanTask : public ScanTask { Result ToFilteredAndProjectedIterator( const RecordBatchVector& rbs) { auto it = MakeVectorIterator(rbs); - ARROW_ASSIGN_OR_RAISE(Expression simplified_filter, + ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter, SimplifyWithGuarantee(options()->filter, partition_)); - ARROW_ASSIGN_OR_RAISE(Expression simplified_projection, + ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection, SimplifyWithGuarantee(options()->projection, partition_)); RecordBatchIterator filter_it = @@ -141,10 +141,10 @@ class FilterAndProjectScanTask : public ScanTask { Result> FilterAndProjectBatch( const std::shared_ptr& batch) { - ARROW_ASSIGN_OR_RAISE(Expression simplified_filter, + ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_filter, SimplifyWithGuarantee(options()->filter, partition_)); - ARROW_ASSIGN_OR_RAISE(Expression simplified_projection, + ARROW_ASSIGN_OR_RAISE(compute::Expression simplified_projection, SimplifyWithGuarantee(options()->projection, partition_)); ARROW_ASSIGN_OR_RAISE(auto filtered, FilterSingleBatch(batch, simplified_filter, options_->pool)); diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index a83210fdd3bc7..17f4e079ae40b 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -566,7 +566,7 @@ class ControlledDataset : public Dataset { void FinishFragment(int fragment_index) { fragments_[fragment_index]->Finish(); } protected: - Result GetFragmentsImpl(Expression predicate) override { + Result GetFragmentsImpl(compute::Expression predicate) override { std::vector> casted_fragments(fragments_.begin(), fragments_.end()); return MakeVectorIterator(std::move(casted_fragments)); diff --git a/python/pyarrow/includes/libarrow_dataset.pxd b/python/pyarrow/includes/libarrow_dataset.pxd index 82e1c8f13a2b3..8c72284133953 100644 --- a/python/pyarrow/includes/libarrow_dataset.pxd +++ b/python/pyarrow/includes/libarrow_dataset.pxd @@ -32,28 +32,36 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: pass -cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: +cdef extern from "arrow/compute/exec/expression.h" namespace "arrow::compute" nogil: - cdef cppclass CExpression "arrow::dataset::Expression": + cdef cppclass CExpression "arrow::compute::Expression": c_bool Equals(const CExpression& other) const c_string ToString() const CResult[CExpression] Bind(const CSchema&) cdef CExpression CMakeScalarExpression \ - "arrow::dataset::literal"(shared_ptr[CScalar] value) + "arrow::compute::literal"(shared_ptr[CScalar] value) cdef CExpression CMakeFieldExpression \ - "arrow::dataset::field_ref"(c_string name) + "arrow::compute::field_ref"(c_string name) cdef CExpression CMakeCallExpression \ - "arrow::dataset::call"(c_string function, + "arrow::compute::call"(c_string function, vector[CExpression] arguments, shared_ptr[CFunctionOptions] options) cdef CResult[shared_ptr[CBuffer]] CSerializeExpression \ - "arrow::dataset::Serialize"(const CExpression&) + "arrow::compute::Serialize"(const CExpression&) + cdef CResult[CExpression] CDeserializeExpression \ - "arrow::dataset::Deserialize"(shared_ptr[CBuffer]) + "arrow::compute::Deserialize"(shared_ptr[CBuffer]) + + cdef CResult[unordered_map[CFieldRef, CDatum, CFieldRefHash]] \ + CExtractKnownFieldValues "arrow::compute::ExtractKnownFieldValues"( + const CExpression& partition_expression) + + +cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: cdef cppclass CScanOptions "arrow::dataset::ScanOptions": @staticmethod @@ -331,10 +339,6 @@ cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil: shared_ptr[CPartitioning] partitioning() const shared_ptr[CPartitioningFactory] factory() const - cdef CResult[unordered_map[CFieldRef, CDatum, CFieldRefHash]] \ - CExtractKnownFieldValues "arrow::dataset::ExtractKnownFieldValues"( - const CExpression& partition_expression) - cdef cppclass CFileSystemFactoryOptions \ "arrow::dataset::FileSystemFactoryOptions": CPartitioningOrFactory partitioning diff --git a/r/NAMESPACE b/r/NAMESPACE index 117e3de5c22bb..607177235e977 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -162,6 +162,7 @@ export(ParquetArrowReaderProperties) export(ParquetFileFormat) export(ParquetFileReader) export(ParquetFileWriter) +export(ParquetFragmentScanOptions) export(ParquetVersionType) export(ParquetWriterProperties) export(Partitioning) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 51cdcf85df08d..b8d72c30ed63d 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -756,24 +756,24 @@ FixedSizeListType__list_size <- function(type){ .Call(`_arrow_FixedSizeListType__list_size`, type) } -dataset___expr__call <- function(func_name, argument_list, options){ - .Call(`_arrow_dataset___expr__call`, func_name, argument_list, options) +compute___expr__call <- function(func_name, argument_list, options){ + .Call(`_arrow_compute___expr__call`, func_name, argument_list, options) } -dataset___expr__field_ref <- function(name){ - .Call(`_arrow_dataset___expr__field_ref`, name) +compute___expr__field_ref <- function(name){ + .Call(`_arrow_compute___expr__field_ref`, name) } -dataset___expr__get_field_ref_name <- function(ref){ - .Call(`_arrow_dataset___expr__get_field_ref_name`, ref) +compute___expr__get_field_ref_name <- function(x){ + .Call(`_arrow_compute___expr__get_field_ref_name`, x) } -dataset___expr__scalar <- function(x){ - .Call(`_arrow_dataset___expr__scalar`, x) +compute___expr__scalar <- function(x){ + .Call(`_arrow_compute___expr__scalar`, x) } -dataset___expr__ToString <- function(x){ - .Call(`_arrow_dataset___expr__ToString`, x) +compute___expr__ToString <- function(x){ + .Call(`_arrow_compute___expr__ToString`, x) } ipc___WriteFeather__Table <- function(stream, table, version, chunk_size, compression, compression_level){ diff --git a/r/R/expression.R b/r/R/expression.R index b3fdd52a5d05f..1e104677d8bfa 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -253,7 +253,7 @@ print.array_expression <- function(x, ...) { #' @export Expression <- R6Class("Expression", inherit = ArrowObject, public = list( - ToString = function() dataset___expr__ToString(self), + ToString = function() compute___expr__ToString(self), cast = function(to_type, safe = TRUE, ...) { opts <- list( to_type = to_type, @@ -265,7 +265,7 @@ Expression <- R6Class("Expression", inherit = ArrowObject, } ), active = list( - field_name = function() dataset___expr__get_field_ref_name(self) + field_name = function() compute___expr__get_field_ref_name(self) ) ) Expression$create <- function(function_name, @@ -273,14 +273,14 @@ Expression$create <- function(function_name, args = list(...), options = empty_named_list()) { assert_that(is.string(function_name)) - dataset___expr__call(function_name, args, options) + compute___expr__call(function_name, args, options) } Expression$field_ref <- function(name) { assert_that(is.string(name)) - dataset___expr__field_ref(name) + compute___expr__field_ref(name) } Expression$scalar <- function(x) { - dataset___expr__scalar(Scalar$create(x)) + compute___expr__scalar(Scalar$create(x)) } build_dataset_expression <- function(FUN, diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index 795027e1f24a0..b8d4dc01badf0 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -28,11 +28,8 @@ delimiter for text files `format = "parquet"``: \itemize{ -\item \code{use_buffered_stream}: Read files through buffered input streams rather than -loading entire row groups at once. This may be enabled -to reduce memory overhead. Disabled by default. -\item \code{buffer_size}: Size of buffered stream, if enabled. Default is 8KB. \item \code{dict_columns}: Names of columns which should be read as dictionaries. +\item Any Parquet options from \link{FragmentScanOptions}. } \code{format = "text"}: see \link{CsvParseOptions}. Note that you can specify them either diff --git a/r/man/FragmentScanOptions.Rd b/r/man/FragmentScanOptions.Rd index 8bafbb0b21c87..103d0589505aa 100644 --- a/r/man/FragmentScanOptions.Rd +++ b/r/man/FragmentScanOptions.Rd @@ -3,6 +3,7 @@ \name{FragmentScanOptions} \alias{FragmentScanOptions} \alias{CsvFragmentScanOptions} +\alias{ParquetFragmentScanOptions} \title{Format-specific scan options} \description{ A \code{FragmentScanOptions} holds options specific to a \code{FileFormat} and a scan @@ -14,14 +15,24 @@ operation. \itemize{ \item \code{format}: A string identifier of the file format. Currently supported values: \itemize{ +\item "parquet" \item "csv"/"text", aliases for the same format. } \item \code{...}: Additional format-specific options +`format = "parquet"``: +\itemize{ +\item \code{use_buffered_stream}: Read files through buffered input streams rather than +loading entire row groups at once. This may be enabled +to reduce memory overhead. Disabled by default. +\item \code{buffer_size}: Size of buffered stream, if enabled. Default is 8KB. +\item \code{pre_buffer}: Pre-buffer the raw Parquet data. This can improve performance +on high-latency filesystems. Disabled by default. \code{format = "text"}: see \link{CsvConvertOptions}. Note that options can only be specified with the Arrow C++ library naming. Also, "block_size" from \link{CsvReadOptions} may be given. } +} It returns the appropriate subclass of \code{FragmentScanOptions} (e.g. \code{CsvFragmentScanOptions}). diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd index 0c19402a045d2..ca6d32a895a4d 100644 --- a/r/man/arrow-package.Rd +++ b/r/man/arrow-package.Rd @@ -26,6 +26,8 @@ Useful links: Authors: \itemize{ + \item Ian Cook \email{ianmcook@gmail.com} + \item Jonathan Keane \email{jkeane@gmail.com} \item Romain François \email{romain@rstudio.com} (\href{https://orcid.org/0000-0002-2444-4226}{ORCID}) \item Jeroen Ooms \email{jeroen@berkeley.edu} \item Apache Arrow \email{dev@arrow.apache.org} [copyright holder] diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index c5ef6343ced03..3feef14a87358 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1855,11 +1855,11 @@ extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectNames(SEXP sb_sexp, SEXP // dataset.cpp #if defined(ARROW_R_WITH_DATASET) -void dataset___ScannerBuilder__ProjectExprs(const std::shared_ptr& sb, const std::vector>& exprs, const std::vector& names); +void dataset___ScannerBuilder__ProjectExprs(const std::shared_ptr& sb, const std::vector>& exprs, const std::vector& names); extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectExprs(SEXP sb_sexp, SEXP exprs_sexp, SEXP names_sexp){ BEGIN_CPP11 arrow::r::Input&>::type sb(sb_sexp); - arrow::r::Input>&>::type exprs(exprs_sexp); + arrow::r::Input>&>::type exprs(exprs_sexp); arrow::r::Input&>::type names(names_sexp); dataset___ScannerBuilder__ProjectExprs(sb, exprs, names); return R_NilValue; @@ -1873,11 +1873,11 @@ extern "C" SEXP _arrow_dataset___ScannerBuilder__ProjectExprs(SEXP sb_sexp, SEXP // dataset.cpp #if defined(ARROW_R_WITH_DATASET) -void dataset___ScannerBuilder__Filter(const std::shared_ptr& sb, const std::shared_ptr& expr); +void dataset___ScannerBuilder__Filter(const std::shared_ptr& sb, const std::shared_ptr& expr); extern "C" SEXP _arrow_dataset___ScannerBuilder__Filter(SEXP sb_sexp, SEXP expr_sexp){ BEGIN_CPP11 arrow::r::Input&>::type sb(sb_sexp); - arrow::r::Input&>::type expr(expr_sexp); + arrow::r::Input&>::type expr(expr_sexp); dataset___ScannerBuilder__Filter(sb, expr); return R_NilValue; END_CPP11 @@ -2927,79 +2927,79 @@ extern "C" SEXP _arrow_FixedSizeListType__list_size(SEXP type_sexp){ #endif // expression.cpp -#if defined(ARROW_R_WITH_DATASET) -std::shared_ptr dataset___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options); -extern "C" SEXP _arrow_dataset___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){ +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr compute___expr__call(std::string func_name, cpp11::list argument_list, cpp11::list options); +extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){ BEGIN_CPP11 arrow::r::Input::type func_name(func_name_sexp); arrow::r::Input::type argument_list(argument_list_sexp); arrow::r::Input::type options(options_sexp); - return cpp11::as_sexp(dataset___expr__call(func_name, argument_list, options)); + return cpp11::as_sexp(compute___expr__call(func_name, argument_list, options)); END_CPP11 } #else -extern "C" SEXP _arrow_dataset___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){ - Rf_error("Cannot call dataset___expr__call(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_compute___expr__call(SEXP func_name_sexp, SEXP argument_list_sexp, SEXP options_sexp){ + Rf_error("Cannot call compute___expr__call(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif // expression.cpp -#if defined(ARROW_R_WITH_DATASET) -std::shared_ptr dataset___expr__field_ref(std::string name); -extern "C" SEXP _arrow_dataset___expr__field_ref(SEXP name_sexp){ +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr compute___expr__field_ref(std::string name); +extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ BEGIN_CPP11 arrow::r::Input::type name(name_sexp); - return cpp11::as_sexp(dataset___expr__field_ref(name)); + return cpp11::as_sexp(compute___expr__field_ref(name)); END_CPP11 } #else -extern "C" SEXP _arrow_dataset___expr__field_ref(SEXP name_sexp){ - Rf_error("Cannot call dataset___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_compute___expr__field_ref(SEXP name_sexp){ + Rf_error("Cannot call compute___expr__field_ref(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif // expression.cpp -#if defined(ARROW_R_WITH_DATASET) -std::string dataset___expr__get_field_ref_name(const std::shared_ptr& ref); -extern "C" SEXP _arrow_dataset___expr__get_field_ref_name(SEXP ref_sexp){ +#if defined(ARROW_R_WITH_ARROW) +std::string compute___expr__get_field_ref_name(const std::shared_ptr& x); +extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){ BEGIN_CPP11 - arrow::r::Input&>::type ref(ref_sexp); - return cpp11::as_sexp(dataset___expr__get_field_ref_name(ref)); + arrow::r::Input&>::type x(x_sexp); + return cpp11::as_sexp(compute___expr__get_field_ref_name(x)); END_CPP11 } #else -extern "C" SEXP _arrow_dataset___expr__get_field_ref_name(SEXP ref_sexp){ - Rf_error("Cannot call dataset___expr__get_field_ref_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_compute___expr__get_field_ref_name(SEXP x_sexp){ + Rf_error("Cannot call compute___expr__get_field_ref_name(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif // expression.cpp -#if defined(ARROW_R_WITH_DATASET) -std::shared_ptr dataset___expr__scalar(const std::shared_ptr& x); -extern "C" SEXP _arrow_dataset___expr__scalar(SEXP x_sexp){ +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr compute___expr__scalar(const std::shared_ptr& x); +extern "C" SEXP _arrow_compute___expr__scalar(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); - return cpp11::as_sexp(dataset___expr__scalar(x)); + return cpp11::as_sexp(compute___expr__scalar(x)); END_CPP11 } #else -extern "C" SEXP _arrow_dataset___expr__scalar(SEXP x_sexp){ - Rf_error("Cannot call dataset___expr__scalar(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_compute___expr__scalar(SEXP x_sexp){ + Rf_error("Cannot call compute___expr__scalar(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif // expression.cpp -#if defined(ARROW_R_WITH_DATASET) -std::string dataset___expr__ToString(const std::shared_ptr& x); -extern "C" SEXP _arrow_dataset___expr__ToString(SEXP x_sexp){ +#if defined(ARROW_R_WITH_ARROW) +std::string compute___expr__ToString(const std::shared_ptr& x); +extern "C" SEXP _arrow_compute___expr__ToString(SEXP x_sexp){ BEGIN_CPP11 - arrow::r::Input&>::type x(x_sexp); - return cpp11::as_sexp(dataset___expr__ToString(x)); + arrow::r::Input&>::type x(x_sexp); + return cpp11::as_sexp(compute___expr__ToString(x)); END_CPP11 } #else -extern "C" SEXP _arrow_dataset___expr__ToString(SEXP x_sexp){ - Rf_error("Cannot call dataset___expr__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_compute___expr__ToString(SEXP x_sexp){ + Rf_error("Cannot call compute___expr__ToString(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -6793,11 +6793,11 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, { "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, - { "_arrow_dataset___expr__call", (DL_FUNC) &_arrow_dataset___expr__call, 3}, - { "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, - { "_arrow_dataset___expr__get_field_ref_name", (DL_FUNC) &_arrow_dataset___expr__get_field_ref_name, 1}, - { "_arrow_dataset___expr__scalar", (DL_FUNC) &_arrow_dataset___expr__scalar, 1}, - { "_arrow_dataset___expr__ToString", (DL_FUNC) &_arrow_dataset___expr__ToString, 1}, + { "_arrow_compute___expr__call", (DL_FUNC) &_arrow_compute___expr__call, 3}, + { "_arrow_compute___expr__field_ref", (DL_FUNC) &_arrow_compute___expr__field_ref, 1}, + { "_arrow_compute___expr__get_field_ref_name", (DL_FUNC) &_arrow_compute___expr__get_field_ref_name, 1}, + { "_arrow_compute___expr__scalar", (DL_FUNC) &_arrow_compute___expr__scalar, 1}, + { "_arrow_compute___expr__ToString", (DL_FUNC) &_arrow_compute___expr__ToString, 1}, { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index b94ab76472965..5f7c725ffecc7 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -55,6 +55,7 @@ namespace ds = ::arrow::dataset; #endif +namespace compute = ::arrow::compute; namespace fs = ::arrow::fs; SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index f4d7746eb10c6..7d8ccae6eeecb 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -31,6 +31,7 @@ namespace ds = ::arrow::dataset; namespace fs = ::arrow::fs; +namespace compute = ::arrow::compute; namespace cpp11 { @@ -370,10 +371,10 @@ void dataset___ScannerBuilder__ProjectNames(const std::shared_ptr& sb, - const std::vector>& exprs, + const std::vector>& exprs, const std::vector& names) { // We have shared_ptrs of expressions but need the Expressions - std::vector expressions; + std::vector expressions; for (auto expr : exprs) { expressions.push_back(*expr); } @@ -382,7 +383,7 @@ void dataset___ScannerBuilder__ProjectExprs( // [[dataset::export]] void dataset___ScannerBuilder__Filter(const std::shared_ptr& sb, - const std::shared_ptr& expr) { + const std::shared_ptr& expr) { StopIfNotOk(sb->Filter(*expr)); } diff --git a/r/src/expression.cpp b/r/src/expression.cpp index 0e8fd52034d10..798853edd720d 100644 --- a/r/src/expression.cpp +++ b/r/src/expression.cpp @@ -17,54 +17,54 @@ #include "./arrow_types.h" -#if defined(ARROW_R_WITH_DATASET) +#if defined(ARROW_R_WITH_ARROW) #include -#include -namespace ds = ::arrow::dataset; +#include -std::shared_ptr make_compute_options( - std::string func_name, cpp11::list options); +namespace compute = ::arrow::compute; -// [[dataset::export]] -std::shared_ptr dataset___expr__call(std::string func_name, - cpp11::list argument_list, - cpp11::list options) { - std::vector arguments; +std::shared_ptr make_compute_options(std::string func_name, + cpp11::list options); + +// [[arrow::export]] +std::shared_ptr compute___expr__call(std::string func_name, + cpp11::list argument_list, + cpp11::list options) { + std::vector arguments; for (SEXP argument : argument_list) { - auto argument_ptr = cpp11::as_cpp>(argument); + auto argument_ptr = cpp11::as_cpp>(argument); arguments.push_back(*argument_ptr); } auto options_ptr = make_compute_options(func_name, options); - return std::make_shared( - ds::call(std::move(func_name), std::move(arguments), std::move(options_ptr))); + return std::make_shared( + compute::call(std::move(func_name), std::move(arguments), std::move(options_ptr))); } -// [[dataset::export]] -std::shared_ptr dataset___expr__field_ref(std::string name) { - return std::make_shared(ds::field_ref(std::move(name))); +// [[arrow::export]] +std::shared_ptr compute___expr__field_ref(std::string name) { + return std::make_shared(compute::field_ref(std::move(name))); } -// [[dataset::export]] -std::string dataset___expr__get_field_ref_name( - const std::shared_ptr& ref) { - auto field_ref = ref->field_ref(); - if (field_ref == nullptr) { - return ""; +// [[arrow::export]] +std::string compute___expr__get_field_ref_name( + const std::shared_ptr& x) { + if (auto field_ref = x->field_ref()) { + return *field_ref->name(); } - return *field_ref->name(); + return ""; } -// [[dataset::export]] -std::shared_ptr dataset___expr__scalar( +// [[arrow::export]] +std::shared_ptr compute___expr__scalar( const std::shared_ptr& x) { - return std::make_shared(ds::literal(std::move(x))); + return std::make_shared(compute::literal(std::move(x))); } -// [[dataset::export]] -std::string dataset___expr__ToString(const std::shared_ptr& x) { +// [[arrow::export]] +std::string compute___expr__ToString(const std::shared_ptr& x) { return x->ToString(); }