diff --git a/.github/workflows/python-ci-single.yml b/.github/workflows/python-ci-single.yml index 7bc89f4e68..5292f97b2b 100644 --- a/.github/workflows/python-ci-single.yml +++ b/.github/workflows/python-ci-single.yml @@ -51,9 +51,13 @@ jobs: - name: Run pre-commit hooks on all files run: python -m pip -v install pre-commit && pre-commit run -a -v + # Skip files in apis/r/src which are: + # * nanoarrow.c/h + # * Auto-generated by Rcpp + # * Things which Dirk doesn't want to be format-checked - name: Check C++ Format shell: bash - run: ./scripts/run-clang-format.sh . clang-format 0 $(find libtiledbsoma apis/python/src apis/r/src -name "*.cc" -or -name "*.cpp" -or -name "*.h" | grep -v external) + run: ./scripts/run-clang-format.sh . clang-format 0 $(find libtiledbsoma apis/python/src -name "*.cc" -or -name "*.cpp" -or -name "*.h" | grep -v external) build: runs-on: ${{ inputs.os }} diff --git a/Makefile b/Makefile index d3eb9f5e91..fbcb22d076 100644 --- a/Makefile +++ b/Makefile @@ -46,15 +46,19 @@ data: # format # ------------------------------------------------------------------- +# Skip files in apis/r/src which are: +# * nanoarrow.c/h +# * Auto-generated by Rcpp +# * Things which Dirk doesn't want to be format-checked .PHONY: check-format check-format: @./scripts/run-clang-format.sh . clang-format 0 \ - `find libtiledbsoma apis/python/src apis/r/src -name "*.cc" -or -name "*.cpp" -or -name "*.h"` + `find libtiledbsoma apis/python/src -name "*.cc" -or -name "*.cpp" -or -name "*.h"` .PHONY: format format: @./scripts/run-clang-format.sh . clang-format 1 \ - `find libtiledbsoma apis/python/src apis/r/src -name "*.cc" -or -name "*.cpp" -or -name "*.h"` + `find libtiledbsoma apis/python/src -name "*.cc" -or -name "*.cpp" -or -name "*.h"` # clean # ------------------------------------------------------------------- diff --git a/apis/r/src/RcppExports.cpp b/apis/r/src/RcppExports.cpp index 1eedda5882..25652298d9 100644 --- a/apis/r/src/RcppExports.cpp +++ b/apis/r/src/RcppExports.cpp @@ -1,335 +1,237 @@ // Generated by using Rcpp::compileAttributes() -> do not edit by hand // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -#include #include "../inst/include/tiledbsoma_types.h" +#include using namespace Rcpp; #ifdef RCPP_USE_GLOBAL_ROSTREAM -Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); +Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif // soma_array_reader -Rcpp::List soma_array_reader( - const std::string& uri, - Rcpp::Nullable colnames, - Rcpp::Nullable> qc, - Rcpp::Nullable dim_points, - Rcpp::Nullable dim_ranges, - std::string batch_size, - std::string result_order, - const std::string& loglevel, - Rcpp::Nullable config); -RcppExport SEXP _tiledbsoma_soma_array_reader( - SEXP uriSEXP, - SEXP colnamesSEXP, - SEXP qcSEXP, - SEXP dim_pointsSEXP, - SEXP dim_rangesSEXP, - SEXP batch_sizeSEXP, - SEXP result_orderSEXP, - SEXP loglevelSEXP, - SEXP configSEXP) { - BEGIN_RCPP +Rcpp::List soma_array_reader(const std::string& uri, Rcpp::Nullable colnames, Rcpp::Nullable> qc, Rcpp::Nullable dim_points, Rcpp::Nullable dim_ranges, std::string batch_size, std::string result_order, const std::string& loglevel, Rcpp::Nullable config); +RcppExport SEXP _tiledbsoma_soma_array_reader(SEXP uriSEXP, SEXP colnamesSEXP, SEXP qcSEXP, SEXP dim_pointsSEXP, SEXP dim_rangesSEXP, SEXP batch_sizeSEXP, SEXP result_orderSEXP, SEXP loglevelSEXP, SEXP configSEXP) { +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type uri(uriSEXP); - Rcpp::traits::input_parameter>::type - colnames(colnamesSEXP); - Rcpp::traits::input_parameter< - Rcpp::Nullable>>::type qc(qcSEXP); - Rcpp::traits::input_parameter>::type dim_points( - dim_pointsSEXP); - Rcpp::traits::input_parameter>::type dim_ranges( - dim_rangesSEXP); - Rcpp::traits::input_parameter::type batch_size(batch_sizeSEXP); - Rcpp::traits::input_parameter::type result_order( - result_orderSEXP); - Rcpp::traits::input_parameter::type loglevel( - loglevelSEXP); - Rcpp::traits::input_parameter>::type - config(configSEXP); - rcpp_result_gen = Rcpp::wrap(soma_array_reader( - uri, - colnames, - qc, - dim_points, - dim_ranges, - batch_size, - result_order, - loglevel, - config)); + Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type colnames(colnamesSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable> >::type qc(qcSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type dim_points(dim_pointsSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type dim_ranges(dim_rangesSEXP); + Rcpp::traits::input_parameter< std::string >::type batch_size(batch_sizeSEXP); + Rcpp::traits::input_parameter< std::string >::type result_order(result_orderSEXP); + Rcpp::traits::input_parameter< const std::string& >::type loglevel(loglevelSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type config(configSEXP); + rcpp_result_gen = Rcpp::wrap(soma_array_reader(uri, colnames, qc, dim_points, dim_ranges, batch_size, result_order, loglevel, config)); return rcpp_result_gen; - END_RCPP +END_RCPP } // set_log_level void set_log_level(const std::string& level); RcppExport SEXP _tiledbsoma_set_log_level(SEXP levelSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type level(levelSEXP); + Rcpp::traits::input_parameter< const std::string& >::type level(levelSEXP); set_log_level(level); return R_NilValue; - END_RCPP +END_RCPP } // get_column_types -Rcpp::CharacterVector get_column_types( - const std::string& uri, const std::vector& colnames); +Rcpp::CharacterVector get_column_types(const std::string& uri, const std::vector& colnames); RcppExport SEXP _tiledbsoma_get_column_types(SEXP uriSEXP, SEXP colnamesSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type uri(uriSEXP); - Rcpp::traits::input_parameter&>::type - colnames(colnamesSEXP); + Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP); + Rcpp::traits::input_parameter< const std::vector& >::type colnames(colnamesSEXP); rcpp_result_gen = Rcpp::wrap(get_column_types(uri, colnames)); return rcpp_result_gen; - END_RCPP +END_RCPP } // nnz -double nnz( - const std::string& uri, Rcpp::Nullable config); +double nnz(const std::string& uri, Rcpp::Nullable config); RcppExport SEXP _tiledbsoma_nnz(SEXP uriSEXP, SEXP configSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type uri(uriSEXP); - Rcpp::traits::input_parameter>::type - config(configSEXP); + Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type config(configSEXP); rcpp_result_gen = Rcpp::wrap(nnz(uri, config)); return rcpp_result_gen; - END_RCPP +END_RCPP } // check_arrow_schema_tag bool check_arrow_schema_tag(Rcpp::XPtr xp); RcppExport SEXP _tiledbsoma_check_arrow_schema_tag(SEXP xpSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter>::type xp(xpSEXP); + Rcpp::traits::input_parameter< Rcpp::XPtr >::type xp(xpSEXP); rcpp_result_gen = Rcpp::wrap(check_arrow_schema_tag(xp)); return rcpp_result_gen; - END_RCPP +END_RCPP } // check_arrow_array_tag bool check_arrow_array_tag(Rcpp::XPtr xp); RcppExport SEXP _tiledbsoma_check_arrow_array_tag(SEXP xpSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter>::type xp(xpSEXP); + Rcpp::traits::input_parameter< Rcpp::XPtr >::type xp(xpSEXP); rcpp_result_gen = Rcpp::wrap(check_arrow_array_tag(xp)); return rcpp_result_gen; - END_RCPP +END_RCPP } // shape -Rcpp::NumericVector shape( - const std::string& uri, Rcpp::Nullable config); +Rcpp::NumericVector shape(const std::string& uri, Rcpp::Nullable config); RcppExport SEXP _tiledbsoma_shape(SEXP uriSEXP, SEXP configSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type uri(uriSEXP); - Rcpp::traits::input_parameter>::type - config(configSEXP); + Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type config(configSEXP); rcpp_result_gen = Rcpp::wrap(shape(uri, config)); return rcpp_result_gen; - END_RCPP +END_RCPP } // sr_setup -Rcpp::List sr_setup( - const std::string& uri, - Rcpp::CharacterVector config, - Rcpp::Nullable colnames, - Rcpp::Nullable> qc, - Rcpp::Nullable dim_points, - Rcpp::Nullable dim_ranges, - std::string batch_size, - std::string result_order, - Rcpp::Nullable timestamp_end, - const std::string& loglevel); -RcppExport SEXP _tiledbsoma_sr_setup( - SEXP uriSEXP, - SEXP configSEXP, - SEXP colnamesSEXP, - SEXP qcSEXP, - SEXP dim_pointsSEXP, - SEXP dim_rangesSEXP, - SEXP batch_sizeSEXP, - SEXP result_orderSEXP, - SEXP timestamp_endSEXP, - SEXP loglevelSEXP) { - BEGIN_RCPP +Rcpp::List sr_setup(const std::string& uri, Rcpp::CharacterVector config, Rcpp::Nullable colnames, Rcpp::Nullable> qc, Rcpp::Nullable dim_points, Rcpp::Nullable dim_ranges, std::string batch_size, std::string result_order, Rcpp::Nullable timestamp_end, const std::string& loglevel); +RcppExport SEXP _tiledbsoma_sr_setup(SEXP uriSEXP, SEXP configSEXP, SEXP colnamesSEXP, SEXP qcSEXP, SEXP dim_pointsSEXP, SEXP dim_rangesSEXP, SEXP batch_sizeSEXP, SEXP result_orderSEXP, SEXP timestamp_endSEXP, SEXP loglevelSEXP) { +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type uri(uriSEXP); - Rcpp::traits::input_parameter::type config( - configSEXP); - Rcpp::traits::input_parameter>::type - colnames(colnamesSEXP); - Rcpp::traits::input_parameter< - Rcpp::Nullable>>::type qc(qcSEXP); - Rcpp::traits::input_parameter>::type dim_points( - dim_pointsSEXP); - Rcpp::traits::input_parameter>::type dim_ranges( - dim_rangesSEXP); - Rcpp::traits::input_parameter::type batch_size(batch_sizeSEXP); - Rcpp::traits::input_parameter::type result_order( - result_orderSEXP); - Rcpp::traits::input_parameter>::type - timestamp_end(timestamp_endSEXP); - Rcpp::traits::input_parameter::type loglevel( - loglevelSEXP); - rcpp_result_gen = Rcpp::wrap(sr_setup( - uri, - config, - colnames, - qc, - dim_points, - dim_ranges, - batch_size, - result_order, - timestamp_end, - loglevel)); + Rcpp::traits::input_parameter< const std::string& >::type uri(uriSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type config(configSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type colnames(colnamesSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable> >::type qc(qcSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type dim_points(dim_pointsSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type dim_ranges(dim_rangesSEXP); + Rcpp::traits::input_parameter< std::string >::type batch_size(batch_sizeSEXP); + Rcpp::traits::input_parameter< std::string >::type result_order(result_orderSEXP); + Rcpp::traits::input_parameter< Rcpp::Nullable >::type timestamp_end(timestamp_endSEXP); + Rcpp::traits::input_parameter< const std::string& >::type loglevel(loglevelSEXP); + rcpp_result_gen = Rcpp::wrap(sr_setup(uri, config, colnames, qc, dim_points, dim_ranges, batch_size, result_order, timestamp_end, loglevel)); return rcpp_result_gen; - END_RCPP +END_RCPP } // sr_complete bool sr_complete(Rcpp::XPtr sr); RcppExport SEXP _tiledbsoma_sr_complete(SEXP srSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter>::type sr(srSEXP); + Rcpp::traits::input_parameter< Rcpp::XPtr >::type sr(srSEXP); rcpp_result_gen = Rcpp::wrap(sr_complete(sr)); return rcpp_result_gen; - END_RCPP +END_RCPP } // sr_next Rcpp::List sr_next(Rcpp::XPtr sr); RcppExport SEXP _tiledbsoma_sr_next(SEXP srSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter>::type sr(srSEXP); + Rcpp::traits::input_parameter< Rcpp::XPtr >::type sr(srSEXP); rcpp_result_gen = Rcpp::wrap(sr_next(sr)); return rcpp_result_gen; - END_RCPP +END_RCPP } // tiledbsoma_stats_enable void tiledbsoma_stats_enable(); RcppExport SEXP _tiledbsoma_tiledbsoma_stats_enable() { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; tiledbsoma_stats_enable(); return R_NilValue; - END_RCPP +END_RCPP } // tiledbsoma_stats_disable void tiledbsoma_stats_disable(); RcppExport SEXP _tiledbsoma_tiledbsoma_stats_disable() { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; tiledbsoma_stats_disable(); return R_NilValue; - END_RCPP +END_RCPP } // tiledbsoma_stats_reset void tiledbsoma_stats_reset(); RcppExport SEXP _tiledbsoma_tiledbsoma_stats_reset() { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RNGScope rcpp_rngScope_gen; tiledbsoma_stats_reset(); return R_NilValue; - END_RCPP +END_RCPP } // tiledbsoma_stats_dump std::string tiledbsoma_stats_dump(); RcppExport SEXP _tiledbsoma_tiledbsoma_stats_dump() { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = Rcpp::wrap(tiledbsoma_stats_dump()); return rcpp_result_gen; - END_RCPP +END_RCPP } // libtiledbsoma_version std::string libtiledbsoma_version(const bool compact); RcppExport SEXP _tiledbsoma_libtiledbsoma_version(SEXP compactSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type compact(compactSEXP); + Rcpp::traits::input_parameter< const bool >::type compact(compactSEXP); rcpp_result_gen = Rcpp::wrap(libtiledbsoma_version(compact)); return rcpp_result_gen; - END_RCPP +END_RCPP } // tiledb_embedded_version Rcpp::IntegerVector tiledb_embedded_version(); RcppExport SEXP _tiledbsoma_tiledb_embedded_version() { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; rcpp_result_gen = Rcpp::wrap(tiledb_embedded_version()); return rcpp_result_gen; - END_RCPP +END_RCPP } // tiledb_datatype_max_value size_t tiledb_datatype_max_value(const std::string& datatype); RcppExport SEXP _tiledbsoma_tiledb_datatype_max_value(SEXP datatypeSEXP) { - BEGIN_RCPP +BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter::type datatype( - datatypeSEXP); + Rcpp::traits::input_parameter< const std::string& >::type datatype(datatypeSEXP); rcpp_result_gen = Rcpp::wrap(tiledb_datatype_max_value(datatype)); return rcpp_result_gen; - END_RCPP +END_RCPP } static const R_CallMethodDef CallEntries[] = { - {"_tiledbsoma_soma_array_reader", - (DL_FUNC)&_tiledbsoma_soma_array_reader, - 9}, - {"_tiledbsoma_set_log_level", (DL_FUNC)&_tiledbsoma_set_log_level, 1}, - {"_tiledbsoma_get_column_types", (DL_FUNC)&_tiledbsoma_get_column_types, 2}, - {"_tiledbsoma_nnz", (DL_FUNC)&_tiledbsoma_nnz, 2}, - {"_tiledbsoma_check_arrow_schema_tag", - (DL_FUNC)&_tiledbsoma_check_arrow_schema_tag, - 1}, - {"_tiledbsoma_check_arrow_array_tag", - (DL_FUNC)&_tiledbsoma_check_arrow_array_tag, - 1}, - {"_tiledbsoma_shape", (DL_FUNC)&_tiledbsoma_shape, 2}, - {"_tiledbsoma_sr_setup", (DL_FUNC)&_tiledbsoma_sr_setup, 10}, - {"_tiledbsoma_sr_complete", (DL_FUNC)&_tiledbsoma_sr_complete, 1}, - {"_tiledbsoma_sr_next", (DL_FUNC)&_tiledbsoma_sr_next, 1}, - {"_tiledbsoma_tiledbsoma_stats_enable", - (DL_FUNC)&_tiledbsoma_tiledbsoma_stats_enable, - 0}, - {"_tiledbsoma_tiledbsoma_stats_disable", - (DL_FUNC)&_tiledbsoma_tiledbsoma_stats_disable, - 0}, - {"_tiledbsoma_tiledbsoma_stats_reset", - (DL_FUNC)&_tiledbsoma_tiledbsoma_stats_reset, - 0}, - {"_tiledbsoma_tiledbsoma_stats_dump", - (DL_FUNC)&_tiledbsoma_tiledbsoma_stats_dump, - 0}, - {"_tiledbsoma_libtiledbsoma_version", - (DL_FUNC)&_tiledbsoma_libtiledbsoma_version, - 1}, - {"_tiledbsoma_tiledb_embedded_version", - (DL_FUNC)&_tiledbsoma_tiledb_embedded_version, - 0}, - {"_tiledbsoma_tiledb_datatype_max_value", - (DL_FUNC)&_tiledbsoma_tiledb_datatype_max_value, - 1}, - {NULL, NULL, 0}}; + {"_tiledbsoma_soma_array_reader", (DL_FUNC) &_tiledbsoma_soma_array_reader, 9}, + {"_tiledbsoma_set_log_level", (DL_FUNC) &_tiledbsoma_set_log_level, 1}, + {"_tiledbsoma_get_column_types", (DL_FUNC) &_tiledbsoma_get_column_types, 2}, + {"_tiledbsoma_nnz", (DL_FUNC) &_tiledbsoma_nnz, 2}, + {"_tiledbsoma_check_arrow_schema_tag", (DL_FUNC) &_tiledbsoma_check_arrow_schema_tag, 1}, + {"_tiledbsoma_check_arrow_array_tag", (DL_FUNC) &_tiledbsoma_check_arrow_array_tag, 1}, + {"_tiledbsoma_shape", (DL_FUNC) &_tiledbsoma_shape, 2}, + {"_tiledbsoma_sr_setup", (DL_FUNC) &_tiledbsoma_sr_setup, 10}, + {"_tiledbsoma_sr_complete", (DL_FUNC) &_tiledbsoma_sr_complete, 1}, + {"_tiledbsoma_sr_next", (DL_FUNC) &_tiledbsoma_sr_next, 1}, + {"_tiledbsoma_tiledbsoma_stats_enable", (DL_FUNC) &_tiledbsoma_tiledbsoma_stats_enable, 0}, + {"_tiledbsoma_tiledbsoma_stats_disable", (DL_FUNC) &_tiledbsoma_tiledbsoma_stats_disable, 0}, + {"_tiledbsoma_tiledbsoma_stats_reset", (DL_FUNC) &_tiledbsoma_tiledbsoma_stats_reset, 0}, + {"_tiledbsoma_tiledbsoma_stats_dump", (DL_FUNC) &_tiledbsoma_tiledbsoma_stats_dump, 0}, + {"_tiledbsoma_libtiledbsoma_version", (DL_FUNC) &_tiledbsoma_libtiledbsoma_version, 1}, + {"_tiledbsoma_tiledb_embedded_version", (DL_FUNC) &_tiledbsoma_tiledb_embedded_version, 0}, + {"_tiledbsoma_tiledb_datatype_max_value", (DL_FUNC) &_tiledbsoma_tiledb_datatype_max_value, 1}, + {NULL, NULL, 0} +}; -RcppExport void R_init_tiledbsoma(DllInfo* dll) { +RcppExport void R_init_tiledbsoma(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } diff --git a/apis/r/src/nanoarrow.h b/apis/r/src/nanoarrow.h index 31effc520b..90ce2dc06a 100644 --- a/apis/r/src/nanoarrow.h +++ b/apis/r/src/nanoarrow.h @@ -23,9 +23,9 @@ #define NANOARROW_VERSION_PATCH 0 #define NANOARROW_VERSION "0.2.0-SNAPSHOT" -#define NANOARROW_VERSION_INT \ - (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ - NANOARROW_VERSION_PATCH) +#define NANOARROW_VERSION_INT \ + (NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \ + NANOARROW_VERSION_PATCH) // #define NANOARROW_NAMESPACE YourNamespaceHere @@ -53,6 +53,8 @@ #include #include + + #ifdef __cplusplus extern "C" { #endif @@ -63,11 +65,11 @@ extern "C" { /// \defgroup nanoarrow-arrow-cdata Arrow C Data interface /// /// The Arrow C Data (https://arrow.apache.org/docs/format/CDataInterface.html) -/// and Arrow C Stream -/// (https://arrow.apache.org/docs/format/CStreamInterface.html) interfaces are -/// part of the Arrow Columnar Format specification -/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow -/// documentation for documentation of these structures. +/// and Arrow C Stream (https://arrow.apache.org/docs/format/CStreamInterface.html) +/// interfaces are part of the +/// Arrow Columnar Format specification +/// (https://arrow.apache.org/docs/format/Columnar.html). See the Arrow documentation for +/// documentation of these structures. /// /// @{ @@ -79,36 +81,36 @@ extern "C" { #define ARROW_FLAG_MAP_KEYS_SORTED 4 struct ArrowSchema { - // Array type description - const char* format; - const char* name; - const char* metadata; - int64_t flags; - int64_t n_children; - struct ArrowSchema** children; - struct ArrowSchema* dictionary; - - // Release callback - void (*release)(struct ArrowSchema*); - // Opaque producer-specific data - void* private_data; + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; }; struct ArrowArray { - // Array data description - int64_t length; - int64_t null_count; - int64_t offset; - int64_t n_buffers; - int64_t n_children; - const void** buffers; - struct ArrowArray** children; - struct ArrowArray* dictionary; - - // Release callback - void (*release)(struct ArrowArray*); - // Opaque producer-specific data - void* private_data; + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; }; #endif // ARROW_C_DATA_INTERFACE @@ -117,67 +119,61 @@ struct ArrowArray { #define ARROW_C_STREAM_INTERFACE struct ArrowArrayStream { - // Callback to get the stream type - // (will be the same for all arrays in the stream). - // - // Return value: 0 if successful, an `errno`-compatible error code - // otherwise. - // - // If successful, the ArrowSchema must be released independently from the - // stream. - int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); - - // Callback to get the next array - // (if no error and the array is released, the stream has ended) - // - // Return value: 0 if successful, an `errno`-compatible error code - // otherwise. - // - // If successful, the ArrowArray must be released independently from the - // stream. - int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); - - // Callback to get optional detailed error information. - // This must only be called if the last stream operation failed - // with a non-0 return code. - // - // Return value: pointer to a null-terminated character array describing - // the last error, or NULL if no description is available. - // - // The returned pointer is only valid until the next operation on this - // stream (including release). - const char* (*get_last_error)(struct ArrowArrayStream*); - - // Release callback: release the stream's own resources. - // Note that arrays returned by `get_next` must be individually released. - void (*release)(struct ArrowArrayStream*); - - // Opaque producer-specific data - void* private_data; + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. + int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; }; #endif // ARROW_C_STREAM_INTERFACE #endif // ARROW_FLAG_DICTIONARY_ORDERED /// \brief Move the contents of src into dst and set src->release to NULL -static inline void ArrowSchemaMove( - struct ArrowSchema* src, struct ArrowSchema* dst) { - memcpy(dst, src, sizeof(struct ArrowSchema)); - src->release = NULL; +static inline void ArrowSchemaMove(struct ArrowSchema* src, struct ArrowSchema* dst) { + memcpy(dst, src, sizeof(struct ArrowSchema)); + src->release = NULL; } /// \brief Move the contents of src into dst and set src->release to NULL -static inline void ArrowArrayMove( - struct ArrowArray* src, struct ArrowArray* dst) { - memcpy(dst, src, sizeof(struct ArrowArray)); - src->release = NULL; +static inline void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dst) { + memcpy(dst, src, sizeof(struct ArrowArray)); + src->release = NULL; } /// \brief Move the contents of src into dst and set src->release to NULL -static inline void ArrowArrayStreamMove( - struct ArrowArrayStream* src, struct ArrowArrayStream* dst) { - memcpy(dst, src, sizeof(struct ArrowArrayStream)); - src->release = NULL; +static inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, + struct ArrowArrayStream* dst) { + memcpy(dst, src, sizeof(struct ArrowArrayStream)); + src->release = NULL; } /// @} @@ -187,14 +183,13 @@ static inline void ArrowArrayStreamMove( #define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y) #define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \ - do { \ - const int NAME = (EXPR); \ - if (NAME) \ - return NAME; \ - } while (0) + do { \ + const int NAME = (EXPR); \ + if (NAME) return NAME; \ + } while (0) #define _NANOARROW_CHECK_RANGE(x_, min_, max_) \ - NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) + NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL) /// \brief Return code for success. /// \ingroup nanoarrow-errors @@ -207,14 +202,13 @@ typedef int ArrowErrorCode; /// \brief Check the result of an expression and return it if not NANOARROW_OK /// \ingroup nanoarrow-errors #define NANOARROW_RETURN_NOT_OK(EXPR) \ - _NANOARROW_RETURN_NOT_OK_IMPL( \ - _NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR) + _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_, __COUNTER__), EXPR) static char _ArrowIsLittleEndian(void) { - uint32_t check = 1; - char first_byte; - memcpy(&first_byte, &check, sizeof(char)); - return first_byte; + uint32_t check = 1; + char first_byte; + memcpy(&first_byte, &check, sizeof(char)); + return first_byte; } /// \brief Arrow type enumerator @@ -224,45 +218,45 @@ static char _ArrowIsLittleEndian(void) { /// enumerator; however, the numeric values are specifically not equal /// (i.e., do not rely on numeric comparison). enum ArrowType { - NANOARROW_TYPE_UNINITIALIZED = 0, - NANOARROW_TYPE_NA = 1, - NANOARROW_TYPE_BOOL, - NANOARROW_TYPE_UINT8, - NANOARROW_TYPE_INT8, - NANOARROW_TYPE_UINT16, - NANOARROW_TYPE_INT16, - NANOARROW_TYPE_UINT32, - NANOARROW_TYPE_INT32, - NANOARROW_TYPE_UINT64, - NANOARROW_TYPE_INT64, - NANOARROW_TYPE_HALF_FLOAT, - NANOARROW_TYPE_FLOAT, - NANOARROW_TYPE_DOUBLE, - NANOARROW_TYPE_STRING, - NANOARROW_TYPE_BINARY, - NANOARROW_TYPE_FIXED_SIZE_BINARY, - NANOARROW_TYPE_DATE32, - NANOARROW_TYPE_DATE64, - NANOARROW_TYPE_TIMESTAMP, - NANOARROW_TYPE_TIME32, - NANOARROW_TYPE_TIME64, - NANOARROW_TYPE_INTERVAL_MONTHS, - NANOARROW_TYPE_INTERVAL_DAY_TIME, - NANOARROW_TYPE_DECIMAL128, - NANOARROW_TYPE_DECIMAL256, - NANOARROW_TYPE_LIST, - NANOARROW_TYPE_STRUCT, - NANOARROW_TYPE_SPARSE_UNION, - NANOARROW_TYPE_DENSE_UNION, - NANOARROW_TYPE_DICTIONARY, - NANOARROW_TYPE_MAP, - NANOARROW_TYPE_EXTENSION, - NANOARROW_TYPE_FIXED_SIZE_LIST, - NANOARROW_TYPE_DURATION, - NANOARROW_TYPE_LARGE_STRING, - NANOARROW_TYPE_LARGE_BINARY, - NANOARROW_TYPE_LARGE_LIST, - NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO + NANOARROW_TYPE_UNINITIALIZED = 0, + NANOARROW_TYPE_NA = 1, + NANOARROW_TYPE_BOOL, + NANOARROW_TYPE_UINT8, + NANOARROW_TYPE_INT8, + NANOARROW_TYPE_UINT16, + NANOARROW_TYPE_INT16, + NANOARROW_TYPE_UINT32, + NANOARROW_TYPE_INT32, + NANOARROW_TYPE_UINT64, + NANOARROW_TYPE_INT64, + NANOARROW_TYPE_HALF_FLOAT, + NANOARROW_TYPE_FLOAT, + NANOARROW_TYPE_DOUBLE, + NANOARROW_TYPE_STRING, + NANOARROW_TYPE_BINARY, + NANOARROW_TYPE_FIXED_SIZE_BINARY, + NANOARROW_TYPE_DATE32, + NANOARROW_TYPE_DATE64, + NANOARROW_TYPE_TIMESTAMP, + NANOARROW_TYPE_TIME32, + NANOARROW_TYPE_TIME64, + NANOARROW_TYPE_INTERVAL_MONTHS, + NANOARROW_TYPE_INTERVAL_DAY_TIME, + NANOARROW_TYPE_DECIMAL128, + NANOARROW_TYPE_DECIMAL256, + NANOARROW_TYPE_LIST, + NANOARROW_TYPE_STRUCT, + NANOARROW_TYPE_SPARSE_UNION, + NANOARROW_TYPE_DENSE_UNION, + NANOARROW_TYPE_DICTIONARY, + NANOARROW_TYPE_MAP, + NANOARROW_TYPE_EXTENSION, + NANOARROW_TYPE_FIXED_SIZE_LIST, + NANOARROW_TYPE_DURATION, + NANOARROW_TYPE_LARGE_STRING, + NANOARROW_TYPE_LARGE_BINARY, + NANOARROW_TYPE_LARGE_LIST, + NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO }; /// \brief Get a string value of an enum ArrowType value @@ -270,86 +264,86 @@ enum ArrowType { /// /// Returns NULL for invalid values for type static inline const char* ArrowTypeString(enum ArrowType type) { - switch (type) { - case NANOARROW_TYPE_NA: - return "na"; - case NANOARROW_TYPE_BOOL: - return "bool"; - case NANOARROW_TYPE_UINT8: - return "uint8"; - case NANOARROW_TYPE_INT8: - return "int8"; - case NANOARROW_TYPE_UINT16: - return "uint16"; - case NANOARROW_TYPE_INT16: - return "int16"; - case NANOARROW_TYPE_UINT32: - return "uint32"; - case NANOARROW_TYPE_INT32: - return "int32"; - case NANOARROW_TYPE_UINT64: - return "uint64"; - case NANOARROW_TYPE_INT64: - return "int64"; - case NANOARROW_TYPE_HALF_FLOAT: - return "half_float"; - case NANOARROW_TYPE_FLOAT: - return "float"; - case NANOARROW_TYPE_DOUBLE: - return "double"; - case NANOARROW_TYPE_STRING: - return "string"; - case NANOARROW_TYPE_BINARY: - return "binary"; - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - return "fixed_size_binary"; - case NANOARROW_TYPE_DATE32: - return "date32"; - case NANOARROW_TYPE_DATE64: - return "date64"; - case NANOARROW_TYPE_TIMESTAMP: - return "timestamp"; - case NANOARROW_TYPE_TIME32: - return "time32"; - case NANOARROW_TYPE_TIME64: - return "time64"; - case NANOARROW_TYPE_INTERVAL_MONTHS: - return "interval_months"; - case NANOARROW_TYPE_INTERVAL_DAY_TIME: - return "interval_day_time"; - case NANOARROW_TYPE_DECIMAL128: - return "decimal128"; - case NANOARROW_TYPE_DECIMAL256: - return "decimal256"; - case NANOARROW_TYPE_LIST: - return "list"; - case NANOARROW_TYPE_STRUCT: - return "struct"; - case NANOARROW_TYPE_SPARSE_UNION: - return "sparse_union"; - case NANOARROW_TYPE_DENSE_UNION: - return "dense_union"; - case NANOARROW_TYPE_DICTIONARY: - return "dictionary"; - case NANOARROW_TYPE_MAP: - return "map"; - case NANOARROW_TYPE_EXTENSION: - return "extension"; - case NANOARROW_TYPE_FIXED_SIZE_LIST: - return "fixed_size_list"; - case NANOARROW_TYPE_DURATION: - return "duration"; - case NANOARROW_TYPE_LARGE_STRING: - return "large_string"; - case NANOARROW_TYPE_LARGE_BINARY: - return "large_binary"; - case NANOARROW_TYPE_LARGE_LIST: - return "large_list"; - case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: - return "interval_month_day_nano"; - default: - return NULL; - } + switch (type) { + case NANOARROW_TYPE_NA: + return "na"; + case NANOARROW_TYPE_BOOL: + return "bool"; + case NANOARROW_TYPE_UINT8: + return "uint8"; + case NANOARROW_TYPE_INT8: + return "int8"; + case NANOARROW_TYPE_UINT16: + return "uint16"; + case NANOARROW_TYPE_INT16: + return "int16"; + case NANOARROW_TYPE_UINT32: + return "uint32"; + case NANOARROW_TYPE_INT32: + return "int32"; + case NANOARROW_TYPE_UINT64: + return "uint64"; + case NANOARROW_TYPE_INT64: + return "int64"; + case NANOARROW_TYPE_HALF_FLOAT: + return "half_float"; + case NANOARROW_TYPE_FLOAT: + return "float"; + case NANOARROW_TYPE_DOUBLE: + return "double"; + case NANOARROW_TYPE_STRING: + return "string"; + case NANOARROW_TYPE_BINARY: + return "binary"; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + return "fixed_size_binary"; + case NANOARROW_TYPE_DATE32: + return "date32"; + case NANOARROW_TYPE_DATE64: + return "date64"; + case NANOARROW_TYPE_TIMESTAMP: + return "timestamp"; + case NANOARROW_TYPE_TIME32: + return "time32"; + case NANOARROW_TYPE_TIME64: + return "time64"; + case NANOARROW_TYPE_INTERVAL_MONTHS: + return "interval_months"; + case NANOARROW_TYPE_INTERVAL_DAY_TIME: + return "interval_day_time"; + case NANOARROW_TYPE_DECIMAL128: + return "decimal128"; + case NANOARROW_TYPE_DECIMAL256: + return "decimal256"; + case NANOARROW_TYPE_LIST: + return "list"; + case NANOARROW_TYPE_STRUCT: + return "struct"; + case NANOARROW_TYPE_SPARSE_UNION: + return "sparse_union"; + case NANOARROW_TYPE_DENSE_UNION: + return "dense_union"; + case NANOARROW_TYPE_DICTIONARY: + return "dictionary"; + case NANOARROW_TYPE_MAP: + return "map"; + case NANOARROW_TYPE_EXTENSION: + return "extension"; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + return "fixed_size_list"; + case NANOARROW_TYPE_DURATION: + return "duration"; + case NANOARROW_TYPE_LARGE_STRING: + return "large_string"; + case NANOARROW_TYPE_LARGE_BINARY: + return "large_binary"; + case NANOARROW_TYPE_LARGE_LIST: + return "large_list"; + case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO: + return "interval_month_day_nano"; + default: + return NULL; + } } /// \brief Arrow time unit enumerator @@ -358,34 +352,29 @@ static inline const char* ArrowTypeString(enum ArrowType type) { /// These names and values map to the corresponding arrow::TimeUnit::type /// enumerator. enum ArrowTimeUnit { - NANOARROW_TIME_UNIT_SECOND = 0, - NANOARROW_TIME_UNIT_MILLI = 1, - NANOARROW_TIME_UNIT_MICRO = 2, - NANOARROW_TIME_UNIT_NANO = 3 + NANOARROW_TIME_UNIT_SECOND = 0, + NANOARROW_TIME_UNIT_MILLI = 1, + NANOARROW_TIME_UNIT_MICRO = 2, + NANOARROW_TIME_UNIT_NANO = 3 }; /// \brief Validation level enumerator /// \ingroup nanoarrow-array enum ArrowValidationLevel { - /// \brief Do not validate buffer sizes or content. - NANOARROW_VALIDATION_LEVEL_NONE = 0, - - /// \brief Validate buffer sizes that depend on array length but do not - /// validate buffer - /// sizes that depend on buffer data access. - NANOARROW_VALIDATION_LEVEL_MINIMAL = 1, - - /// \brief Validate all buffer sizes, including those that require buffer - /// data access, - /// but do not perform any checks that are O(1) along the length of the - /// buffers. - NANOARROW_VALIDATION_LEVEL_DEFAULT = 2, - - /// \brief Validate all buffer sizes and all buffer content. This is useful - /// in the - /// context of untrusted input or input that may have been corrupted in - /// transit. - NANOARROW_VALIDATION_LEVEL_FULL = 3 + /// \brief Do not validate buffer sizes or content. + NANOARROW_VALIDATION_LEVEL_NONE = 0, + + /// \brief Validate buffer sizes that depend on array length but do not validate buffer + /// sizes that depend on buffer data access. + NANOARROW_VALIDATION_LEVEL_MINIMAL = 1, + + /// \brief Validate all buffer sizes, including those that require buffer data access, + /// but do not perform any checks that are O(1) along the length of the buffers. + NANOARROW_VALIDATION_LEVEL_DEFAULT = 2, + + /// \brief Validate all buffer sizes and all buffer content. This is useful in the + /// context of untrusted input or input that may have been corrupted in transit. + NANOARROW_VALIDATION_LEVEL_FULL = 3 }; /// \brief Get a string value of an enum ArrowTimeUnit value @@ -393,83 +382,83 @@ enum ArrowValidationLevel { /// /// Returns NULL for invalid values for time_unit static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) { - switch (time_unit) { - case NANOARROW_TIME_UNIT_SECOND: - return "s"; - case NANOARROW_TIME_UNIT_MILLI: - return "ms"; - case NANOARROW_TIME_UNIT_MICRO: - return "us"; - case NANOARROW_TIME_UNIT_NANO: - return "ns"; - default: - return NULL; - } -} - -/// \brief Functional types of buffers as described in the Arrow Columnar -/// Specification \ingroup nanoarrow-array-view + switch (time_unit) { + case NANOARROW_TIME_UNIT_SECOND: + return "s"; + case NANOARROW_TIME_UNIT_MILLI: + return "ms"; + case NANOARROW_TIME_UNIT_MICRO: + return "us"; + case NANOARROW_TIME_UNIT_NANO: + return "ns"; + default: + return NULL; + } +} + +/// \brief Functional types of buffers as described in the Arrow Columnar Specification +/// \ingroup nanoarrow-array-view enum ArrowBufferType { - NANOARROW_BUFFER_TYPE_NONE, - NANOARROW_BUFFER_TYPE_VALIDITY, - NANOARROW_BUFFER_TYPE_TYPE_ID, - NANOARROW_BUFFER_TYPE_UNION_OFFSET, - NANOARROW_BUFFER_TYPE_DATA_OFFSET, - NANOARROW_BUFFER_TYPE_DATA + NANOARROW_BUFFER_TYPE_NONE, + NANOARROW_BUFFER_TYPE_VALIDITY, + NANOARROW_BUFFER_TYPE_TYPE_ID, + NANOARROW_BUFFER_TYPE_UNION_OFFSET, + NANOARROW_BUFFER_TYPE_DATA_OFFSET, + NANOARROW_BUFFER_TYPE_DATA }; /// \brief An non-owning view of a string /// \ingroup nanoarrow-utils struct ArrowStringView { - /// \brief A pointer to the start of the string - /// - /// If size_bytes is 0, this value may be NULL. - const char* data; - - /// \brief The size of the string in bytes, - /// - /// (Not including the null terminator.) - int64_t size_bytes; + /// \brief A pointer to the start of the string + /// + /// If size_bytes is 0, this value may be NULL. + const char* data; + + /// \brief The size of the string in bytes, + /// + /// (Not including the null terminator.) + int64_t size_bytes; }; /// \brief Return a view of a const C string /// \ingroup nanoarrow-utils static inline struct ArrowStringView ArrowCharView(const char* value) { - struct ArrowStringView out; + struct ArrowStringView out; - out.data = value; - if (value) { - out.size_bytes = (int64_t)strlen(value); - } else { - out.size_bytes = 0; - } + out.data = value; + if (value) { + out.size_bytes = (int64_t)strlen(value); + } else { + out.size_bytes = 0; + } - return out; + return out; } /// \brief An non-owning view of a buffer /// \ingroup nanoarrow-utils struct ArrowBufferView { - /// \brief A pointer to the start of the buffer - /// - /// If size_bytes is 0, this value may be NULL. - union { - const void* data; - const int8_t* as_int8; - const uint8_t* as_uint8; - const int16_t* as_int16; - const uint16_t* as_uint16; - const int32_t* as_int32; - const uint32_t* as_uint32; - const int64_t* as_int64; - const uint64_t* as_uint64; - const double* as_double; - const float* as_float; - const char* as_char; - } data; - - /// \brief The size of the buffer in bytes - int64_t size_bytes; + /// \brief A pointer to the start of the buffer + /// + /// If size_bytes is 0, this value may be NULL. + union { + const void* data; + const int8_t* as_int8; + const uint8_t* as_uint8; + const int16_t* as_int16; + const uint16_t* as_uint16; + const int32_t* as_int32; + const uint32_t* as_uint32; + const int64_t* as_int64; + const uint64_t* as_uint64; + const double* as_double; + const float* as_float; + const char* as_char; + } data; + + /// \brief The size of the buffer in bytes + int64_t size_bytes; }; /// \brief Array buffer allocation and deallocation @@ -479,48 +468,43 @@ struct ArrowBufferView { /// to customize allocation and deallocation of buffers when constructing /// an ArrowArray. struct ArrowBufferAllocator { - /// \brief Reallocate a buffer or return NULL if it cannot be reallocated - uint8_t* (*reallocate)( - struct ArrowBufferAllocator* allocator, - uint8_t* ptr, - int64_t old_size, - int64_t new_size); - - /// \brief Deallocate a buffer allocated by this allocator - void (*free)( - struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size); - - /// \brief Opaque data specific to the allocator - void* private_data; + /// \brief Reallocate a buffer or return NULL if it cannot be reallocated + uint8_t* (*reallocate)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t old_size, int64_t new_size); + + /// \brief Deallocate a buffer allocated by this allocator + void (*free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size); + + /// \brief Opaque data specific to the allocator + void* private_data; }; /// \brief An owning mutable view of a buffer /// \ingroup nanoarrow-buffer struct ArrowBuffer { - /// \brief A pointer to the start of the buffer - /// - /// If capacity_bytes is 0, this value may be NULL. - uint8_t* data; + /// \brief A pointer to the start of the buffer + /// + /// If capacity_bytes is 0, this value may be NULL. + uint8_t* data; - /// \brief The size of the buffer in bytes - int64_t size_bytes; + /// \brief The size of the buffer in bytes + int64_t size_bytes; - /// \brief The capacity of the buffer in bytes - int64_t capacity_bytes; + /// \brief The capacity of the buffer in bytes + int64_t capacity_bytes; - /// \brief The allocator that will be used to reallocate and/or free the - /// buffer - struct ArrowBufferAllocator allocator; + /// \brief The allocator that will be used to reallocate and/or free the buffer + struct ArrowBufferAllocator allocator; }; /// \brief An owning mutable view of a bitmap /// \ingroup nanoarrow-bitmap struct ArrowBitmap { - /// \brief An ArrowBuffer to hold the allocated memory - struct ArrowBuffer buffer; + /// \brief An ArrowBuffer to hold the allocated memory + struct ArrowBuffer buffer; - /// \brief The number of bits that have been appended to the bitmap - int64_t size_bits; + /// \brief The number of bits that have been appended to the bitmap + int64_t size_bits; }; /// \brief A description of an arrangement of buffers @@ -530,16 +514,15 @@ struct ArrowBitmap { /// calculate the size of each buffer in an ArrowArray knowing only /// the length and offset of the array. struct ArrowLayout { - /// \brief The function of each buffer - enum ArrowBufferType buffer_type[3]; + /// \brief The function of each buffer + enum ArrowBufferType buffer_type[3]; - /// \brief The size of an element each buffer or 0 if this size is variable - /// or unknown - int64_t element_size_bits[3]; + /// \brief The size of an element each buffer or 0 if this size is variable or unknown + int64_t element_size_bits[3]; - /// \brief The number of elements in the child array per element in this - /// array for a fixed-size list - int64_t child_size_elements; + /// \brief The number of elements in the child array per element in this array for a + /// fixed-size list + int64_t child_size_elements; }; /// \brief A non-owning view of an ArrowArray @@ -551,64 +534,63 @@ struct ArrowLayout { /// with the same storage type, or use it to represent a hypothetical /// ArrowArray that does not exist yet. struct ArrowArrayView { - /// \brief The underlying ArrowArray or NULL if it has not been set - struct ArrowArray* array; - - /// \brief The type used to store values in this array - /// - /// This type represents only the minimum required information to - /// extract values from the array buffers (e.g., for a Date32 array, - /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded - /// arrays, this will be the index type. - enum ArrowType storage_type; - - /// \brief The buffer types, strides, and sizes of this Array's buffers - struct ArrowLayout layout; - - /// \brief This Array's buffers as ArrowBufferView objects - struct ArrowBufferView buffer_views[3]; - - /// \brief The number of children of this view - int64_t n_children; - - /// \brief Pointers to views of this array's children - struct ArrowArrayView** children; - - /// \brief Union type id to child index mapping - /// - /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer - /// such that child_index == union_type_id_map[type_id] and - /// type_id == union_type_id_map[128 + child_index]. This value may be - /// NULL in the case where child_id == type_id. - int8_t* union_type_id_map; + /// \brief The underlying ArrowArray or NULL if it has not been set + struct ArrowArray* array; + + /// \brief The type used to store values in this array + /// + /// This type represents only the minimum required information to + /// extract values from the array buffers (e.g., for a Date32 array, + /// this value will be NANOARROW_TYPE_INT32). For dictionary-encoded + /// arrays, this will be the index type. + enum ArrowType storage_type; + + /// \brief The buffer types, strides, and sizes of this Array's buffers + struct ArrowLayout layout; + + /// \brief This Array's buffers as ArrowBufferView objects + struct ArrowBufferView buffer_views[3]; + + /// \brief The number of children of this view + int64_t n_children; + + /// \brief Pointers to views of this array's children + struct ArrowArrayView** children; + + /// \brief Union type id to child index mapping + /// + /// If storage_type is a union type, a 256-byte ArrowMalloc()ed buffer + /// such that child_index == union_type_id_map[type_id] and + /// type_id == union_type_id_map[128 + child_index]. This value may be + /// NULL in the case where child_id == type_id. + int8_t* union_type_id_map; }; // Used as the private data member for ArrowArrays allocated here and accessed // internally within inline ArrowArray* helpers. struct ArrowArrayPrivateData { - // Holder for the validity buffer (or first buffer for union types, which - // are the only type whose first buffer is not a valdiity buffer) - struct ArrowBitmap bitmap; + // Holder for the validity buffer (or first buffer for union types, which are + // the only type whose first buffer is not a valdiity buffer) + struct ArrowBitmap bitmap; - // Holder for additional buffers as required - struct ArrowBuffer buffers[2]; + // Holder for additional buffers as required + struct ArrowBuffer buffers[2]; - // The array of pointers to buffers. This must be updated after a sequence - // of appends to synchronize its values with the actual buffer addresses - // (which may have ben reallocated uring that time) - const void* buffer_data[3]; + // The array of pointers to buffers. This must be updated after a sequence + // of appends to synchronize its values with the actual buffer addresses + // (which may have ben reallocated uring that time) + const void* buffer_data[3]; - // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown - enum ArrowType storage_type; + // The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown + enum ArrowType storage_type; - // The buffer arrangement for the storage type - struct ArrowLayout layout; + // The buffer arrangement for the storage type + struct ArrowLayout layout; - // Flag to indicate if there are non-sequence union type ids. - // In the future this could be replaced with a type id<->child mapping - // to support constructing unions in append mode where type_id != - // child_index - int8_t union_type_id_is_child_index; + // Flag to indicate if there are non-sequence union type ids. + // In the future this could be replaced with a type id<->child mapping + // to support constructing unions in append mode where type_id != child_index + int8_t union_type_id_is_child_index; }; /// \brief A representation of a fixed-precision decimal number @@ -618,47 +600,41 @@ struct ArrowArrayPrivateData { /// values set using ArrowDecimalSetInt(), ArrowDecimalSetBytes128(), /// or ArrowDecimalSetBytes256(). struct ArrowDecimal { - /// \brief An array of 64-bit integers of n_words length defined in - /// native-endian order - uint64_t words[4]; + /// \brief An array of 64-bit integers of n_words length defined in native-endian order + uint64_t words[4]; - /// \brief The number of significant digits this decimal number can - /// represent - int32_t precision; + /// \brief The number of significant digits this decimal number can represent + int32_t precision; - /// \brief The number of digits after the decimal point. This can be - /// negative. - int32_t scale; + /// \brief The number of digits after the decimal point. This can be negative. + int32_t scale; - /// \brief The number of words in the words array - int n_words; + /// \brief The number of words in the words array + int n_words; - /// \brief Cached value used by the implementation - int high_word_index; + /// \brief Cached value used by the implementation + int high_word_index; - /// \brief Cached value used by the implementation - int low_word_index; + /// \brief Cached value used by the implementation + int low_word_index; }; /// \brief Initialize a decimal with a given set of type parameters /// \ingroup nanoarrow-utils -static inline void ArrowDecimalInit( - struct ArrowDecimal* decimal, - int32_t bitwidth, - int32_t precision, - int32_t scale) { - memset(decimal->words, 0, sizeof(decimal->words)); - decimal->precision = precision; - decimal->scale = scale; - decimal->n_words = bitwidth / 8 / sizeof(uint64_t); - - if (_ArrowIsLittleEndian()) { - decimal->low_word_index = 0; - decimal->high_word_index = decimal->n_words - 1; - } else { - decimal->low_word_index = decimal->n_words - 1; - decimal->high_word_index = 0; - } +static inline void ArrowDecimalInit(struct ArrowDecimal* decimal, int32_t bitwidth, + int32_t precision, int32_t scale) { + memset(decimal->words, 0, sizeof(decimal->words)); + decimal->precision = precision; + decimal->scale = scale; + decimal->n_words = bitwidth / 8 / sizeof(uint64_t); + + if (_ArrowIsLittleEndian()) { + decimal->low_word_index = 0; + decimal->high_word_index = decimal->n_words - 1; + } else { + decimal->low_word_index = decimal->n_words - 1; + decimal->high_word_index = 0; + } } /// \brief Get a signed integer value of a sufficiently small ArrowDecimal @@ -667,40 +643,38 @@ static inline void ArrowDecimalInit( /// within the signed 64-bit integer range (A precision less than or equal /// to 18 is sufficiently small). static inline int64_t ArrowDecimalGetIntUnsafe(struct ArrowDecimal* decimal) { - return (int64_t)decimal->words[decimal->low_word_index]; + return (int64_t)decimal->words[decimal->low_word_index]; } /// \brief Copy the bytes of this decimal into a sufficiently large buffer /// \ingroup nanoarrow-utils -static inline void ArrowDecimalGetBytes( - struct ArrowDecimal* decimal, uint8_t* out) { - memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); +static inline void ArrowDecimalGetBytes(struct ArrowDecimal* decimal, uint8_t* out) { + memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t)); } /// \brief Returns 1 if the value represented by decimal is >= 0 or -1 otherwise /// \ingroup nanoarrow-utils static inline int64_t ArrowDecimalSign(struct ArrowDecimal* decimal) { - return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); + return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63); } /// \brief Sets the integer value of this decimal /// \ingroup nanoarrow-utils -static inline void ArrowDecimalSetInt( - struct ArrowDecimal* decimal, int64_t value) { - if (value < 0) { - memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); - } else { - memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t)); - } +static inline void ArrowDecimalSetInt(struct ArrowDecimal* decimal, int64_t value) { + if (value < 0) { + memset(decimal->words, 0xff, decimal->n_words * sizeof(uint64_t)); + } else { + memset(decimal->words, 0, decimal->n_words * sizeof(uint64_t)); + } - decimal->words[decimal->low_word_index] = value; + decimal->words[decimal->low_word_index] = value; } /// \brief Copy bytes from a buffer into this decimal /// \ingroup nanoarrow-utils -static inline void ArrowDecimalSetBytes( - struct ArrowDecimal* decimal, const uint8_t* value) { - memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); +static inline void ArrowDecimalSetBytes(struct ArrowDecimal* decimal, + const uint8_t* value) { + memcpy(decimal->words, value, decimal->n_words * sizeof(uint64_t)); } #ifdef __cplusplus @@ -732,9 +706,11 @@ static inline void ArrowDecimalSetBytes( #include #include -// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will -// set this define in nanoarrow_config.h. If not, you can optionally #define -// NANOARROW_NAMESPACE MyNamespace here. + + +// If using CMake, optionally pass -DNANOARROW_NAMESPACE=MyNamespace which will set this +// define in nanoarrow_config.h. If not, you can optionally #define NANOARROW_NAMESPACE +// MyNamespace here. // This section remaps the non-prefixed symbols to the prefixed symbols so that // code written against this build can be used independent of the value of @@ -743,108 +719,94 @@ static inline void ArrowDecimalSetBytes( #define NANOARROW_CAT(A, B) A##B #define NANOARROW_SYMBOL(A, B) NANOARROW_CAT(A, B) -#define ArrowNanoarrowVersion \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) +#define ArrowNanoarrowVersion NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersion) #define ArrowNanoarrowVersionInt \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) -#define ArrowErrorMessage \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorMessage) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowNanoarrowVersionInt) +#define ArrowErrorMessage NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorMessage) #define ArrowMalloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMalloc) #define ArrowRealloc NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowRealloc) #define ArrowFree NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowFree) #define ArrowBufferAllocatorDefault \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferAllocatorDefault) #define ArrowBufferDeallocator \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBufferDeallocator) #define ArrowErrorSet NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowErrorSet) #define ArrowLayoutInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowLayoutInit) #define ArrowSchemaInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInit) #define ArrowSchemaInitFromType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) -#define ArrowSchemaSetType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaInitFromType) +#define ArrowSchemaSetType NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetType) #define ArrowSchemaSetTypeStruct \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeStruct) #define ArrowSchemaSetTypeFixedSize \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize) #define ArrowSchemaSetTypeDecimal \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal) #define ArrowSchemaSetTypeDateTime \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime) #define ArrowSchemaSetTypeUnion \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) -#define ArrowSchemaDeepCopy \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) -#define ArrowSchemaSetFormat \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) -#define ArrowSchemaSetName \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeUnion) +#define ArrowSchemaDeepCopy NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaDeepCopy) +#define ArrowSchemaSetFormat NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetFormat) +#define ArrowSchemaSetName NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetName) #define ArrowSchemaSetMetadata \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetMetadata) #define ArrowSchemaAllocateChildren \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateChildren) #define ArrowSchemaAllocateDictionary \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaAllocateDictionary) #define ArrowMetadataReaderInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderInit) #define ArrowMetadataReaderRead \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead) -#define ArrowMetadataSizeOf \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf) -#define ArrowMetadataHasKey \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey) -#define ArrowMetadataGetValue \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataReaderRead) +#define ArrowMetadataSizeOf NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataSizeOf) +#define ArrowMetadataHasKey NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataHasKey) +#define ArrowMetadataGetValue NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataGetValue) #define ArrowMetadataBuilderInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderInit) #define ArrowMetadataBuilderAppend \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderAppend) #define ArrowMetadataBuilderSet \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderSet) #define ArrowMetadataBuilderRemove \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove) -#define ArrowSchemaViewInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit) -#define ArrowSchemaToString \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowMetadataBuilderRemove) +#define ArrowSchemaViewInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaViewInit) +#define ArrowSchemaToString NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaToString) #define ArrowArrayInitFromType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromType) #define ArrowArrayInitFromSchema \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayInitFromSchema) #define ArrowArrayAllocateChildren \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateChildren) #define ArrowArrayAllocateDictionary \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayAllocateDictionary) #define ArrowArraySetValidityBitmap \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap) -#define ArrowArraySetBuffer \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer) -#define ArrowArrayReserve \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetValidityBitmap) +#define ArrowArraySetBuffer NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArraySetBuffer) +#define ArrowArrayReserve NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayReserve) #define ArrowArrayFinishBuilding \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuilding) #define ArrowArrayFinishBuildingDefault \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayFinishBuildingDefault) #define ArrowArrayViewInitFromType \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromType) #define ArrowArrayViewInitFromSchema \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewInitFromSchema) #define ArrowArrayViewAllocateChildren \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewAllocateChildren) #define ArrowArrayViewSetLength \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetLength) #define ArrowArrayViewSetArray \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArray) #define ArrowArrayViewValidateFull \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull) -#define ArrowArrayViewReset \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidateFull) +#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewReset) #define ArrowBasicArrayStreamInit \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit) #define ArrowBasicArrayStreamSetArray \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamSetArray) #define ArrowBasicArrayStreamValidate \ - NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate) + NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamValidate) #endif @@ -892,8 +854,8 @@ struct ArrowBufferAllocator ArrowBufferAllocatorDefault(void); /// avoid copying an existing buffer that was not allocated using the /// infrastructure provided here (e.g., by an R or Python object). struct ArrowBufferAllocator ArrowBufferDeallocator( - void (*custom_free)( - struct ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size), + void (*custom_free)(struct ArrowBufferAllocator* allocator, uint8_t* ptr, + int64_t size), void* private_data); /// @} @@ -910,8 +872,8 @@ struct ArrowBufferAllocator ArrowBufferDeallocator( /// \brief Error type containing a UTF-8 encoded message. struct ArrowError { - /// \brief A character buffer with space for an error message. - char message[1024]; + /// \brief A character buffer with space for an error message. + char message[1024]; }; /// \brief Set the contents of an error using printf syntax @@ -957,10 +919,9 @@ void ArrowSchemaInit(struct ArrowSchema* schema); /// /// A convenience constructor for that calls ArrowSchemaInit() and /// ArrowSchemaSetType() for the common case of constructing an -/// unparameterized type. The caller is responsible for calling the -/// schema->release callback if NANOARROW_OK is returned. -ArrowErrorCode ArrowSchemaInitFromType( - struct ArrowSchema* schema, enum ArrowType type); +/// unparameterized type. The caller is responsible for calling the schema->release +/// callback if NANOARROW_OK is returned. +ArrowErrorCode ArrowSchemaInitFromType(struct ArrowSchema* schema, enum ArrowType type); /// \brief Get a human-readable summary of a Schema /// @@ -968,8 +929,8 @@ ArrowErrorCode ArrowSchemaInitFromType( /// and returns the number of characters required for the output if /// n were sufficiently large. If recursive is non-zero, the result will /// also include children. -int64_t ArrowSchemaToString( - struct ArrowSchema* schema, char* out, int64_t n, char recursive); +int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n, + char recursive); /// \brief Set the format field of a schema from an ArrowType /// @@ -977,19 +938,16 @@ int64_t ArrowSchemaToString( /// NANOARROW_TYPE_LIST, NANOARROW_TYPE_LARGE_LIST, and /// NANOARROW_TYPE_MAP, the appropriate number of children are /// allocated, initialized, and named; however, the caller must -/// ArrowSchemaSetType() on the preinitialized children. Schema must have been -/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetType( - struct ArrowSchema* schema, enum ArrowType type); +/// ArrowSchemaSetType() on the preinitialized children. Schema must have been initialized +/// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetType(struct ArrowSchema* schema, enum ArrowType type); /// \brief Set the format field and initialize children of a struct schema /// -/// The specified number of children are initialized; however, the caller is -/// responsible for calling ArrowSchemaSetType() and ArrowSchemaSetName() on -/// each child. Schema must have been initialized using ArrowSchemaInit() or -/// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeStruct( - struct ArrowSchema* schema, int64_t n_children); +/// The specified number of children are initialized; however, the caller is responsible +/// for calling ArrowSchemaSetType() and ArrowSchemaSetName() on each child. +/// Schema must have been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeStruct(struct ArrowSchema* schema, int64_t n_children); /// \brief Set the format field of a fixed-size schema /// @@ -997,55 +955,50 @@ ArrowErrorCode ArrowSchemaSetTypeStruct( /// NANOARROW_TYPE_FIXED_SIZE_BINARY or NANOARROW_TYPE_FIXED_SIZE_LIST. /// For NANOARROW_TYPE_FIXED_SIZE_LIST, the appropriate number of children are /// allocated, initialized, and named; however, the caller must -/// ArrowSchemaSetType() the first child. Schema must have been initialized -/// using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeFixedSize( - struct ArrowSchema* schema, enum ArrowType type, int32_t fixed_size); +/// ArrowSchemaSetType() the first child. Schema must have been initialized using +/// ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct ArrowSchema* schema, + enum ArrowType type, int32_t fixed_size); /// \brief Set the format field of a decimal schema /// /// Returns EINVAL for scale <= 0 or for type that is not -/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have -/// been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeDecimal( - struct ArrowSchema* schema, - enum ArrowType type, - int32_t decimal_precision, - int32_t decimal_scale); +/// NANOARROW_TYPE_DECIMAL128 or NANOARROW_TYPE_DECIMAL256. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDecimal(struct ArrowSchema* schema, enum ArrowType type, + int32_t decimal_precision, + int32_t decimal_scale); /// \brief Set the format field of a time, timestamp, or duration schema /// /// Returns EINVAL for type that is not /// NANOARROW_TYPE_TIME32, NANOARROW_TYPE_TIME64, /// NANOARROW_TYPE_TIMESTAMP, or NANOARROW_TYPE_DURATION. The -/// timezone parameter must be NULL for a non-timestamp type. Schema must have -/// been initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetTypeDateTime( - struct ArrowSchema* schema, - enum ArrowType type, - enum ArrowTimeUnit time_unit, - const char* timezone); +/// timezone parameter must be NULL for a non-timestamp type. Schema must have been +/// initialized using ArrowSchemaInit() or ArrowSchemaDeepCopy(). +ArrowErrorCode ArrowSchemaSetTypeDateTime(struct ArrowSchema* schema, enum ArrowType type, + enum ArrowTimeUnit time_unit, + const char* timezone); /// \brief Seet the format field of a union schema /// /// Returns EINVAL for a type that is not NANOARROW_TYPE_DENSE_UNION /// or NANOARROW_TYPE_SPARSE_UNION. The specified number of children are /// allocated, and initialized. -ArrowErrorCode ArrowSchemaSetTypeUnion( - struct ArrowSchema* schema, enum ArrowType type, int64_t n_children); +ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema* schema, enum ArrowType type, + int64_t n_children); /// \brief Make a (recursive) copy of a schema /// /// Allocates and copies fields of schema into schema_out. -ArrowErrorCode ArrowSchemaDeepCopy( - struct ArrowSchema* schema, struct ArrowSchema* schema_out); +ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema, + struct ArrowSchema* schema_out); /// \brief Copy format into schema->format /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaSetFormat( - struct ArrowSchema* schema, const char* format); +ArrowErrorCode ArrowSchemaSetFormat(struct ArrowSchema* schema, const char* format); /// \brief Copy name into schema->name /// @@ -1057,16 +1010,15 @@ ArrowErrorCode ArrowSchemaSetName(struct ArrowSchema* schema, const char* name); /// /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy. -ArrowErrorCode ArrowSchemaSetMetadata( - struct ArrowSchema* schema, const char* metadata); +ArrowErrorCode ArrowSchemaSetMetadata(struct ArrowSchema* schema, const char* metadata); /// \brief Allocate the schema->children array /// /// Includes the memory for each child struct ArrowSchema. /// schema must have been allocated using ArrowSchemaInitFromType() or /// ArrowSchemaDeepCopy(). -ArrowErrorCode ArrowSchemaAllocateChildren( - struct ArrowSchema* schema, int64_t n_children); +ArrowErrorCode ArrowSchemaAllocateChildren(struct ArrowSchema* schema, + int64_t n_children); /// \brief Allocate the schema->dictionary member /// @@ -1085,25 +1037,24 @@ ArrowErrorCode ArrowSchemaAllocateDictionary(struct ArrowSchema* schema); /// The ArrowMetadataReader does not own any data and is only valid /// for the lifetime of the underlying metadata pointer. struct ArrowMetadataReader { - /// \brief A metadata string from a schema->metadata field. - const char* metadata; + /// \brief A metadata string from a schema->metadata field. + const char* metadata; - /// \brief The current offset into the metadata string - int64_t offset; + /// \brief The current offset into the metadata string + int64_t offset; - /// \brief The number of remaining keys - int32_t remaining_keys; + /// \brief The number of remaining keys + int32_t remaining_keys; }; /// \brief Initialize an ArrowMetadataReader -ArrowErrorCode ArrowMetadataReaderInit( - struct ArrowMetadataReader* reader, const char* metadata); +ArrowErrorCode ArrowMetadataReaderInit(struct ArrowMetadataReader* reader, + const char* metadata); /// \brief Read the next key/value pair from an ArrowMetadataReader -ArrowErrorCode ArrowMetadataReaderRead( - struct ArrowMetadataReader* reader, - struct ArrowStringView* key_out, - struct ArrowStringView* value_out); +ArrowErrorCode ArrowMetadataReaderRead(struct ArrowMetadataReader* reader, + struct ArrowStringView* key_out, + struct ArrowStringView* value_out); /// \brief The number of bytes in in a key/value metadata string int64_t ArrowMetadataSizeOf(const char* metadata); @@ -1114,37 +1065,32 @@ char ArrowMetadataHasKey(const char* metadata, struct ArrowStringView key); /// \brief Extract a value from schema metadata /// /// If key does not exist in metadata, value_out is unmodified -ArrowErrorCode ArrowMetadataGetValue( - const char* metadata, - struct ArrowStringView key, - struct ArrowStringView* value_out); +ArrowErrorCode ArrowMetadataGetValue(const char* metadata, struct ArrowStringView key, + struct ArrowStringView* value_out); /// \brief Initialize a builder for schema metadata from key/value pairs /// /// metadata can be an existing metadata string or NULL to initialize /// an empty metadata string. -ArrowErrorCode ArrowMetadataBuilderInit( - struct ArrowBuffer* buffer, const char* metadata); +ArrowErrorCode ArrowMetadataBuilderInit(struct ArrowBuffer* buffer, const char* metadata); /// \brief Append a key/value pair to a buffer containing serialized metadata -ArrowErrorCode ArrowMetadataBuilderAppend( - struct ArrowBuffer* buffer, - struct ArrowStringView key, - struct ArrowStringView value); +ArrowErrorCode ArrowMetadataBuilderAppend(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); /// \brief Set a key/value pair to a buffer containing serialized metadata /// /// Ensures that the only entry for key in the metadata is set to value. /// This function maintains the existing position of (the first instance of) /// key if present in the data. -ArrowErrorCode ArrowMetadataBuilderSet( - struct ArrowBuffer* buffer, - struct ArrowStringView key, - struct ArrowStringView value); +ArrowErrorCode ArrowMetadataBuilderSet(struct ArrowBuffer* buffer, + struct ArrowStringView key, + struct ArrowStringView value); /// \brief Remove a key from a buffer containing serialized metadata -ArrowErrorCode ArrowMetadataBuilderRemove( - struct ArrowBuffer* buffer, struct ArrowStringView key); +ArrowErrorCode ArrowMetadataBuilderRemove(struct ArrowBuffer* buffer, + struct ArrowStringView key); /// @} @@ -1159,93 +1105,91 @@ ArrowErrorCode ArrowMetadataBuilderRemove( /// encouraged to use the provided getters to ensure forward /// compatiblity. struct ArrowSchemaView { - /// \brief A pointer to the schema represented by this view - struct ArrowSchema* schema; - - /// \brief The data type represented by the schema - /// - /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a - /// non-null dictionary member; datetime types are valid values. - /// This value will never be NANOARROW_TYPE_EXTENSION (see - /// extension_name and/or extension_metadata to check for - /// an extension type). - enum ArrowType type; - - /// \brief The storage data type represented by the schema - /// - /// This value will never be NANOARROW_TYPE_DICTIONARY, - /// NANOARROW_TYPE_EXTENSION or any datetime type. This value represents - /// only the type required to interpret the buffers in the array. - enum ArrowType storage_type; - - /// \brief The storage layout represented by the schema - struct ArrowLayout layout; - - /// \brief The extension type name if it exists - /// - /// If the ARROW:extension:name key is present in schema.metadata, - /// extension_name.data will be non-NULL. - struct ArrowStringView extension_name; - - /// \brief The extension type metadata if it exists - /// - /// If the ARROW:extension:metadata key is present in schema.metadata, - /// extension_metadata.data will be non-NULL. - struct ArrowStringView extension_metadata; - - /// \brief Format fixed size parameter - /// - /// This value is set when parsing a fixed-size binary or fixed-size - /// list schema; this value is undefined for other types. For a - /// fixed-size binary schema this value is in bytes; for a fixed-size - /// list schema this value refers to the number of child elements for - /// each element of the parent. - int32_t fixed_size; - - /// \brief Decimal bitwidth - /// - /// This value is set when parsing a decimal type schema; - /// this value is undefined for other types. - int32_t decimal_bitwidth; - - /// \brief Decimal precision - /// - /// This value is set when parsing a decimal type schema; - /// this value is undefined for other types. - int32_t decimal_precision; - - /// \brief Decimal scale - /// - /// This value is set when parsing a decimal type schema; - /// this value is undefined for other types. - int32_t decimal_scale; - - /// \brief Format time unit parameter - /// - /// This value is set when parsing a date/time type. The value is - /// undefined for other types. - enum ArrowTimeUnit time_unit; - - /// \brief Format timezone parameter - /// - /// This value is set when parsing a timestamp type and represents - /// the timezone format parameter. This value points to - /// data within the schema and is undefined for other types. - const char* timezone; - - /// \brief Union type ids parameter - /// - /// This value is set when parsing a union type and represents - /// type ids parameter. This value points to - /// data within the schema and is undefined for other types. - const char* union_type_ids; + /// \brief A pointer to the schema represented by this view + struct ArrowSchema* schema; + + /// \brief The data type represented by the schema + /// + /// This value may be NANOARROW_TYPE_DICTIONARY if the schema has a + /// non-null dictionary member; datetime types are valid values. + /// This value will never be NANOARROW_TYPE_EXTENSION (see + /// extension_name and/or extension_metadata to check for + /// an extension type). + enum ArrowType type; + + /// \brief The storage data type represented by the schema + /// + /// This value will never be NANOARROW_TYPE_DICTIONARY, NANOARROW_TYPE_EXTENSION + /// or any datetime type. This value represents only the type required to + /// interpret the buffers in the array. + enum ArrowType storage_type; + + /// \brief The storage layout represented by the schema + struct ArrowLayout layout; + + /// \brief The extension type name if it exists + /// + /// If the ARROW:extension:name key is present in schema.metadata, + /// extension_name.data will be non-NULL. + struct ArrowStringView extension_name; + + /// \brief The extension type metadata if it exists + /// + /// If the ARROW:extension:metadata key is present in schema.metadata, + /// extension_metadata.data will be non-NULL. + struct ArrowStringView extension_metadata; + + /// \brief Format fixed size parameter + /// + /// This value is set when parsing a fixed-size binary or fixed-size + /// list schema; this value is undefined for other types. For a + /// fixed-size binary schema this value is in bytes; for a fixed-size + /// list schema this value refers to the number of child elements for + /// each element of the parent. + int32_t fixed_size; + + /// \brief Decimal bitwidth + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_bitwidth; + + /// \brief Decimal precision + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_precision; + + /// \brief Decimal scale + /// + /// This value is set when parsing a decimal type schema; + /// this value is undefined for other types. + int32_t decimal_scale; + + /// \brief Format time unit parameter + /// + /// This value is set when parsing a date/time type. The value is + /// undefined for other types. + enum ArrowTimeUnit time_unit; + + /// \brief Format timezone parameter + /// + /// This value is set when parsing a timestamp type and represents + /// the timezone format parameter. This value points to + /// data within the schema and is undefined for other types. + const char* timezone; + + /// \brief Union type ids parameter + /// + /// This value is set when parsing a union type and represents + /// type ids parameter. This value points to + /// data within the schema and is undefined for other types. + const char* union_type_ids; }; /// \brief Initialize an ArrowSchemaView -ArrowErrorCode ArrowSchemaViewInit( - struct ArrowSchemaView* schema_view, - struct ArrowSchema* schema, - struct ArrowError* error); +ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view, + struct ArrowSchema* schema, struct ArrowError* error); /// @} @@ -1276,8 +1220,7 @@ static inline void ArrowBufferReset(struct ArrowBuffer* buffer); /// /// Transfers the buffer data and lifecycle management to another /// address and resets buffer. -static inline void ArrowBufferMove( - struct ArrowBuffer* src, struct ArrowBuffer* dst); +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst); /// \brief Grow or shrink a buffer to a given capacity /// @@ -1285,84 +1228,85 @@ static inline void ArrowBufferMove( /// if shrink_to_fit is non-zero. Calling ArrowBufferResize() does not /// adjust the buffer's size member except to ensure that the invariant /// capacity >= size remains true. -static inline ArrowErrorCode ArrowBufferResize( - struct ArrowBuffer* buffer, int64_t new_capacity_bytes, char shrink_to_fit); +static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_capacity_bytes, + char shrink_to_fit); /// \brief Ensure a buffer has at least a given additional capacity /// /// Ensures that the buffer has space to append at least /// additional_size_bytes, overallocating when required. -static inline ArrowErrorCode ArrowBufferReserve( - struct ArrowBuffer* buffer, int64_t additional_size_bytes); +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes); /// \brief Write data to buffer and increment the buffer size /// /// This function does not check that buffer has the required capacity -static inline void ArrowBufferAppendUnsafe( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes); +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes); /// \brief Write data to buffer and increment the buffer size /// /// This function writes and ensures that the buffer has the required capacity, /// possibly by reallocating the buffer. Like ArrowBufferReserve, this will /// overallocate when reallocation is required. -static inline ArrowErrorCode ArrowBufferAppend( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes); +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes); /// \brief Write fill to buffer and increment the buffer size /// /// This function writes the specified number of fill bytes and /// ensures that the buffer has the required capacity, -static inline ArrowErrorCode ArrowBufferAppendFill( - struct ArrowBuffer* buffer, uint8_t value, int64_t size_bytes); +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes); /// \brief Write an 8-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt8( - struct ArrowBuffer* buffer, int8_t value); +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value); /// \brief Write an unsigned 8-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt8( - struct ArrowBuffer* buffer, uint8_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value); /// \brief Write a 16-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt16( - struct ArrowBuffer* buffer, int16_t value); +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value); /// \brief Write an unsigned 16-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt16( - struct ArrowBuffer* buffer, uint16_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value); /// \brief Write a 32-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt32( - struct ArrowBuffer* buffer, int32_t value); +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value); /// \brief Write an unsigned 32-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt32( - struct ArrowBuffer* buffer, uint32_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value); /// \brief Write a 64-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendInt64( - struct ArrowBuffer* buffer, int64_t value); +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value); /// \brief Write an unsigned 64-bit integer to a buffer -static inline ArrowErrorCode ArrowBufferAppendUInt64( - struct ArrowBuffer* buffer, uint64_t value); +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value); /// \brief Write a double to a buffer -static inline ArrowErrorCode ArrowBufferAppendDouble( - struct ArrowBuffer* buffer, double value); +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value); /// \brief Write a float to a buffer -static inline ArrowErrorCode ArrowBufferAppendFloat( - struct ArrowBuffer* buffer, float value); +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value); /// \brief Write an ArrowStringView to a buffer -static inline ArrowErrorCode ArrowBufferAppendStringView( - struct ArrowBuffer* buffer, struct ArrowStringView value); +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value); /// \brief Write an ArrowBufferView to a buffer -static inline ArrowErrorCode ArrowBufferAppendBufferView( - struct ArrowBuffer* buffer, struct ArrowBufferView value); +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value); /// @} @@ -1383,12 +1327,11 @@ static inline void ArrowBitClear(uint8_t* bits, int64_t i); static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t value); /// \brief Set a boolean value to a range in a bitmap -static inline void ArrowBitsSetTo( - uint8_t* bits, int64_t start_offset, int64_t length, uint8_t bits_are_set); +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set); /// \brief Count true values in a bitmap -static inline int64_t ArrowBitCountSet( - const uint8_t* bits, int64_t i_from, int64_t i_to); +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to); /// \brief Initialize an ArrowBitmap /// @@ -1399,15 +1342,14 @@ static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap); /// /// Transfers the underlying buffer data and lifecycle management to another /// address and resets the bitmap. -static inline void ArrowBitmapMove( - struct ArrowBitmap* src, struct ArrowBitmap* dst); +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst); /// \brief Ensure a bitmap builder has at least a given additional capacity /// /// Ensures that the buffer has space to append at least /// additional_size_bits, overallocating when required. -static inline ArrowErrorCode ArrowBitmapReserve( - struct ArrowBitmap* bitmap, int64_t additional_size_bits); +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits); /// \brief Grow or shrink a bitmap to a given capacity /// @@ -1415,34 +1357,33 @@ static inline ArrowErrorCode ArrowBitmapReserve( /// if shrink_to_fit is non-zero. Calling ArrowBitmapResize() does not /// adjust the buffer's size member except when shrinking new_capacity_bits /// to a value less than the current number of bits in the bitmap. -static inline ArrowErrorCode ArrowBitmapResize( - struct ArrowBitmap* bitmap, int64_t new_capacity_bits, char shrink_to_fit); +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_capacity_bits, + char shrink_to_fit); -/// \brief Reserve space for and append zero or more of the same boolean value -/// to a bitmap -static inline ArrowErrorCode ArrowBitmapAppend( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length); +/// \brief Reserve space for and append zero or more of the same boolean value to a bitmap +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); /// \brief Append zero or more of the same boolean value to a bitmap -static inline void ArrowBitmapAppendUnsafe( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length); +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length); /// \brief Append boolean values encoded as int8_t to a bitmap /// /// The values must all be 0 or 1. -static inline void ArrowBitmapAppendInt8Unsafe( - struct ArrowBitmap* bitmap, const int8_t* values, int64_t n_values); +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values); /// \brief Append boolean values encoded as int32_t to a bitmap /// /// The values must all be 0 or 1. -static inline void ArrowBitmapAppendInt32Unsafe( - struct ArrowBitmap* bitmap, const int32_t* values, int64_t n_values); +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values); /// \brief Reset a bitmap builder /// -/// Releases any memory held by buffer, empties the cache, and resets the size -/// to zero +/// Releases any memory held by buffer, empties the cache, and resets the size to zero static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); /// @} @@ -1461,17 +1402,16 @@ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap); /// Initializes the fields and release callback of array. Caller /// is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromType( - struct ArrowArray* array, enum ArrowType storage_type); +ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray* array, + enum ArrowType storage_type); /// \brief Initialize the contents of an ArrowArray from an ArrowSchema /// /// Caller is responsible for calling the array->release callback if /// NANOARROW_OK is returned. -ArrowErrorCode ArrowArrayInitFromSchema( - struct ArrowArray* array, - struct ArrowSchema* schema, - struct ArrowError* error); +ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array, + struct ArrowSchema* schema, + struct ArrowError* error); /// \brief Allocate the array->children array /// @@ -1479,8 +1419,7 @@ ArrowErrorCode ArrowArrayInitFromSchema( /// whose members are marked as released and may be subsequently initialized /// with ArrowArrayInitFromType() or moved from an existing ArrowArray. /// schema must have been allocated using ArrowArrayInitFromType(). -ArrowErrorCode ArrowArrayAllocateChildren( - struct ArrowArray* array, int64_t n_children); +ArrowErrorCode ArrowArrayAllocateChildren(struct ArrowArray* array, int64_t n_children); /// \brief Allocate the array->dictionary member /// @@ -1493,33 +1432,30 @@ ArrowErrorCode ArrowArrayAllocateDictionary(struct ArrowArray* array); /// \brief Set the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -void ArrowArraySetValidityBitmap( - struct ArrowArray* array, struct ArrowBitmap* bitmap); +void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap* bitmap); /// \brief Set a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArraySetBuffer( - struct ArrowArray* array, int64_t i, struct ArrowBuffer* buffer); +ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i, + struct ArrowBuffer* buffer); /// \brief Get the validity bitmap of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -static inline struct ArrowBitmap* ArrowArrayValidityBitmap( - struct ArrowArray* array); +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array); /// \brief Get a buffer of an ArrowArray /// /// array must have been allocated using ArrowArrayInitFromType() -static inline struct ArrowBuffer* ArrowArrayBuffer( - struct ArrowArray* array, int64_t i); +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i); /// \brief Start element-wise appending to an ArrowArray /// /// Initializes any values needed to use ArrowArrayAppend*() functions. -/// All element-wise appenders append by value and return EINVAL if the exact -/// value cannot be represented by the underlying storage type. array must have -/// been allocated using ArrowArrayInitFromType() +/// All element-wise appenders append by value and return EINVAL if the exact value +/// cannot be represented by the underlying storage type. +/// array must have been allocated using ArrowArrayInitFromType() static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); /// \brief Reserve space for future appends @@ -1528,32 +1464,29 @@ static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array); /// child array sizes for non-fixed-size arrays), recursively reserve space for /// additional elements. This is useful for reducing the number of reallocations /// that occur using the item-wise appenders. -ArrowErrorCode ArrowArrayReserve( - struct ArrowArray* array, int64_t additional_size_elements); +ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array, + int64_t additional_size_elements); /// \brief Append a null value to an array -static inline ArrowErrorCode ArrowArrayAppendNull( - struct ArrowArray* array, int64_t n); +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n); /// \brief Append an empty, non-null value to an array -static inline ArrowErrorCode ArrowArrayAppendEmpty( - struct ArrowArray* array, int64_t n); +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n); /// \brief Append a signed integer value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range). -static inline ArrowErrorCode ArrowArrayAppendInt( - struct ArrowArray* array, int64_t value); +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, int64_t value); /// \brief Append an unsigned integer value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range). -static inline ArrowErrorCode ArrowArrayAppendUInt( - struct ArrowArray* array, uint64_t value); +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value); /// \brief Append a double value to an array /// @@ -1561,8 +1494,8 @@ static inline ArrowErrorCode ArrowArrayAppendUInt( /// the underlying storage type or EINVAL otherwise (e.g., value /// is outside the valid array range or there is an attempt to append /// a non-integer to an array with an integer storage type). -static inline ArrowErrorCode ArrowArrayAppendDouble( - struct ArrowArray* array, double value); +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value); /// \brief Append a string of bytes to an array /// @@ -1571,47 +1504,46 @@ static inline ArrowErrorCode ArrowArrayAppendDouble( /// the underlying array is not a binary, string, large binary, large string, /// or fixed-size binary array, or value is the wrong size for a fixed-size /// binary array). -static inline ArrowErrorCode ArrowArrayAppendBytes( - struct ArrowArray* array, struct ArrowBufferView value); +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value); /// \brief Append a string value to an array /// /// Returns NANOARROW_OK if value can be exactly represented by /// the underlying storage type or EINVAL otherwise (e.g., /// the underlying array is not a string or large string array). -static inline ArrowErrorCode ArrowArrayAppendString( - struct ArrowArray* array, struct ArrowStringView value); +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value); /// \brief Append a decimal value to an array /// /// Returns NANOARROW_OK if array is a decimal array with the appropriate /// bitwidth or EINVAL otherwise. -static inline ArrowErrorCode ArrowArrayAppendDecimal( - struct ArrowArray* array, struct ArrowDecimal* value); +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + struct ArrowDecimal* value); /// \brief Finish a nested array element /// /// Appends a non-null element to the array based on the first child's current /// length. Returns NANOARROW_OK if the item was successfully added or EINVAL -/// if the underlying storage type is not a struct, list, large list, or -/// fixed-size list, or if there was an attempt to add a struct or fixed-size -/// list element where the length of the child array(s) did not match the -/// expected length. +/// if the underlying storage type is not a struct, list, large list, or fixed-size +/// list, or if there was an attempt to add a struct or fixed-size list element where the +/// length of the child array(s) did not match the expected length. static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array); /// \brief Finish a union array element /// -/// Appends an element to the union type ids buffer and increments -/// array->length. For sparse unions, up to one element is added to non type-id -/// children. Returns EINVAL if the underlying storage type is not a union, if -/// type_id is not valid, or if child sizes after appending are inconsistent. -static inline ArrowErrorCode ArrowArrayFinishUnionElement( - struct ArrowArray* array, int8_t type_id); +/// Appends an element to the union type ids buffer and increments array->length. +/// For sparse unions, up to one element is added to non type-id children. Returns +/// EINVAL if the underlying storage type is not a union, if type_id is not valid, +/// or if child sizes after appending are inconsistent. +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id); /// \brief Shrink buffer capacity to the size required /// -/// Also applies shrinking to any child arrays. array must have been allocated -/// using ArrowArrayInitFromType +/// Also applies shrinking to any child arrays. array must have been allocated using +/// ArrowArrayInitFromType static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); /// \brief Finish building an ArrowArray @@ -1620,20 +1552,19 @@ static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array); /// into array->buffers and checks the actual size of the buffers /// against the expected size based on the final length. /// array must have been allocated using ArrowArrayInitFromType() -ArrowErrorCode ArrowArrayFinishBuildingDefault( - struct ArrowArray* array, struct ArrowError* error); +ArrowErrorCode ArrowArrayFinishBuildingDefault(struct ArrowArray* array, + struct ArrowError* error); /// \brief Finish building an ArrowArray with explicit validation /// -/// Finish building with an explicit validation level. This could perform less -/// validation (i.e. NANOARROW_VALIDATION_LEVEL_NONE or -/// NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU buffer data access is not -/// possible or more validation (i.e., NANOARROW_VALIDATION_LEVEL_FULL) if -/// buffer content was obtained from an untrusted or corruptable source. -ArrowErrorCode ArrowArrayFinishBuilding( - struct ArrowArray* array, - enum ArrowValidationLevel validation_level, - struct ArrowError* error); +/// Finish building with an explicit validation level. This could perform less validation +/// (i.e. NANOARROW_VALIDATION_LEVEL_NONE or NANOARROW_VALIDATION_LEVEL_MINIMAL) if CPU +/// buffer data access is not possible or more validation (i.e., +/// NANOARROW_VALIDATION_LEVEL_FULL) if buffer content was obtained from an untrusted or +/// corruptable source. +ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array, + enum ArrowValidationLevel validation_level, + struct ArrowError* error); /// @} @@ -1644,81 +1575,76 @@ ArrowErrorCode ArrowArrayFinishBuilding( /// @{ /// \brief Initialize the contents of an ArrowArrayView -void ArrowArrayViewInitFromType( - struct ArrowArrayView* array_view, enum ArrowType storage_type); +void ArrowArrayViewInitFromType(struct ArrowArrayView* array_view, + enum ArrowType storage_type); /// \brief Move an ArrowArrayView /// /// Transfers the ArrowArrayView data and lifecycle management to another /// address and resets the contents of src. -static inline void ArrowArrayViewMove( - struct ArrowArrayView* src, struct ArrowArrayView* dst); +static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst); /// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema -ArrowErrorCode ArrowArrayViewInitFromSchema( - struct ArrowArrayView* array_view, - struct ArrowSchema* schema, - struct ArrowError* error); +ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view, + struct ArrowSchema* schema, + struct ArrowError* error); /// \brief Allocate the schema_view->children array /// /// Includes the memory for each child struct ArrowArrayView -ArrowErrorCode ArrowArrayViewAllocateChildren( - struct ArrowArrayView* array_view, int64_t n_children); +ArrowErrorCode ArrowArrayViewAllocateChildren(struct ArrowArrayView* array_view, + int64_t n_children); /// \brief Set data-independent buffer sizes from length void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t length); /// \brief Set buffer sizes and data pointers from an ArrowArray -ArrowErrorCode ArrowArrayViewSetArray( - struct ArrowArrayView* array_view, - struct ArrowArray* array, - struct ArrowError* error); +ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view, + struct ArrowArray* array, struct ArrowError* error); -/// \brief Performs extra checks on the array that was set via -/// ArrowArrayViewSetArray() -ArrowErrorCode ArrowArrayViewValidateFull( - struct ArrowArrayView* array_view, struct ArrowError* error); +/// \brief Performs extra checks on the array that was set via ArrowArrayViewSetArray() +ArrowErrorCode ArrowArrayViewValidateFull(struct ArrowArrayView* array_view, + struct ArrowError* error); /// \brief Reset the contents of an ArrowArrayView and frees resources void ArrowArrayViewReset(struct ArrowArrayView* array_view); /// \brief Check for a null element in an ArrowArrayView -static inline int8_t ArrowArrayViewIsNull( - struct ArrowArrayView* array_view, int64_t i); +static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i); /// \brief Get the type id of a union array element -static inline int8_t ArrowArrayViewUnionTypeId( - struct ArrowArrayView* array_view, int64_t i); +static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view, + int64_t i); /// \brief Get the child index of a union array element -static inline int8_t ArrowArrayViewUnionChildIndex( - struct ArrowArrayView* array_view, int64_t i); +static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view, + int64_t i); /// \brief Get the index to use into the relevant union child array -static inline int64_t ArrowArrayViewUnionChildOffset( - struct ArrowArrayView* array_view, int64_t i); +static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, + int64_t i); /// \brief Get an element in an ArrowArrayView as an integer /// -/// This function does not check for null values, that values are actually -/// integers, or that values are within a valid range for an int64. -static inline int64_t ArrowArrayViewGetIntUnsafe( - struct ArrowArrayView* array_view, int64_t i); +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for an int64. +static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, + int64_t i); /// \brief Get an element in an ArrowArrayView as an unsigned integer /// -/// This function does not check for null values, that values are actually -/// integers, or that values are within a valid range for a uint64. -static inline uint64_t ArrowArrayViewGetUIntUnsafe( - struct ArrowArrayView* array_view, int64_t i); +/// This function does not check for null values, that values are actually integers, or +/// that values are within a valid range for a uint64. +static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view, + int64_t i); /// \brief Get an element in an ArrowArrayView as a double /// /// This function does not check for null values, or /// that values are within a valid range for a double. -static inline double ArrowArrayViewGetDoubleUnsafe( - struct ArrowArrayView* array_view, int64_t i); +static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view, + int64_t i); /// \brief Get an element in an ArrowArrayView as an ArrowStringView /// @@ -1737,8 +1663,8 @@ static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( /// This function does not check for null values. The out parameter must /// be initialized with ArrowDecimalInit() with the proper parameters for this /// type before calling this for the first time. -static inline void ArrowArrayViewGetDecimalUnsafe( - struct ArrowArrayView* array_view, int64_t i, struct ArrowDecimal* out); +static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out); /// @} @@ -1756,10 +1682,8 @@ static inline void ArrowArrayViewGetDecimalUnsafe( /// This function moves the ownership of schema to the array_stream. If /// this function returns NANOARROW_OK, the caller is responsible for /// releasing the ArrowArrayStream. -ArrowErrorCode ArrowBasicArrayStreamInit( - struct ArrowArrayStream* array_stream, - struct ArrowSchema* schema, - int64_t n_arrays); +ArrowErrorCode ArrowBasicArrayStreamInit(struct ArrowArrayStream* array_stream, + struct ArrowSchema* schema, int64_t n_arrays); /// \brief Set the ith ArrowArray in this ArrowArrayStream. /// @@ -1768,21 +1692,23 @@ ArrowErrorCode ArrowBasicArrayStreamInit( /// be greater than zero and less than the value of n_arrays passed in /// ArrowBasicArrayStreamInit(). Callers are not required to fill all /// n_arrays members (i.e., n_arrays is a maximum bound). -void ArrowBasicArrayStreamSetArray( - struct ArrowArrayStream* array_stream, int64_t i, struct ArrowArray* array); +void ArrowBasicArrayStreamSetArray(struct ArrowArrayStream* array_stream, int64_t i, + struct ArrowArray* array); /// \brief Validate the contents of this ArrowArrayStream /// /// array_stream must have been initialized with ArrowBasicArrayStreamInit(). -/// This function uses ArrowArrayStreamInitFromSchema() and -/// ArrowArrayStreamSetArray() to validate the contents of the arrays. -ArrowErrorCode ArrowBasicArrayStreamValidate( - struct ArrowArrayStream* array_stream, struct ArrowError* error); +/// This function uses ArrowArrayStreamInitFromSchema() and ArrowArrayStreamSetArray() +/// to validate the contents of the arrays. +ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream* array_stream, + struct ArrowError* error); /// @} // Inline function definitions + + #ifdef __cplusplus } #endif @@ -1812,493 +1738,469 @@ ArrowErrorCode ArrowBasicArrayStreamValidate( #include #include + + #ifdef __cplusplus extern "C" { #endif -static inline int64_t _ArrowGrowByFactor( - int64_t current_capacity, int64_t new_capacity) { - int64_t doubled_capacity = current_capacity * 2; - if (doubled_capacity > new_capacity) { - return doubled_capacity; - } else { - return new_capacity; - } +static inline int64_t _ArrowGrowByFactor(int64_t current_capacity, int64_t new_capacity) { + int64_t doubled_capacity = current_capacity * 2; + if (doubled_capacity > new_capacity) { + return doubled_capacity; + } else { + return new_capacity; + } } static inline void ArrowBufferInit(struct ArrowBuffer* buffer) { - buffer->data = NULL; - buffer->size_bytes = 0; - buffer->capacity_bytes = 0; - buffer->allocator = ArrowBufferAllocatorDefault(); + buffer->data = NULL; + buffer->size_bytes = 0; + buffer->capacity_bytes = 0; + buffer->allocator = ArrowBufferAllocatorDefault(); } static inline ArrowErrorCode ArrowBufferSetAllocator( struct ArrowBuffer* buffer, struct ArrowBufferAllocator allocator) { - if (buffer->data == NULL) { - buffer->allocator = allocator; - return NANOARROW_OK; - } else { - return EINVAL; - } + if (buffer->data == NULL) { + buffer->allocator = allocator; + return NANOARROW_OK; + } else { + return EINVAL; + } } static inline void ArrowBufferReset(struct ArrowBuffer* buffer) { - if (buffer->data != NULL) { - buffer->allocator.free( - &buffer->allocator, (uint8_t*)buffer->data, buffer->capacity_bytes); - buffer->data = NULL; - } + if (buffer->data != NULL) { + buffer->allocator.free(&buffer->allocator, (uint8_t*)buffer->data, + buffer->capacity_bytes); + buffer->data = NULL; + } - buffer->capacity_bytes = 0; - buffer->size_bytes = 0; + buffer->capacity_bytes = 0; + buffer->size_bytes = 0; } -static inline void ArrowBufferMove( - struct ArrowBuffer* src, struct ArrowBuffer* dst) { - memcpy(dst, src, sizeof(struct ArrowBuffer)); - src->data = NULL; - ArrowBufferReset(src); +static inline void ArrowBufferMove(struct ArrowBuffer* src, struct ArrowBuffer* dst) { + memcpy(dst, src, sizeof(struct ArrowBuffer)); + src->data = NULL; + ArrowBufferReset(src); } -static inline ArrowErrorCode ArrowBufferResize( - struct ArrowBuffer* buffer, - int64_t new_capacity_bytes, - char shrink_to_fit) { - if (new_capacity_bytes < 0) { - return EINVAL; - } - - if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) { - buffer->data = buffer->allocator.reallocate( - &buffer->allocator, - buffer->data, - buffer->capacity_bytes, - new_capacity_bytes); - if (buffer->data == NULL && new_capacity_bytes > 0) { - buffer->capacity_bytes = 0; - buffer->size_bytes = 0; - return ENOMEM; - } +static inline ArrowErrorCode ArrowBufferResize(struct ArrowBuffer* buffer, + int64_t new_capacity_bytes, + char shrink_to_fit) { + if (new_capacity_bytes < 0) { + return EINVAL; + } - buffer->capacity_bytes = new_capacity_bytes; + if (new_capacity_bytes > buffer->capacity_bytes || shrink_to_fit) { + buffer->data = buffer->allocator.reallocate( + &buffer->allocator, buffer->data, buffer->capacity_bytes, new_capacity_bytes); + if (buffer->data == NULL && new_capacity_bytes > 0) { + buffer->capacity_bytes = 0; + buffer->size_bytes = 0; + return ENOMEM; } - // Ensures that when shrinking that size <= capacity - if (new_capacity_bytes < buffer->size_bytes) { - buffer->size_bytes = new_capacity_bytes; - } + buffer->capacity_bytes = new_capacity_bytes; + } - return NANOARROW_OK; + // Ensures that when shrinking that size <= capacity + if (new_capacity_bytes < buffer->size_bytes) { + buffer->size_bytes = new_capacity_bytes; + } + + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowBufferReserve( - struct ArrowBuffer* buffer, int64_t additional_size_bytes) { - int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; - if (min_capacity_bytes <= buffer->capacity_bytes) { - return NANOARROW_OK; - } +static inline ArrowErrorCode ArrowBufferReserve(struct ArrowBuffer* buffer, + int64_t additional_size_bytes) { + int64_t min_capacity_bytes = buffer->size_bytes + additional_size_bytes; + if (min_capacity_bytes <= buffer->capacity_bytes) { + return NANOARROW_OK; + } - return ArrowBufferResize( - buffer, - _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), - 0); + return ArrowBufferResize( + buffer, _ArrowGrowByFactor(buffer->capacity_bytes, min_capacity_bytes), 0); } -static inline void ArrowBufferAppendUnsafe( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes) { - if (size_bytes > 0) { - memcpy(buffer->data + buffer->size_bytes, data, size_bytes); - buffer->size_bytes += size_bytes; - } +static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const void* data, + int64_t size_bytes) { + if (size_bytes > 0) { + memcpy(buffer->data + buffer->size_bytes, data, size_bytes); + buffer->size_bytes += size_bytes; + } } -static inline ArrowErrorCode ArrowBufferAppend( - struct ArrowBuffer* buffer, const void* data, int64_t size_bytes) { - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); +static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer, + const void* data, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); - ArrowBufferAppendUnsafe(buffer, data, size_bytes); - return NANOARROW_OK; + ArrowBufferAppendUnsafe(buffer, data, size_bytes); + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowBufferAppendInt8( - struct ArrowBuffer* buffer, int8_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); +static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer, + int8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int8_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt8( - struct ArrowBuffer* buffer, uint8_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt8(struct ArrowBuffer* buffer, + uint8_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint8_t)); } -static inline ArrowErrorCode ArrowBufferAppendInt16( - struct ArrowBuffer* buffer, int16_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); +static inline ArrowErrorCode ArrowBufferAppendInt16(struct ArrowBuffer* buffer, + int16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int16_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt16( - struct ArrowBuffer* buffer, uint16_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt16(struct ArrowBuffer* buffer, + uint16_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint16_t)); } -static inline ArrowErrorCode ArrowBufferAppendInt32( - struct ArrowBuffer* buffer, int32_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); +static inline ArrowErrorCode ArrowBufferAppendInt32(struct ArrowBuffer* buffer, + int32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int32_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt32( - struct ArrowBuffer* buffer, uint32_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt32(struct ArrowBuffer* buffer, + uint32_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint32_t)); } -static inline ArrowErrorCode ArrowBufferAppendInt64( - struct ArrowBuffer* buffer, int64_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); +static inline ArrowErrorCode ArrowBufferAppendInt64(struct ArrowBuffer* buffer, + int64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(int64_t)); } -static inline ArrowErrorCode ArrowBufferAppendUInt64( - struct ArrowBuffer* buffer, uint64_t value) { - return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); +static inline ArrowErrorCode ArrowBufferAppendUInt64(struct ArrowBuffer* buffer, + uint64_t value) { + return ArrowBufferAppend(buffer, &value, sizeof(uint64_t)); } -static inline ArrowErrorCode ArrowBufferAppendDouble( - struct ArrowBuffer* buffer, double value) { - return ArrowBufferAppend(buffer, &value, sizeof(double)); +static inline ArrowErrorCode ArrowBufferAppendDouble(struct ArrowBuffer* buffer, + double value) { + return ArrowBufferAppend(buffer, &value, sizeof(double)); } -static inline ArrowErrorCode ArrowBufferAppendFloat( - struct ArrowBuffer* buffer, float value) { - return ArrowBufferAppend(buffer, &value, sizeof(float)); +static inline ArrowErrorCode ArrowBufferAppendFloat(struct ArrowBuffer* buffer, + float value) { + return ArrowBufferAppend(buffer, &value, sizeof(float)); } -static inline ArrowErrorCode ArrowBufferAppendStringView( - struct ArrowBuffer* buffer, struct ArrowStringView value) { - return ArrowBufferAppend(buffer, value.data, value.size_bytes); +static inline ArrowErrorCode ArrowBufferAppendStringView(struct ArrowBuffer* buffer, + struct ArrowStringView value) { + return ArrowBufferAppend(buffer, value.data, value.size_bytes); } -static inline ArrowErrorCode ArrowBufferAppendBufferView( - struct ArrowBuffer* buffer, struct ArrowBufferView value) { - return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); +static inline ArrowErrorCode ArrowBufferAppendBufferView(struct ArrowBuffer* buffer, + struct ArrowBufferView value) { + return ArrowBufferAppend(buffer, value.data.data, value.size_bytes); } -static inline ArrowErrorCode ArrowBufferAppendFill( - struct ArrowBuffer* buffer, uint8_t value, int64_t size_bytes) { - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); +static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer, + uint8_t value, int64_t size_bytes) { + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes)); - memset(buffer->data + buffer->size_bytes, value, size_bytes); - buffer->size_bytes += size_bytes; - return NANOARROW_OK; + memset(buffer->data + buffer->size_bytes, value, size_bytes); + buffer->size_bytes += size_bytes; + return NANOARROW_OK; } static const uint8_t _ArrowkBitmask[] = {1, 2, 4, 8, 16, 32, 64, 128}; -static const uint8_t _ArrowkFlippedBitmask[] = { - 254, 253, 251, 247, 239, 223, 191, 127}; +static const uint8_t _ArrowkFlippedBitmask[] = {254, 253, 251, 247, 239, 223, 191, 127}; static const uint8_t _ArrowkPrecedingBitmask[] = {0, 1, 3, 7, 15, 31, 63, 127}; -static const uint8_t _ArrowkTrailingBitmask[] = { - 255, 254, 252, 248, 240, 224, 192, 128}; +static const uint8_t _ArrowkTrailingBitmask[] = {255, 254, 252, 248, 240, 224, 192, 128}; static const uint8_t _ArrowkBytePopcount[] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, - 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, - 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, - 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, + 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, + 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, + 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, + 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, + 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, + 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, + 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, + 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; static inline int64_t _ArrowRoundUpToMultipleOf8(int64_t value) { - return (value + 7) & ~((int64_t)7); + return (value + 7) & ~((int64_t)7); } static inline int64_t _ArrowRoundDownToMultipleOf8(int64_t value) { - return (value / 8) * 8; + return (value / 8) * 8; } static inline int64_t _ArrowBytesForBits(int64_t bits) { - return (bits >> 3) + ((bits & 7) != 0); + return (bits >> 3) + ((bits & 7) != 0); } static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { - *out = - (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | - values[4] << 4 | values[5] << 5 | values[6] << 6 | values[7] << 7); + *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | + values[5] << 5 | values[6] << 6 | values[7] << 7); } static inline void _ArrowBitmapPackInt32(const int32_t* values, uint8_t* out) { - *out = - (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | - values[4] << 4 | values[5] << 5 | values[6] << 6 | values[7] << 7); + *out = (values[0] | values[1] << 1 | values[2] << 2 | values[3] << 3 | values[4] << 4 | + values[5] << 5 | values[6] << 6 | values[7] << 7); } static inline int8_t ArrowBitGet(const uint8_t* bits, int64_t i) { - return (bits[i >> 3] >> (i & 0x07)) & 1; + return (bits[i >> 3] >> (i & 0x07)) & 1; } static inline void ArrowBitSet(uint8_t* bits, int64_t i) { - bits[i / 8] |= _ArrowkBitmask[i % 8]; + bits[i / 8] |= _ArrowkBitmask[i % 8]; } static inline void ArrowBitClear(uint8_t* bits, int64_t i) { - bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; + bits[i / 8] &= _ArrowkFlippedBitmask[i % 8]; } static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set) { - bits[i / 8] ^= ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & - _ArrowkBitmask[i % 8]; -} - -static inline void ArrowBitsSetTo( - uint8_t* bits, int64_t start_offset, int64_t length, uint8_t bits_are_set) { - const int64_t i_begin = start_offset; - const int64_t i_end = start_offset + length; - const uint8_t fill_byte = (uint8_t)(-bits_are_set); - - const int64_t bytes_begin = i_begin / 8; - const int64_t bytes_end = i_end / 8 + 1; - - const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; - const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; - - if (bytes_end == bytes_begin + 1) { - // set bits within a single byte - const uint8_t - only_byte_mask = i_end % 8 == 0 ? - first_byte_mask : - (uint8_t)(first_byte_mask | last_byte_mask); - bits[bytes_begin] &= only_byte_mask; - bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); - return; - } + bits[i / 8] ^= + ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) & _ArrowkBitmask[i % 8]; +} - // set/clear trailing bits of first byte - bits[bytes_begin] &= first_byte_mask; - bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); +static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t length, + uint8_t bits_are_set) { + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; + const uint8_t fill_byte = (uint8_t)(-bits_are_set); - if (bytes_end - bytes_begin > 2) { - // set/clear whole bytes - memset( - bits + bytes_begin + 1, - fill_byte, - (size_t)(bytes_end - bytes_begin - 2)); - } + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_end = i_end / 8 + 1; - if (i_end % 8 == 0) { - return; - } + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; + + if (bytes_end == bytes_begin + 1) { + // set bits within a single byte + const uint8_t only_byte_mask = + i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); + bits[bytes_begin] &= only_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); + return; + } - // set/clear leading bits of last byte - bits[bytes_end - 1] &= last_byte_mask; - bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); + // set/clear trailing bits of first byte + bits[bytes_begin] &= first_byte_mask; + bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); + + if (bytes_end - bytes_begin > 2) { + // set/clear whole bytes + memset(bits + bytes_begin + 1, fill_byte, (size_t)(bytes_end - bytes_begin - 2)); + } + + if (i_end % 8 == 0) { + return; + } + + // set/clear leading bits of last byte + bits[bytes_end - 1] &= last_byte_mask; + bits[bytes_end - 1] |= (uint8_t)(fill_byte & ~last_byte_mask); } -static inline int64_t ArrowBitCountSet( - const uint8_t* bits, int64_t start_offset, int64_t length) { - if (length == 0) { - return 0; - } +static inline int64_t ArrowBitCountSet(const uint8_t* bits, int64_t start_offset, + int64_t length) { + if (length == 0) { + return 0; + } - const int64_t i_begin = start_offset; - const int64_t i_end = start_offset + length; + const int64_t i_begin = start_offset; + const int64_t i_end = start_offset + length; - const int64_t bytes_begin = i_begin / 8; - const int64_t bytes_end = i_end / 8 + 1; + const int64_t bytes_begin = i_begin / 8; + const int64_t bytes_end = i_end / 8 + 1; - if (bytes_end == bytes_begin + 1) { - // count bits within a single byte - const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; - const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; + if (bytes_end == bytes_begin + 1) { + // count bits within a single byte + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_end % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_begin % 8]; - const uint8_t - only_byte_mask = i_end % 8 == 0 ? - first_byte_mask : - (uint8_t)(first_byte_mask & last_byte_mask); + const uint8_t only_byte_mask = + i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask & last_byte_mask); - const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; - return _ArrowkBytePopcount[byte_masked]; - } + const uint8_t byte_masked = bits[bytes_begin] & only_byte_mask; + return _ArrowkBytePopcount[byte_masked]; + } - const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; - const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; - int64_t count = 0; + const uint8_t first_byte_mask = _ArrowkPrecedingBitmask[i_begin % 8]; + const uint8_t last_byte_mask = _ArrowkTrailingBitmask[i_end % 8]; + int64_t count = 0; - // first byte - count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; + // first byte + count += _ArrowkBytePopcount[bits[bytes_begin] & ~first_byte_mask]; - // middle bytes - for (int64_t i = bytes_begin + 1; i < (bytes_end - 1); i++) { - count += _ArrowkBytePopcount[bits[i]]; - } + // middle bytes + for (int64_t i = bytes_begin + 1; i < (bytes_end - 1); i++) { + count += _ArrowkBytePopcount[bits[i]]; + } - // last byte - count += _ArrowkBytePopcount[bits[bytes_end - 1] & ~last_byte_mask]; + // last byte + count += _ArrowkBytePopcount[bits[bytes_end - 1] & ~last_byte_mask]; - return count; + return count; } static inline void ArrowBitmapInit(struct ArrowBitmap* bitmap) { - ArrowBufferInit(&bitmap->buffer); - bitmap->size_bits = 0; + ArrowBufferInit(&bitmap->buffer); + bitmap->size_bits = 0; } -static inline void ArrowBitmapMove( - struct ArrowBitmap* src, struct ArrowBitmap* dst) { - ArrowBufferMove(&src->buffer, &dst->buffer); - dst->size_bits = src->size_bits; - src->size_bits = 0; +static inline void ArrowBitmapMove(struct ArrowBitmap* src, struct ArrowBitmap* dst) { + ArrowBufferMove(&src->buffer, &dst->buffer); + dst->size_bits = src->size_bits; + src->size_bits = 0; } -static inline ArrowErrorCode ArrowBitmapReserve( - struct ArrowBitmap* bitmap, int64_t additional_size_bits) { - int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; - if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) { - return NANOARROW_OK; - } +static inline ArrowErrorCode ArrowBitmapReserve(struct ArrowBitmap* bitmap, + int64_t additional_size_bits) { + int64_t min_capacity_bits = bitmap->size_bits + additional_size_bits; + if (min_capacity_bits <= (bitmap->buffer.capacity_bytes * 8)) { + return NANOARROW_OK; + } - NANOARROW_RETURN_NOT_OK(ArrowBufferReserve( - &bitmap->buffer, _ArrowBytesForBits(additional_size_bits))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferReserve(&bitmap->buffer, _ArrowBytesForBits(additional_size_bits))); - bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; - return NANOARROW_OK; + bitmap->buffer.data[bitmap->buffer.capacity_bytes - 1] = 0; + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowBitmapResize( - struct ArrowBitmap* bitmap, int64_t new_capacity_bits, char shrink_to_fit) { - if (new_capacity_bits < 0) { - return EINVAL; - } +static inline ArrowErrorCode ArrowBitmapResize(struct ArrowBitmap* bitmap, + int64_t new_capacity_bits, + char shrink_to_fit) { + if (new_capacity_bits < 0) { + return EINVAL; + } - int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits); - NANOARROW_RETURN_NOT_OK( - ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit)); + int64_t new_capacity_bytes = _ArrowBytesForBits(new_capacity_bits); + NANOARROW_RETURN_NOT_OK( + ArrowBufferResize(&bitmap->buffer, new_capacity_bytes, shrink_to_fit)); - if (new_capacity_bits < bitmap->size_bits) { - bitmap->size_bits = new_capacity_bits; - } + if (new_capacity_bits < bitmap->size_bits) { + bitmap->size_bits = new_capacity_bits; + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowBitmapAppend(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); + + ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); + return NANOARROW_OK; +} - return NANOARROW_OK; +static inline void ArrowBitmapAppendUnsafe(struct ArrowBitmap* bitmap, + uint8_t bits_are_set, int64_t length) { + ArrowBitsSetTo(bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); + bitmap->size_bits += length; + bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); } -static inline ArrowErrorCode ArrowBitmapAppend( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) { - NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(bitmap, length)); - - ArrowBitmapAppendUnsafe(bitmap, bits_are_set, length); - return NANOARROW_OK; -} +static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + const int8_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } -static inline void ArrowBitmapAppendUnsafe( - struct ArrowBitmap* bitmap, uint8_t bits_are_set, int64_t length) { - ArrowBitsSetTo( - bitmap->buffer.data, bitmap->size_bits, length, bits_are_set); - bitmap->size_bits += length; - bitmap->buffer.size_bytes = _ArrowBytesForBits(bitmap->size_bits); -} + const int8_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; -static inline void ArrowBitmapAppendInt8Unsafe( - struct ArrowBitmap* bitmap, const int8_t* values, int64_t n_values) { - if (n_values == 0) { - return; + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); } - const int8_t* values_cursor = values; - int64_t n_remaining = n_values; - int64_t out_i_cursor = bitmap->size_bits; - uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; - - // First byte - if ((out_i_cursor % 8) != 0) { - int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - - out_i_cursor; - for (int i = 0; i < n_partial_bits; i++) { - ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); - } + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } - out_cursor++; - values_cursor += n_partial_bits; - n_remaining -= n_partial_bits; - } + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt8(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } - // Middle bytes - int64_t n_full_bytes = n_remaining / 8; - for (int64_t i = 0; i < n_full_bytes; i++) { - _ArrowBitmapPackInt8(values_cursor, out_cursor); - values_cursor += 8; - out_cursor++; + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); } + out_cursor++; + } - // Last byte - out_i_cursor += n_full_bytes * 8; - n_remaining -= n_full_bytes * 8; - if (n_remaining > 0) { - // Zero out the last byte - *out_cursor = 0x00; - for (int i = 0; i < n_remaining; i++) { - ArrowBitSetTo( - bitmap->buffer.data, out_i_cursor++, values_cursor[i]); - } - out_cursor++; - } - - bitmap->size_bits += n_values; - bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; } -static inline void ArrowBitmapAppendInt32Unsafe( - struct ArrowBitmap* bitmap, const int32_t* values, int64_t n_values) { - if (n_values == 0) { - return; - } +static inline void ArrowBitmapAppendInt32Unsafe(struct ArrowBitmap* bitmap, + const int32_t* values, int64_t n_values) { + if (n_values == 0) { + return; + } - const int32_t* values_cursor = values; - int64_t n_remaining = n_values; - int64_t out_i_cursor = bitmap->size_bits; - uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; - - // First byte - if ((out_i_cursor % 8) != 0) { - int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - - out_i_cursor; - for (int i = 0; i < n_partial_bits; i++) { - ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); - } + const int32_t* values_cursor = values; + int64_t n_remaining = n_values; + int64_t out_i_cursor = bitmap->size_bits; + uint8_t* out_cursor = bitmap->buffer.data + bitmap->size_bits / 8; - out_cursor++; - values_cursor += n_partial_bits; - n_remaining -= n_partial_bits; + // First byte + if ((out_i_cursor % 8) != 0) { + int64_t n_partial_bits = _ArrowRoundUpToMultipleOf8(out_i_cursor) - out_i_cursor; + for (int i = 0; i < n_partial_bits; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values[i]); } - // Middle bytes - int64_t n_full_bytes = n_remaining / 8; - for (int64_t i = 0; i < n_full_bytes; i++) { - _ArrowBitmapPackInt32(values_cursor, out_cursor); - values_cursor += 8; - out_cursor++; - } + out_cursor++; + values_cursor += n_partial_bits; + n_remaining -= n_partial_bits; + } + + // Middle bytes + int64_t n_full_bytes = n_remaining / 8; + for (int64_t i = 0; i < n_full_bytes; i++) { + _ArrowBitmapPackInt32(values_cursor, out_cursor); + values_cursor += 8; + out_cursor++; + } - // Last byte - out_i_cursor += n_full_bytes * 8; - n_remaining -= n_full_bytes * 8; - if (n_remaining > 0) { - // Zero out the last byte - *out_cursor = 0x00; - for (int i = 0; i < n_remaining; i++) { - ArrowBitSetTo( - bitmap->buffer.data, out_i_cursor++, values_cursor[i]); - } - out_cursor++; - } - - bitmap->size_bits += n_values; - bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; + // Last byte + out_i_cursor += n_full_bytes * 8; + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte + *out_cursor = 0x00; + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + } + out_cursor++; + } + + bitmap->size_bits += n_values; + bitmap->buffer.size_bytes = out_cursor - bitmap->buffer.data; } static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { - ArrowBufferReset(&bitmap->buffer); - bitmap->size_bits = 0; + ArrowBufferReset(&bitmap->buffer); + bitmap->size_bits = 0; } #ifdef __cplusplus @@ -2332,912 +2234,860 @@ static inline void ArrowBitmapReset(struct ArrowBitmap* bitmap) { #include #include + + + #ifdef __cplusplus extern "C" { #endif -static inline struct ArrowBitmap* ArrowArrayValidityBitmap( - struct ArrowArray* array) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - return &private_data->bitmap; -} - -static inline struct ArrowBuffer* ArrowArrayBuffer( - struct ArrowArray* array, int64_t i) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - switch (i) { - case 0: - return &private_data->bitmap.buffer; - default: - return private_data->buffers + i - 1; - } +static inline struct ArrowBitmap* ArrowArrayValidityBitmap(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + return &private_data->bitmap; +} + +static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array, int64_t i) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + switch (i) { + case 0: + return &private_data->bitmap.buffer; + default: + return private_data->buffers + i - 1; + } } // We don't currently support the case of unions where type_id != child_index; // however, these functions are used to keep track of where that assumption // is made. -static inline int8_t _ArrowArrayUnionChildIndex( - struct ArrowArray* array, int8_t type_id) { - return type_id; -} - -static inline int8_t _ArrowArrayUnionTypeId( - struct ArrowArray* array, int8_t child_index) { - return child_index; +static inline int8_t _ArrowArrayUnionChildIndex(struct ArrowArray* array, + int8_t type_id) { + return type_id; } -static inline int8_t _ArrowParseUnionTypeIds( - const char* type_ids, int8_t* out) { - if (*type_ids == '\0') { - return 0; - } - - int32_t i = 0; - long type_id; - char* end_ptr; - do { - type_id = strtol(type_ids, &end_ptr, 10); - if (end_ptr == type_ids || type_id < 0 || type_id > 127) { - return -1; - } - - if (out != NULL) { - out[i] = type_id; - } - - i++; - - type_ids = end_ptr; - if (*type_ids == '\0') { - return i; - } else if (*type_ids != ',') { - return -1; - } else { - type_ids++; - } - } while (1); - - return -1; +static inline int8_t _ArrowArrayUnionTypeId(struct ArrowArray* array, + int8_t child_index) { + return child_index; } -static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices( - const int8_t* type_ids, int64_t n_type_ids, int64_t n_children) { - if (n_type_ids != n_children) { - return 0; - } +static inline int8_t _ArrowParseUnionTypeIds(const char* type_ids, int8_t* out) { + if (*type_ids == '\0') { + return 0; + } - for (int8_t i = 0; i < n_type_ids; i++) { - if (type_ids[i] != i) { - return 0; - } + int32_t i = 0; + long type_id; + char* end_ptr; + do { + type_id = strtol(type_ids, &end_ptr, 10); + if (end_ptr == type_ids || type_id < 0 || type_id > 127) { + return -1; } - return 1; -} - -static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices( - const char* type_id_str, int64_t n_children) { - int8_t type_ids[128]; - int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); - return _ArrowParsedUnionTypeIdsWillEqualChildIndices( - type_ids, n_type_ids, n_children); -} - -static inline ArrowErrorCode ArrowArrayStartAppending( - struct ArrowArray* array) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_UNINITIALIZED: - return EINVAL; - case NANOARROW_TYPE_SPARSE_UNION: - case NANOARROW_TYPE_DENSE_UNION: - // Note that this value could be -1 if the type_ids string was - // invalid - if (private_data->union_type_id_is_child_index != 1) { - return EINVAL; - } else { - break; - } - default: - break; - } - if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { - return EINVAL; + if (out != NULL) { + out[i] = type_id; } - // Initialize any data offset buffer with a single zero - for (int i = 0; i < 3; i++) { - if (private_data->layout.buffer_type[i] == - NANOARROW_BUFFER_TYPE_DATA_OFFSET && - private_data->layout.element_size_bits[i] == 64) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); - } else if ( - private_data->layout.buffer_type[i] == - NANOARROW_BUFFER_TYPE_DATA_OFFSET && - private_data->layout.element_size_bits[i] == 32) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); - } - } + i++; - // Start building any child arrays - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); + type_ids = end_ptr; + if (*type_ids == '\0') { + return i; + } else if (*type_ids != ',') { + return -1; + } else { + type_ids++; } + } while (1); - return NANOARROW_OK; + return -1; } -static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { - for (int64_t i = 0; i < 3; i++) { - struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); - NANOARROW_RETURN_NOT_OK( - ArrowBufferResize(buffer, buffer->size_bytes, 1)); - } +static inline int8_t _ArrowParsedUnionTypeIdsWillEqualChildIndices(const int8_t* type_ids, + int64_t n_type_ids, + int64_t n_children) { + if (n_type_ids != n_children) { + return 0; + } - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); + for (int8_t i = 0; i < n_type_ids; i++) { + if (type_ids[i] != i) { + return 0; } + } - return NANOARROW_OK; + return 1; } -static inline ArrowErrorCode _ArrowArrayAppendBits( - struct ArrowArray* array, int64_t buffer_i, uint8_t value, int64_t n) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); - int64_t bytes_required = _ArrowRoundUpToMultipleOf8( - private_data->layout - .element_size_bits[buffer_i] * - (array->length + 1)) / - 8; - if (bytes_required > buffer->size_bytes) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill( - buffer, 0, bytes_required - buffer->size_bytes)); - } - - ArrowBitsSetTo(buffer->data, array->length, n, value); - return NANOARROW_OK; +static inline int8_t _ArrowUnionTypeIdsWillEqualChildIndices(const char* type_id_str, + int64_t n_children) { + int8_t type_ids[128]; + int8_t n_type_ids = _ArrowParseUnionTypeIds(type_id_str, type_ids); + return _ArrowParsedUnionTypeIdsWillEqualChildIndices(type_ids, n_type_ids, n_children); } -static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal( - struct ArrowArray* array, int64_t n, uint8_t is_valid) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - if (n == 0) { - return NANOARROW_OK; - } - - // Some type-specific handling - switch (private_data->storage_type) { - case NANOARROW_TYPE_NA: - // (An empty value for a null array *is* a null) - array->null_count += n; - array->length += n; - return NANOARROW_OK; - - case NANOARROW_TYPE_DENSE_UNION: { - // Add one null to the first child and append n references to that - // child - int8_t type_id = _ArrowArrayUnionTypeId(array, 0); - NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendEmptyInternal( - array->children[0], 1, is_valid)); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); - for (int64_t i = 0; i < n; i++) { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - ArrowArrayBuffer(array, 1), - (int32_t)array->children[0]->length - 1)); - } - // For the purposes of array->null_count, union elements are never - // considered "null" even if some children contain nulls. - array->length += n; - return NANOARROW_OK; - } - - case NANOARROW_TYPE_SPARSE_UNION: { - // Add n nulls to the first child and append n references to that - // child - int8_t type_id = _ArrowArrayUnionTypeId(array, 0); - NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendEmptyInternal( - array->children[0], n, is_valid)); - for (int64_t i = 1; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK( - ArrowArrayAppendEmpty(array->children[i], n)); - } - - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); - // For the purposes of array->null_count, union elements are never - // considered "null" even if some children contain nulls. - array->length += n; - return NANOARROW_OK; - } +static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray* array) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; - case NANOARROW_TYPE_FIXED_SIZE_LIST: - NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( - array->children[0], - n * private_data->layout.child_size_elements)); - break; - case NANOARROW_TYPE_STRUCT: - for (int64_t i = 0; i < array->n_children; i++) { - NANOARROW_RETURN_NOT_OK( - ArrowArrayAppendEmpty(array->children[i], n)); - } - break; - - default: - break; - } - - // Append n is_valid bits to the validity bitmap. If we haven't allocated a - // bitmap yet and we need to append nulls, do it now. - if (!is_valid && private_data->bitmap.buffer.data == NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapReserve(&private_data->bitmap, array->length + n)); - ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); - ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); - } else if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); - ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); - } - - // Add appropriate buffer fill - struct ArrowBuffer* buffer; - int64_t size_bytes; - - for (int i = 0; i < 3; i++) { - buffer = ArrowArrayBuffer(array, i); - size_bytes = private_data->layout.element_size_bits[i] / 8; - - switch (private_data->layout.buffer_type[i]) { - case NANOARROW_BUFFER_TYPE_NONE: - case NANOARROW_BUFFER_TYPE_VALIDITY: - continue; - case NANOARROW_BUFFER_TYPE_DATA_OFFSET: - // Append the current value at the end of the offset buffer for - // each element - NANOARROW_RETURN_NOT_OK( - ArrowBufferReserve(buffer, size_bytes * n)); - - for (int64_t j = 0; j < n; j++) { - ArrowBufferAppendUnsafe( - buffer, - buffer->data + size_bytes * (array->length + j), - size_bytes); - } - - // Skip the data buffer - i++; - continue; - case NANOARROW_BUFFER_TYPE_DATA: - // Zero out the next bit of memory - if (private_data->layout.element_size_bits[i] % 8 == 0) { - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFill(buffer, 0, size_bytes * n)); - } else { - NANOARROW_RETURN_NOT_OK( - _ArrowArrayAppendBits(array, i, 0, n)); - } - continue; - - case NANOARROW_BUFFER_TYPE_TYPE_ID: - case NANOARROW_BUFFER_TYPE_UNION_OFFSET: - // These cases return above - return EINVAL; - } - } - - array->length += n; - array->null_count += n * !is_valid; - return NANOARROW_OK; + switch (private_data->storage_type) { + case NANOARROW_TYPE_UNINITIALIZED: + return EINVAL; + case NANOARROW_TYPE_SPARSE_UNION: + case NANOARROW_TYPE_DENSE_UNION: + // Note that this value could be -1 if the type_ids string was invalid + if (private_data->union_type_id_is_child_index != 1) { + return EINVAL; + } else { + break; + } + default: + break; + } + if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) { + return EINVAL; + } + + // Initialize any data offset buffer with a single zero + for (int i = 0; i < 3; i++) { + if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 64) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, i), 0)); + } else if (private_data->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_DATA_OFFSET && + private_data->layout.element_size_bits[i] == 32) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, i), 0)); + } + } + + // Start building any child arrays + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i])); + } + + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowArrayAppendNull( - struct ArrowArray* array, int64_t n) { - return _ArrowArrayAppendEmptyInternal(array, n, 0); -} - -static inline ArrowErrorCode ArrowArrayAppendEmpty( - struct ArrowArray* array, int64_t n) { - return _ArrowArrayAppendEmptyInternal(array, n, 1); -} - -static inline ArrowErrorCode ArrowArrayAppendInt( - struct ArrowArray* array, int64_t value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_INT64: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); - break; - case NANOARROW_TYPE_INT32: - _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt32(data_buffer, (int32_t)value)); - break; - case NANOARROW_TYPE_INT16: - _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt16(data_buffer, (int16_t)value)); - break; - case NANOARROW_TYPE_INT8: - _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt8(data_buffer, (int8_t)value)); - break; - case NANOARROW_TYPE_UINT64: - case NANOARROW_TYPE_UINT32: - case NANOARROW_TYPE_UINT16: - case NANOARROW_TYPE_UINT8: - _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); - return ArrowArrayAppendUInt(array, value); - case NANOARROW_TYPE_DOUBLE: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendDouble(data_buffer, value)); - break; - case NANOARROW_TYPE_FLOAT: - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value)); - break; - case NANOARROW_TYPE_BOOL: - NANOARROW_RETURN_NOT_OK( - _ArrowArrayAppendBits(array, 1, value != 0, 1)); - break; - default: - return EINVAL; - } - - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } +static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) { + for (int64_t i = 0; i < 3; i++) { + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i); + NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1)); + } + + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i])); + } + + return NANOARROW_OK; +} + +static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array, + int64_t buffer_i, uint8_t value, + int64_t n) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i); + int64_t bytes_required = + _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] * + (array->length + 1)) / + 8; + if (bytes_required > buffer->size_bytes) { + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes)); + } - array->length++; - return NANOARROW_OK; + ArrowBitsSetTo(buffer->data, array->length, n, value); + return NANOARROW_OK; } -static inline ArrowErrorCode ArrowArrayAppendUInt( - struct ArrowArray* array, uint64_t value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_UINT64: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); - break; - case NANOARROW_TYPE_UINT32: - _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); - break; - case NANOARROW_TYPE_UINT16: - _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); - break; - case NANOARROW_TYPE_UINT8: - _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX); - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); - break; - case NANOARROW_TYPE_INT64: - case NANOARROW_TYPE_INT32: - case NANOARROW_TYPE_INT16: - case NANOARROW_TYPE_INT8: - _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); - return ArrowArrayAppendInt(array, value); - case NANOARROW_TYPE_DOUBLE: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendDouble(data_buffer, value)); - break; - case NANOARROW_TYPE_FLOAT: - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value)); - break; - case NANOARROW_TYPE_BOOL: - NANOARROW_RETURN_NOT_OK( - _ArrowArrayAppendBits(array, 1, value != 0, 1)); - break; - default: - return EINVAL; - } - - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } +static inline ArrowErrorCode _ArrowArrayAppendEmptyInternal(struct ArrowArray* array, + int64_t n, uint8_t is_valid) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; - array->length++; + if (n == 0) { return NANOARROW_OK; -} - -static inline ArrowErrorCode ArrowArrayAppendDouble( - struct ArrowArray* array, double value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_DOUBLE: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(data_buffer, &value, sizeof(double))); - break; - case NANOARROW_TYPE_FLOAT: - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendFloat(data_buffer, (float)value)); - break; - default: - return EINVAL; - } - - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } + } + + // Some type-specific handling + switch (private_data->storage_type) { + case NANOARROW_TYPE_NA: + // (An empty value for a null array *is* a null) + array->null_count += n; + array->length += n; + return NANOARROW_OK; + + case NANOARROW_TYPE_DENSE_UNION: { + // Add one null to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], 1, is_valid)); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + for (int64_t i = 0; i < n; i++) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[0]->length - 1)); + } + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. + array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_SPARSE_UNION: { + // Add n nulls to the first child and append n references to that child + int8_t type_id = _ArrowArrayUnionTypeId(array, 0); + NANOARROW_RETURN_NOT_OK( + _ArrowArrayAppendEmptyInternal(array->children[0], n, is_valid)); + for (int64_t i = 1; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendFill(ArrowArrayBuffer(array, 0), type_id, n)); + // For the purposes of array->null_count, union elements are never considered "null" + // even if some children contain nulls. + array->length += n; + return NANOARROW_OK; + } + + case NANOARROW_TYPE_FIXED_SIZE_LIST: + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty( + array->children[0], n * private_data->layout.child_size_elements)); + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], n)); + } + break; + + default: + break; + } + + // Append n is_valid bits to the validity bitmap. If we haven't allocated a bitmap yet + // and we need to append nulls, do it now. + if (!is_valid && private_data->bitmap.buffer.data == NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } else if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n)); + ArrowBitmapAppendUnsafe(&private_data->bitmap, is_valid, n); + } + + // Add appropriate buffer fill + struct ArrowBuffer* buffer; + int64_t size_bytes; + + for (int i = 0; i < 3; i++) { + buffer = ArrowArrayBuffer(array, i); + size_bytes = private_data->layout.element_size_bits[i] / 8; + + switch (private_data->layout.buffer_type[i]) { + case NANOARROW_BUFFER_TYPE_NONE: + case NANOARROW_BUFFER_TYPE_VALIDITY: + continue; + case NANOARROW_BUFFER_TYPE_DATA_OFFSET: + // Append the current value at the end of the offset buffer for each element + NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n)); + + for (int64_t j = 0; j < n; j++) { + ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j), + size_bytes); + } - array->length++; - return NANOARROW_OK; -} + // Skip the data buffer + i++; + continue; + case NANOARROW_BUFFER_TYPE_DATA: + // Zero out the next bit of memory + if (private_data->layout.element_size_bits[i] % 8 == 0) { + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n)); + } else { + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n)); + } + continue; -static inline ArrowErrorCode ArrowArrayAppendBytes( - struct ArrowArray* array, struct ArrowBufferView value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); - struct ArrowBuffer* data_buffer = ArrowArrayBuffer( - array, - 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); - int32_t offset; - int64_t large_offset; - int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_BINARY: - offset = ((int32_t*)offset_buffer->data)[array->length]; - if ((offset + value.size_bytes) > INT32_MAX) { - return EINVAL; - } - - offset += value.size_bytes; - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value.data.data, value.size_bytes)); - break; - - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - large_offset = ((int64_t*)offset_buffer->data)[array->length]; - large_offset += value.size_bytes; - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - offset_buffer, &large_offset, sizeof(int64_t))); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value.data.data, value.size_bytes)); - break; - - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - if (value.size_bytes != fixed_size_bytes) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value.data.data, value.size_bytes)); - break; - default: - return EINVAL; + case NANOARROW_BUFFER_TYPE_TYPE_ID: + case NANOARROW_BUFFER_TYPE_UNION_OFFSET: + // These cases return above + return EINVAL; } - - if (private_data->bitmap.buffer.data != NULL) { + } + + array->length += n; + array->null_count += n * !is_valid; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 0); +} + +static inline ArrowErrorCode ArrowArrayAppendEmpty(struct ArrowArray* array, int64_t n) { + return _ArrowArrayAppendEmptyInternal(array, n, 1); +} + +static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array, + int64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_INT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(int64_t))); + break; + case NANOARROW_TYPE_INT32: + _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(data_buffer, (int32_t)value)); + break; + case NANOARROW_TYPE_INT16: + _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(data_buffer, (int16_t)value)); + break; + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(data_buffer, (int8_t)value)); + break; + case NANOARROW_TYPE_UINT64: + case NANOARROW_TYPE_UINT32: + case NANOARROW_TYPE_UINT16: + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); + return ArrowArrayAppendUInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value)); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array, + uint64_t value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_UINT64: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(uint64_t))); + break; + case NANOARROW_TYPE_UINT32: + _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(data_buffer, (uint32_t)value)); + break; + case NANOARROW_TYPE_UINT16: + _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(data_buffer, (uint16_t)value)); + break; + case NANOARROW_TYPE_UINT8: + _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(data_buffer, (uint8_t)value)); + break; + case NANOARROW_TYPE_INT64: + case NANOARROW_TYPE_INT32: + case NANOARROW_TYPE_INT16: + case NANOARROW_TYPE_INT8: + _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX); + return ArrowArrayAppendInt(array, value); + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(data_buffer, value)); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value)); + break; + case NANOARROW_TYPE_BOOL: + NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array, + double value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DOUBLE: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double))); + break; + case NANOARROW_TYPE_FLOAT: + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, (float)value)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array, + struct ArrowBufferView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1); + struct ArrowBuffer* data_buffer = ArrowArrayBuffer( + array, 1 + (private_data->storage_type != NANOARROW_TYPE_FIXED_SIZE_BINARY)); + int32_t offset; + int64_t large_offset; + int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + offset = ((int32_t*)offset_buffer->data)[array->length]; + if ((offset + value.size_bytes) > INT32_MAX) { + return EINVAL; + } + + offset += value.size_bytes; + NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + large_offset = ((int64_t*)offset_buffer->data)[array->length]; + large_offset += value.size_bytes; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t))); + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + if (value.size_bytes != fixed_size_bytes) { + return EINVAL; + } + + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes)); + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array, + struct ArrowStringView value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + struct ArrowBufferView buffer_view; + buffer_view.data.data = value.data; + buffer_view.size_bytes = value.size_bytes; + + switch (private_data->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_LARGE_STRING: + return ArrowArrayAppendBytes(array, buffer_view); + default: + return EINVAL; + } +} + +static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array, + struct ArrowDecimal* value) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + if (value->n_words != 2) { + return EINVAL; + } else { NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } - - array->length++; - return NANOARROW_OK; -} - -static inline ArrowErrorCode ArrowArrayAppendString( - struct ArrowArray* array, struct ArrowStringView value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - struct ArrowBufferView buffer_view; - buffer_view.data.data = value.data; - buffer_view.size_bytes = value.size_bytes; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_LARGE_STRING: - return ArrowArrayAppendBytes(array, buffer_view); - default: - return EINVAL; - } -} - -static inline ArrowErrorCode ArrowArrayAppendDecimal( - struct ArrowArray* array, struct ArrowDecimal* value) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1); - - switch (private_data->storage_type) { - case NANOARROW_TYPE_DECIMAL128: - if (value->n_words != 2) { - return EINVAL; - } else { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value->words, 2 * sizeof(uint64_t))); - break; - } - case NANOARROW_TYPE_DECIMAL256: - if (value->n_words != 4) { - return EINVAL; - } else { - NANOARROW_RETURN_NOT_OK(ArrowBufferAppend( - data_buffer, value->words, 4 * sizeof(uint64_t))); - break; - } - default: - return EINVAL; - } - - if (private_data->bitmap.buffer.data != NULL) { + ArrowBufferAppend(data_buffer, value->words, 2 * sizeof(uint64_t))); + break; + } + case NANOARROW_TYPE_DECIMAL256: + if (value->n_words != 4) { + return EINVAL; + } else { NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } + ArrowBufferAppend(data_buffer, value->words, 4 * sizeof(uint64_t))); + break; + } + default: + return EINVAL; + } - array->length++; - return NANOARROW_OK; + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; } static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; - - int64_t child_length; - - switch (private_data->storage_type) { - case NANOARROW_TYPE_LIST: - case NANOARROW_TYPE_MAP: - child_length = array->children[0]->length; - if (child_length > INT32_MAX) { - return EINVAL; - } - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - ArrowArrayBuffer(array, 1), (int32_t)child_length)); - break; - case NANOARROW_TYPE_LARGE_LIST: - child_length = array->children[0]->length; - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64( - ArrowArrayBuffer(array, 1), child_length)); - break; - case NANOARROW_TYPE_FIXED_SIZE_LIST: - child_length = array->children[0]->length; - if (child_length != ((array->length + 1) * - private_data->layout.child_size_elements)) { - return EINVAL; - } - break; - case NANOARROW_TYPE_STRUCT: - for (int64_t i = 0; i < array->n_children; i++) { - child_length = array->children[i]->length; - if (child_length != (array->length + 1)) { - return EINVAL; - } - } - break; - default: - return EINVAL; - } - - if (private_data->bitmap.buffer.data != NULL) { - NANOARROW_RETURN_NOT_OK( - ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); - } - - array->length++; - return NANOARROW_OK; -} + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; -static inline ArrowErrorCode ArrowArrayFinishUnionElement( - struct ArrowArray* array, int8_t type_id) { - struct ArrowArrayPrivateData* private_data = (struct ArrowArrayPrivateData*) - array->private_data; + int64_t child_length; - int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); - if (child_index < 0 || child_index >= array->n_children) { + switch (private_data->storage_type) { + case NANOARROW_TYPE_LIST: + case NANOARROW_TYPE_MAP: + child_length = array->children[0]->length; + if (child_length > INT32_MAX) { return EINVAL; - } - - switch (private_data->storage_type) { - case NANOARROW_TYPE_DENSE_UNION: - // Apppend the target child length to the union offsets buffer - _NANOARROW_CHECK_RANGE( - array->children[child_index]->length, 0, INT32_MAX); - NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( - ArrowArrayBuffer(array, 1), - (int32_t)array->children[child_index]->length - 1)); - break; - case NANOARROW_TYPE_SPARSE_UNION: - // Append one empty to any non-target column that isn't already the - // right length or abort if appending a null will result in a column - // with invalid length - for (int64_t i = 0; i < array->n_children; i++) { - if (i == child_index || - array->children[i]->length == (array->length + 1)) { - continue; - } - - if (array->children[i]->length != array->length) { - return EINVAL; - } - - NANOARROW_RETURN_NOT_OK( - ArrowArrayAppendEmpty(array->children[i], 1)); - } - - break; - default: - return EINVAL; - } - - // Write to the type_ids buffer - NANOARROW_RETURN_NOT_OK( - ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); - array->length++; - return NANOARROW_OK; -} - -static inline void ArrowArrayViewMove( - struct ArrowArrayView* src, struct ArrowArrayView* dst) { - memcpy(dst, src, sizeof(struct ArrowArrayView)); - ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); -} - -static inline int8_t ArrowArrayViewIsNull( - struct ArrowArrayView* array_view, int64_t i) { - const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; - i += array_view->array->offset; - switch (array_view->storage_type) { - case NANOARROW_TYPE_NA: - return 0x01; - case NANOARROW_TYPE_DENSE_UNION: - case NANOARROW_TYPE_SPARSE_UNION: - // Unions are "never null" in Arrow land - return 0x00; - default: - return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); - } -} - -static inline int8_t ArrowArrayViewUnionTypeId( - struct ArrowArrayView* array_view, int64_t i) { - switch (array_view->storage_type) { - case NANOARROW_TYPE_DENSE_UNION: - case NANOARROW_TYPE_SPARSE_UNION: - return array_view->buffer_views[0].data.as_int8[i]; - default: - return -1; - } -} - -static inline int8_t ArrowArrayViewUnionChildIndex( - struct ArrowArrayView* array_view, int64_t i) { - int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); - if (array_view->union_type_id_map == NULL) { - return type_id; - } else { - return array_view->union_type_id_map[type_id]; - } -} - -static inline int64_t ArrowArrayViewUnionChildOffset( - struct ArrowArrayView* array_view, int64_t i) { - switch (array_view->storage_type) { - case NANOARROW_TYPE_DENSE_UNION: - return array_view->buffer_views[1].data.as_int32[i]; - case NANOARROW_TYPE_SPARSE_UNION: - return i; - default: - return -1; - } -} - -static inline int64_t ArrowArrayViewGetIntUnsafe( - struct ArrowArrayView* array_view, int64_t i) { - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - i += array_view->array->offset; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INT64: - return data_view->data.as_int64[i]; - case NANOARROW_TYPE_UINT64: - return data_view->data.as_uint64[i]; - case NANOARROW_TYPE_INT32: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_UINT32: - return data_view->data.as_uint32[i]; - case NANOARROW_TYPE_INT16: - return data_view->data.as_int16[i]; - case NANOARROW_TYPE_UINT16: - return data_view->data.as_uint16[i]; - case NANOARROW_TYPE_INT8: - return data_view->data.as_int8[i]; - case NANOARROW_TYPE_UINT8: - return data_view->data.as_uint8[i]; - case NANOARROW_TYPE_DOUBLE: - return data_view->data.as_double[i]; - case NANOARROW_TYPE_FLOAT: - return data_view->data.as_float[i]; - case NANOARROW_TYPE_BOOL: - return ArrowBitGet(data_view->data.as_uint8, i); - default: - return INT64_MAX; - } -} + } + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), (int32_t)child_length)); + break; + case NANOARROW_TYPE_LARGE_LIST: + child_length = array->children[0]->length; + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), child_length)); + break; + case NANOARROW_TYPE_FIXED_SIZE_LIST: + child_length = array->children[0]->length; + if (child_length != + ((array->length + 1) * private_data->layout.child_size_elements)) { + return EINVAL; + } + break; + case NANOARROW_TYPE_STRUCT: + for (int64_t i = 0; i < array->n_children; i++) { + child_length = array->children[i]->length; + if (child_length != (array->length + 1)) { + return EINVAL; + } + } + break; + default: + return EINVAL; + } + + if (private_data->bitmap.buffer.data != NULL) { + NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1)); + } + + array->length++; + return NANOARROW_OK; +} + +static inline ArrowErrorCode ArrowArrayFinishUnionElement(struct ArrowArray* array, + int8_t type_id) { + struct ArrowArrayPrivateData* private_data = + (struct ArrowArrayPrivateData*)array->private_data; + + int64_t child_index = _ArrowArrayUnionChildIndex(array, type_id); + if (child_index < 0 || child_index >= array->n_children) { + return EINVAL; + } + + switch (private_data->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + // Apppend the target child length to the union offsets buffer + _NANOARROW_CHECK_RANGE(array->children[child_index]->length, 0, INT32_MAX); + NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32( + ArrowArrayBuffer(array, 1), (int32_t)array->children[child_index]->length - 1)); + break; + case NANOARROW_TYPE_SPARSE_UNION: + // Append one empty to any non-target column that isn't already the right length + // or abort if appending a null will result in a column with invalid length + for (int64_t i = 0; i < array->n_children; i++) { + if (i == child_index || array->children[i]->length == (array->length + 1)) { + continue; + } -static inline uint64_t ArrowArrayViewGetUIntUnsafe( - struct ArrowArrayView* array_view, int64_t i) { - i += array_view->array->offset; - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INT64: - return data_view->data.as_int64[i]; - case NANOARROW_TYPE_UINT64: - return data_view->data.as_uint64[i]; - case NANOARROW_TYPE_INT32: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_UINT32: - return data_view->data.as_uint32[i]; - case NANOARROW_TYPE_INT16: - return data_view->data.as_int16[i]; - case NANOARROW_TYPE_UINT16: - return data_view->data.as_uint16[i]; - case NANOARROW_TYPE_INT8: - return data_view->data.as_int8[i]; - case NANOARROW_TYPE_UINT8: - return data_view->data.as_uint8[i]; - case NANOARROW_TYPE_DOUBLE: - return data_view->data.as_double[i]; - case NANOARROW_TYPE_FLOAT: - return data_view->data.as_float[i]; - case NANOARROW_TYPE_BOOL: - return ArrowBitGet(data_view->data.as_uint8, i); - default: - return UINT64_MAX; - } -} + if (array->children[i]->length != array->length) { + return EINVAL; + } -static inline double ArrowArrayViewGetDoubleUnsafe( - struct ArrowArrayView* array_view, int64_t i) { - i += array_view->array->offset; - struct ArrowBufferView* data_view = &array_view->buffer_views[1]; - switch (array_view->storage_type) { - case NANOARROW_TYPE_INT64: - return data_view->data.as_int64[i]; - case NANOARROW_TYPE_UINT64: - return data_view->data.as_uint64[i]; - case NANOARROW_TYPE_INT32: - return data_view->data.as_int32[i]; - case NANOARROW_TYPE_UINT32: - return data_view->data.as_uint32[i]; - case NANOARROW_TYPE_INT16: - return data_view->data.as_int16[i]; - case NANOARROW_TYPE_UINT16: - return data_view->data.as_uint16[i]; - case NANOARROW_TYPE_INT8: - return data_view->data.as_int8[i]; - case NANOARROW_TYPE_UINT8: - return data_view->data.as_uint8[i]; - case NANOARROW_TYPE_DOUBLE: - return data_view->data.as_double[i]; - case NANOARROW_TYPE_FLOAT: - return data_view->data.as_float[i]; - case NANOARROW_TYPE_BOOL: - return ArrowBitGet(data_view->data.as_uint8, i); - default: - return DBL_MAX; - } + NANOARROW_RETURN_NOT_OK(ArrowArrayAppendEmpty(array->children[i], 1)); + } + + break; + default: + return EINVAL; + } + + // Write to the type_ids buffer + NANOARROW_RETURN_NOT_OK( + ArrowBufferAppendInt8(ArrowArrayBuffer(array, 0), (int8_t)type_id)); + array->length++; + return NANOARROW_OK; +} + +static inline void ArrowArrayViewMove(struct ArrowArrayView* src, + struct ArrowArrayView* dst) { + memcpy(dst, src, sizeof(struct ArrowArrayView)); + ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED); +} + +static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view, int64_t i) { + const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8; + i += array_view->array->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_NA: + return 0x01; + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + // Unions are "never null" in Arrow land + return 0x00; + default: + return validity_buffer != NULL && !ArrowBitGet(validity_buffer, i); + } +} + +static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + case NANOARROW_TYPE_SPARSE_UNION: + return array_view->buffer_views[0].data.as_int8[i]; + default: + return -1; + } +} + +static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView* array_view, + int64_t i) { + int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i); + if (array_view->union_type_id_map == NULL) { + return type_id; + } else { + return array_view->union_type_id_map[type_id]; + } +} + +static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView* array_view, + int64_t i) { + switch (array_view->storage_type) { + case NANOARROW_TYPE_DENSE_UNION: + return array_view->buffer_views[1].data.as_int32[i]; + case NANOARROW_TYPE_SPARSE_UNION: + return i; + default: + return -1; + } +} + +static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView* array_view, + int64_t i) { + struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + i += array_view->array->offset; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return INT64_MAX; + } +} + +static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_view, + int64_t i) { + i += array_view->array->offset; + struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return UINT64_MAX; + } +} + +static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView* array_view, + int64_t i) { + i += array_view->array->offset; + struct ArrowBufferView* data_view = &array_view->buffer_views[1]; + switch (array_view->storage_type) { + case NANOARROW_TYPE_INT64: + return data_view->data.as_int64[i]; + case NANOARROW_TYPE_UINT64: + return data_view->data.as_uint64[i]; + case NANOARROW_TYPE_INT32: + return data_view->data.as_int32[i]; + case NANOARROW_TYPE_UINT32: + return data_view->data.as_uint32[i]; + case NANOARROW_TYPE_INT16: + return data_view->data.as_int16[i]; + case NANOARROW_TYPE_UINT16: + return data_view->data.as_uint16[i]; + case NANOARROW_TYPE_INT8: + return data_view->data.as_int8[i]; + case NANOARROW_TYPE_UINT8: + return data_view->data.as_uint8[i]; + case NANOARROW_TYPE_DOUBLE: + return data_view->data.as_double[i]; + case NANOARROW_TYPE_FLOAT: + return data_view->data.as_float[i]; + case NANOARROW_TYPE_BOOL: + return ArrowBitGet(data_view->data.as_uint8, i); + default: + return DBL_MAX; + } } static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe( struct ArrowArrayView* array_view, int64_t i) { - i += array_view->array->offset; - struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; - const char* data_view = array_view->buffer_views[2].data.as_char; - - struct ArrowStringView view; - switch (array_view->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_BINARY: - view.data = data_view + offsets_view->data.as_int32[i]; - view.size_bytes = offsets_view->data.as_int32[i + 1] - - offsets_view->data.as_int32[i]; - break; - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - view.data = data_view + offsets_view->data.as_int64[i]; - view.size_bytes = offsets_view->data.as_int64[i + 1] - - offsets_view->data.as_int64[i]; - break; - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - view.size_bytes = array_view->layout.element_size_bits[1] / 8; - view.data = array_view->buffer_views[1].data.as_char + - (i * view.size_bytes); - break; - default: - view.data = NULL; - view.size_bytes = 0; - break; - } - - return view; + i += array_view->array->offset; + struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const char* data_view = array_view->buffer_views[2].data.as_char; + + struct ArrowStringView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.data = data_view + offsets_view->data.as_int32[i]; + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.data = data_view + offsets_view->data.as_int64[i]; + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data = array_view->buffer_views[1].data.as_char + (i * view.size_bytes); + break; + default: + view.data = NULL; + view.size_bytes = 0; + break; + } + + return view; } static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe( struct ArrowArrayView* array_view, int64_t i) { - i += array_view->array->offset; - struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; - const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; - - struct ArrowBufferView view; - switch (array_view->storage_type) { - case NANOARROW_TYPE_STRING: - case NANOARROW_TYPE_BINARY: - view.size_bytes = offsets_view->data.as_int32[i + 1] - - offsets_view->data.as_int32[i]; - view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; - break; - case NANOARROW_TYPE_LARGE_STRING: - case NANOARROW_TYPE_LARGE_BINARY: - view.size_bytes = offsets_view->data.as_int64[i + 1] - - offsets_view->data.as_int64[i]; - view.data.as_uint8 = data_view + offsets_view->data.as_int64[i]; - break; - case NANOARROW_TYPE_FIXED_SIZE_BINARY: - view.size_bytes = array_view->layout.element_size_bits[1] / 8; - view.data.as_uint8 = array_view->buffer_views[1].data.as_uint8 + - (i * view.size_bytes); - break; - default: - view.data.data = NULL; - view.size_bytes = 0; - break; - } - - return view; -} - -static inline void ArrowArrayViewGetDecimalUnsafe( - struct ArrowArrayView* array_view, int64_t i, struct ArrowDecimal* out) { - i += array_view->array->offset; - const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; - switch (array_view->storage_type) { - case NANOARROW_TYPE_DECIMAL128: - ArrowDecimalSetBytes(out, data_view + (i * 16)); - break; - case NANOARROW_TYPE_DECIMAL256: - ArrowDecimalSetBytes(out, data_view + (i * 32)); - break; - default: - memset(out->words, 0, sizeof(out->words)); - break; - } + i += array_view->array->offset; + struct ArrowBufferView* offsets_view = &array_view->buffer_views[1]; + const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8; + + struct ArrowBufferView view; + switch (array_view->storage_type) { + case NANOARROW_TYPE_STRING: + case NANOARROW_TYPE_BINARY: + view.size_bytes = + offsets_view->data.as_int32[i + 1] - offsets_view->data.as_int32[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int32[i]; + break; + case NANOARROW_TYPE_LARGE_STRING: + case NANOARROW_TYPE_LARGE_BINARY: + view.size_bytes = + offsets_view->data.as_int64[i + 1] - offsets_view->data.as_int64[i]; + view.data.as_uint8 = data_view + offsets_view->data.as_int64[i]; + break; + case NANOARROW_TYPE_FIXED_SIZE_BINARY: + view.size_bytes = array_view->layout.element_size_bits[1] / 8; + view.data.as_uint8 = + array_view->buffer_views[1].data.as_uint8 + (i * view.size_bytes); + break; + default: + view.data.data = NULL; + view.size_bytes = 0; + break; + } + + return view; +} + +static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView* array_view, + int64_t i, struct ArrowDecimal* out) { + i += array_view->array->offset; + const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8; + switch (array_view->storage_type) { + case NANOARROW_TYPE_DECIMAL128: + ArrowDecimalSetBytes(out, data_view + (i * 16)); + break; + case NANOARROW_TYPE_DECIMAL256: + ArrowDecimalSetBytes(out, data_view + (i * 32)); + break; + default: + memset(out->words, 0, sizeof(out->words)); + break; + } } #ifdef __cplusplus diff --git a/apis/r/src/rinterface.cpp b/apis/r/src/rinterface.cpp index 3064bc055e..4a312bb4c3 100644 --- a/apis/r/src/rinterface.cpp +++ b/apis/r/src/rinterface.cpp @@ -1,6 +1,6 @@ -#include // for R interface to C++ -#include // for C interface to Arrow -#include // for fromInteger64 +#include // for R interface to C++ +#include // for C interface to Arrow +#include // for fromInteger64 // we currently get deprecation warnings by default which are noisy #ifndef TILEDB_NO_API_DEPRECATION_WARNINGS @@ -14,8 +14,8 @@ #include #endif -#include "rutilities.h" // local declarations -#include "xptr-utils.h" // xptr taggging utilities +#include "rutilities.h" // local declarations +#include "xptr-utils.h" // xptr taggging utilities // (Adapted) helper functions from nanoarrow // @@ -23,46 +23,41 @@ // non-null, non-released pointer when garbage collected. We use a tagged XPtr, // but do not set an XPtr finalizer Rcpp::XPtr schema_owning_xptr(void) { - struct ArrowSchema* schema = (struct ArrowSchema*)ArrowMalloc( - sizeof(struct ArrowSchema)); - if (schema == NULL) - Rcpp::stop("Failed to allocate ArrowSchema"); - schema->release = NULL; - Rcpp::XPtr schema_xptr = make_xptr(schema, false); - return schema_xptr; + struct ArrowSchema* schema = (struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema)); + if (schema == NULL) Rcpp::stop("Failed to allocate ArrowSchema"); + schema->release = NULL; + Rcpp::XPtr schema_xptr = make_xptr(schema, false); + return schema_xptr; } // Create an external pointer with the proper class and that will release any // non-null, non-released pointer when garbage collected. We use a tagged XPtr, // but do not set an XPtr finalizer Rcpp::XPtr array_owning_xptr(void) { - struct ArrowArray* array = (struct ArrowArray*)ArrowMalloc( - sizeof(struct ArrowArray)); - if (array == NULL) - Rcpp::stop("Failed to allocate ArrowArray"); - array->release = NULL; - Rcpp::XPtr array_xptr = make_xptr(array, false); - return array_xptr; + struct ArrowArray* array = (struct ArrowArray*)ArrowMalloc(sizeof(struct ArrowArray)); + if (array == NULL) Rcpp::stop("Failed to allocate ArrowArray"); + array->release = NULL; + Rcpp::XPtr array_xptr = make_xptr(array, false); + return array_xptr; } namespace tdbs = tiledbsoma; -Rcpp::XPtr schema_setup_struct( - Rcpp::XPtr schxp, int64_t n_children); -Rcpp::XPtr array_setup_struct( - Rcpp::XPtr arrxp, int64_t n_children); +Rcpp::XPtr schema_setup_struct(Rcpp::XPtr schxp, int64_t n_children); +Rcpp::XPtr array_setup_struct(Rcpp::XPtr arrxp, int64_t n_children); + //' @noRd // [[Rcpp::export(soma_array_reader_impl)]] -Rcpp::List soma_array_reader( - const std::string& uri, - Rcpp::Nullable colnames = R_NilValue, - Rcpp::Nullable> qc = R_NilValue, - Rcpp::Nullable dim_points = R_NilValue, - Rcpp::Nullable dim_ranges = R_NilValue, - std::string batch_size = "auto", - std::string result_order = "auto", - const std::string& loglevel = "auto", - Rcpp::Nullable config = R_NilValue) { +Rcpp::List soma_array_reader(const std::string& uri, + Rcpp::Nullable colnames = R_NilValue, + Rcpp::Nullable> qc = R_NilValue, + Rcpp::Nullable dim_points = R_NilValue, + Rcpp::Nullable dim_ranges = R_NilValue, + std::string batch_size = "auto", + std::string result_order = "auto", + const std::string& loglevel = "auto", + Rcpp::Nullable config = R_NilValue) { + if (loglevel != "auto") { spdl::set_level(loglevel); tdbs::LOG_SET_LEVEL(loglevel); @@ -70,44 +65,36 @@ Rcpp::List soma_array_reader( spdl::info("[soma_array_reader] Reading from {}", uri); - std::map platform_config = config_vector_to_map( - config); + std::map platform_config = config_vector_to_map(config); // to create a Context object: // std::make_shared(Config(platform_config)), std::vector column_names = {}; - if (!colnames.isNull()) { // If we have column names, select them + if (!colnames.isNull()) { // If we have column names, select them column_names = Rcpp::as>(colnames); - spdl::debug( - "[soma_array_reader] Selecting {} columns", column_names.size()); + spdl::debug("[soma_array_reader] Selecting {} columns", column_names.size()); } auto tdb_result_order = get_tdb_result_order(result_order); // Read selected columns from the uri (return is unique_ptr) - auto sr = tdbs::SOMAArray::open( - OpenMode::read, - uri, - "unnamed", // name parameter could be added - platform_config, - column_names, - batch_size, - tdb_result_order); - - std::unordered_map> - name2dim; + auto sr = tdbs::SOMAArray::open(OpenMode::read, + uri, + "unnamed", // name parameter could be added + platform_config, + column_names, + batch_size, + tdb_result_order); + + std::unordered_map> name2dim; std::shared_ptr schema = sr->tiledb_schema(); tiledb::Domain domain = schema->domain(); std::vector dims = domain.dimensions(); - for (auto& dim : dims) { - spdl::info( - "[soma_array_reader] Dimension {} type {} domain {} extent {}", - dim.name(), - tiledb::impl::to_str(dim.type()), - dim.domain_to_str(), - dim.tile_extent_to_str()); - name2dim.emplace(std::make_pair( - dim.name(), std::make_shared(dim))); + for (auto& dim: dims) { + spdl::info("[soma_array_reader] Dimension {} type {} domain {} extent {}", + dim.name(), tiledb::impl::to_str(dim.type()), + dim.domain_to_str(), dim.tile_extent_to_str()); + name2dim.emplace(std::make_pair(dim.name(), std::make_shared(dim))); } // If we have a query condition, apply it @@ -118,9 +105,8 @@ Rcpp::List soma_array_reader( } // If we have dimension points, apply them - // The interface is named list, where each (named) list elements is one - // (named) dimesion The List element is a simple vector of points and each - // point is applied to the named dimension + // The interface is named list, where each (named) list elements is one (named) dimesion + // The List element is a simple vector of points and each point is applied to the named dimension if (!dim_points.isNull()) { Rcpp::List lst(dim_points); apply_dim_points(sr.get(), name2dim, lst); @@ -135,17 +121,12 @@ Rcpp::List soma_array_reader( // Getting next batch: std::optional> auto sr_data = sr->read_next(); if (!sr->results_complete()) { - Rcpp::stop( - "Read of '%s' is incomplete.\nConsider increasing the memory " - "allocation via the configuration\noption " - "'soma.init_buffer_bytes', " - "or using iterated partial reads.", - uri); + Rcpp::stop("Read of '%s' is incomplete.\nConsider increasing the memory " + "allocation via the configuration\noption 'soma.init_buffer_bytes', " + "or using iterated partial reads.", uri); } - spdl::info( - "[soma_array_reader] Read complete with {} rows and {} cols", - sr_data->get()->num_rows(), - sr_data->get()->names().size()); + spdl::info("[soma_array_reader] Read complete with {} rows and {} cols", + sr_data->get()->num_rows(), sr_data->get()->names().size()); const std::vector names = sr_data->get()->names(); auto ncol = names.size(); @@ -155,9 +136,8 @@ Rcpp::List soma_array_reader( arrayxp = array_setup_struct(arrayxp, ncol); arrayxp->length = 0; - for (size_t i = 0; i < ncol; i++) { - // this allocates, and properly wraps as external pointers controlling - // lifetime + for (size_t i=0; i chldschemaxp = schema_owning_xptr(); Rcpp::XPtr chldarrayxp = array_owning_xptr(); @@ -169,13 +149,11 @@ Rcpp::List soma_array_reader( // this is pair of array and schema pointer auto pp = tdbs::ArrowAdapter::to_arrow(buf); - memcpy((void*)chldschemaxp, pp.second.get(), sizeof(ArrowSchema)); - memcpy((void*)chldarrayxp, pp.first.get(), sizeof(ArrowArray)); + memcpy((void*) chldschemaxp, pp.second.get(), sizeof(ArrowSchema)); + memcpy((void*) chldarrayxp, pp.first.get(), sizeof(ArrowArray)); - spdl::info( - "[soma_array_reader] Incoming name {} length {}", - std::string(pp.second->name), - pp.first->length); + spdl::info("[soma_array_reader] Incoming name {} length {}", + std::string(pp.second->name), pp.first->length); schemaxp->children[i] = chldschemaxp; arrayxp->children[i] = chldarrayxp; @@ -190,24 +168,22 @@ Rcpp::List soma_array_reader( // } if (pp.first->length > arrayxp->length) { - spdl::debug( - "[soma_array_reader] Setting array length to {}", - pp.first->length); + spdl::debug("[soma_array_reader] Setting array length to {}", pp.first->length); arrayxp->length = pp.first->length; } } - Rcpp::List as = Rcpp::List::create( - Rcpp::Named("array_data") = arrayxp, Rcpp::Named("schema") = schemaxp); + Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = arrayxp, + Rcpp::Named("schema") = schemaxp); return as; } //' Set the logging level for the R package and underlying C++ library //' -//' @param level A character value with logging level understood by -//\sQuote{spdlog} ' such as \dQuote{trace}, \dQuote{debug}, \dQuote{info}, or -//\dQuote{warn}. ' @return Nothing is returned as the function is invoked for -//the side-effect. ' @export +//' @param level A character value with logging level understood by \sQuote{spdlog} +//' such as \dQuote{trace}, \dQuote{debug}, \dQuote{info}, or \dQuote{warn}. +//' @return Nothing is returned as the function is invoked for the side-effect. +//' @export // [[Rcpp::export]] void set_log_level(const std::string& level) { spdl::set_level(level); @@ -216,13 +192,14 @@ void set_log_level(const std::string& level) { //' @noRd // [[Rcpp::export]] -Rcpp::CharacterVector get_column_types( - const std::string& uri, const std::vector& colnames) { +Rcpp::CharacterVector get_column_types(const std::string& uri, + const std::vector& colnames) { + auto sr = tdbs::SOMAArray::open(OpenMode::read, uri); auto sr_data = sr->read_next(); size_t n = colnames.size(); Rcpp::CharacterVector vs(n); - for (size_t i = 0; i < n; i++) { + for (size_t i=0; iget()->at(colnames[i])->type(); vs[i] = std::string(tiledb::impl::to_str(datatype)); } @@ -231,36 +208,28 @@ Rcpp::CharacterVector get_column_types( } // [[Rcpp::export]] -double nnz( - const std::string& uri, - Rcpp::Nullable config = R_NilValue) { - auto sr = tdbs::SOMAArray::open( - OpenMode::read, uri, "unnamed", config_vector_to_map(config)); +double nnz(const std::string& uri, Rcpp::Nullable config = R_NilValue) { + auto sr = tdbs::SOMAArray::open(OpenMode::read, uri, "unnamed", config_vector_to_map(config)); return static_cast(sr->nnz()); } //' @noRd // [[Rcpp::export]] bool check_arrow_schema_tag(Rcpp::XPtr xp) { - check_xptr_tag(xp); // throws if mismatched - return true; + check_xptr_tag(xp); // throws if mismatched + return true; } //' @noRd // [[Rcpp::export]] bool check_arrow_array_tag(Rcpp::XPtr xp) { - check_xptr_tag(xp); // throws if mismatched - return true; + check_xptr_tag(xp); // throws if mismatched + return true; } // [[Rcpp::export]] -Rcpp::NumericVector shape( - const std::string& uri, - Rcpp::Nullable config = R_NilValue) { - auto sr = tdbs::SOMAArray::open( - OpenMode::read, - uri, - "unnamed", - config_vector_to_map(Rcpp::wrap(config))); +Rcpp::NumericVector shape(const std::string& uri, + Rcpp::Nullable config = R_NilValue) { + auto sr = tdbs::SOMAArray::open(OpenMode::read, uri, "unnamed", config_vector_to_map(Rcpp::wrap(config))); return Rcpp::toInteger64(sr->shape()); } diff --git a/apis/r/src/riterator.cpp b/apis/r/src/riterator.cpp index 2665d01b31..1cd170aacc 100644 --- a/apis/r/src/riterator.cpp +++ b/apis/r/src/riterator.cpp @@ -3,8 +3,8 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -#include // for R interface to C++ -#include // for C interface to Arrow +#include // for R interface to C++ +#include // for C interface to Arrow #include #if TILEDB_VERSION_MAJOR == 2 && TILEDB_VERSION_MINOR >= 4 @@ -15,44 +15,43 @@ #define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 #include -#include "rutilities.h" // local declarations -#include "xptr-utils.h" // xptr taggging utilitie -Rcpp::XPtr schema_setup_struct( - Rcpp::XPtr schxp, int64_t n_children); -Rcpp::XPtr array_setup_struct( - Rcpp::XPtr arrxp, int64_t n_children); +#include "rutilities.h" // local declarations +#include "xptr-utils.h" // xptr taggging utilitie +Rcpp::XPtr schema_setup_struct(Rcpp::XPtr schxp, int64_t n_children); +Rcpp::XPtr array_setup_struct(Rcpp::XPtr arrxp, int64_t n_children); namespace tdbs = tiledbsoma; //' Iterator-Style Access to SOMA Array via SOMAArray //' -//' The `sr_*` functions provide low-level access to an instance of the -//SOMAArray ' class so that iterative access over parts of a (large) array is -//possible. ' \describe{ ' \item{\code{sr_setup}}{instantiates and by default -//also submits a query} ' \item{\code{sr_complete}}{checks if more data is -//available} ' \item{\code{sr_next}}{returns the next chunk} ' } +//' The `sr_*` functions provide low-level access to an instance of the SOMAArray +//' class so that iterative access over parts of a (large) array is possible. +//' \describe{ +//' \item{\code{sr_setup}}{instantiates and by default also submits a query} +//' \item{\code{sr_complete}}{checks if more data is available} +//' \item{\code{sr_next}}{returns the next chunk} +//' } //' //' @param uri Character value with URI path to a SOMA data set -//' @param config Named chracter vector with \sQuote{key} and \sQuote{value} -//pairs ' used as TileDB config parameters. ' @param colnames Optional vector of -//character value with the name of the columns to retrieve ' @param qc Optional -//external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} -//i.e. ' no query condition ' @param dim_points Optional named list with vector -//of data points to select on the given ' dimension(s). Each dimension can be -//one entry in the list. ' @param dim_ranges Optional named list with two-column -//matrix where each row select a range ' for the given dimension. Each dimension -//can be one entry in the list. ' @param batch_size Optional argument for size -//of data batches, defaults to \sQuote{auto} ' @param result_order Optional -//argument for query result order, defaults to \sQuote{auto} ' @param loglevel -//Character value with the desired logging level, defaults to \sQuote{auto} ' -//which lets prior setting prevail, any other value is set as new logging level. -//' @param timestamp_end Optional POSIXct (i.e. Datetime) type for end of -//interval for which ' data is considered. ' @param sr An external pointer to a -//TileDB SOMAArray object +//' @param config Named chracter vector with \sQuote{key} and \sQuote{value} pairs +//' used as TileDB config parameters. +//' @param colnames Optional vector of character value with the name of the columns to retrieve +//' @param qc Optional external Pointer object to TileDB Query Condition, defaults to \sQuote{NULL} i.e. +//' no query condition +//' @param dim_points Optional named list with vector of data points to select on the given +//' dimension(s). Each dimension can be one entry in the list. +//' @param dim_ranges Optional named list with two-column matrix where each row select a range +//' for the given dimension. Each dimension can be one entry in the list. +//' @param batch_size Optional argument for size of data batches, defaults to \sQuote{auto} +//' @param result_order Optional argument for query result order, defaults to \sQuote{auto} +//' @param loglevel Character value with the desired logging level, defaults to \sQuote{auto} +//' which lets prior setting prevail, any other value is set as new logging level. +//' @param timestamp_end Optional POSIXct (i.e. Datetime) type for end of interval for which +//' data is considered. +//' @param sr An external pointer to a TileDB SOMAArray object //' -//' @return \code{sr_setup} returns an external pointer to a SOMAArray. -//\code{sr_complete} ' returns a boolean, and \code{sr_next} returns an Arrow -//array helper object. +//' @return \code{sr_setup} returns an external pointer to a SOMAArray. \code{sr_complete} +//' returns a boolean, and \code{sr_next} returns an Arrow array helper object. //' //' @examples //' \dontrun{ @@ -69,18 +68,18 @@ namespace tdbs = tiledbsoma; //' } //' @noRd // [[Rcpp::export]] -Rcpp::List sr_setup( - const std::string& uri, - Rcpp::CharacterVector config, - Rcpp::Nullable colnames = R_NilValue, - Rcpp::Nullable> qc = R_NilValue, - Rcpp::Nullable dim_points = R_NilValue, - Rcpp::Nullable dim_ranges = R_NilValue, - std::string batch_size = "auto", - std::string result_order = "auto", - Rcpp::Nullable timestamp_end = R_NilValue, - const std::string& loglevel = "auto") { - // Rcpp::XPtr sr_setup(const std::string& uri, +Rcpp::List sr_setup(const std::string& uri, + Rcpp::CharacterVector config, + Rcpp::Nullable colnames = R_NilValue, + Rcpp::Nullable> qc = R_NilValue, + Rcpp::Nullable dim_points = R_NilValue, + Rcpp::Nullable dim_ranges = R_NilValue, + std::string batch_size = "auto", + std::string result_order = "auto", + Rcpp::Nullable timestamp_end = R_NilValue, + const std::string& loglevel = "auto") { + + //Rcpp::XPtr sr_setup(const std::string& uri, if (loglevel != "auto") { spdl::set_level(loglevel); @@ -92,12 +91,11 @@ Rcpp::List sr_setup( std::string_view name = "unnamed"; std::vector column_names = {}; - std::map platform_config = config_vector_to_map( - Rcpp::wrap(config)); + + std::map platform_config = config_vector_to_map(Rcpp::wrap(config)); tiledb::Config cfg(platform_config); spdl::debug("[sr_setup] creating ctx object with supplied config"); - std::shared_ptr ctxptr = std::make_shared( - cfg); + std::shared_ptr ctxptr = std::make_shared(cfg); ctx_wrap_t* ctxwrap_p = new ContextWrapper(ctxptr); Rcpp::XPtr ctx_wrap_xptr = make_xptr(ctxwrap_p); @@ -106,56 +104,40 @@ Rcpp::List sr_setup( column_names = Rcpp::as>(colnames); } - std::uint64_t ts_start = 0; // beginning of time aka the epoch (force - // double signature) - std::uint64_t - ts_end = std::numeric_limits::max(); // max if unset + std::uint64_t ts_start = 0; // beginning of time aka the epoch (force double signature) + std::uint64_t ts_end = std::numeric_limits::max(); // max if unset if (!timestamp_end.isNull()) { - ts_end = Rcpp::as(timestamp_end) - .getFractionalTimestamp() * - 1e3; // in msec + ts_end = Rcpp::as(timestamp_end).getFractionalTimestamp() * 1e3; // in msec spdl::info(tfm::format("[sr_setup] ts_end set to %ld", ts_end)); } auto tdb_result_order = get_tdb_result_order(result_order); - auto ptr = new tdbs::SOMAArray( - OpenMode::read, - uri, - name, - platform_config, - column_names, - batch_size, - tdb_result_order, - std::make_pair(ts_start, ts_end)); - - std::unordered_map> - name2dim; + auto ptr = new tdbs::SOMAArray(OpenMode::read, uri, name, platform_config, + column_names, batch_size, + tdb_result_order, std::make_pair(ts_start, ts_end)); + + std::unordered_map> name2dim; std::shared_ptr schema = ptr->tiledb_schema(); tiledb::Domain domain = schema->domain(); std::vector dims = domain.dimensions(); - for (auto& dim : dims) { - spdl::debug( - "[sr_setup] Dimension {} type {} domain {} extent {}", - dim.name(), - tiledb::impl::to_str(dim.type()), - dim.domain_to_str(), - dim.tile_extent_to_str()); - name2dim.emplace(std::make_pair( - dim.name(), std::make_shared(dim))); + for (auto& dim: dims) { + spdl::debug("[sr_setup] Dimension {} type {} domain {} extent {}", + dim.name(), tiledb::impl::to_str(dim.type()), + dim.domain_to_str(), dim.tile_extent_to_str()); + name2dim.emplace(std::make_pair(dim.name(), std::make_shared(dim))); } // If we have a query condition, apply it if (!qc.isNull()) { - spdl::debug("[sr_setup] Applying query condition"); + spdl::debug("[sr_setup] Applying query condition") ; Rcpp::XPtr qcxp(qc); ptr->set_condition(*qcxp); } // If we have dimension points, apply them - // The interface is named list, where each (named) list elements is one - // (named) dimesion The List element is a simple vector of points and each - // point is applied to the named dimension + // The interface is named list, where each (named) list elements is one (named) dimesion + // The List element is a simple vector of points and each point is applied to the named dimension if (!dim_points.isNull()) { Rcpp::List lst(dim_points); apply_dim_points(ptr, name2dim, lst); @@ -168,8 +150,8 @@ Rcpp::List sr_setup( } Rcpp::XPtr xptr = make_xptr(ptr); - return Rcpp::List::create( - Rcpp::Named("sr") = xptr, Rcpp::Named("ctx") = ctx_wrap_xptr); + return Rcpp::List::create(Rcpp::Named("sr") = xptr, + Rcpp::Named("ctx") = ctx_wrap_xptr); } // [[Rcpp::export]] @@ -177,13 +159,8 @@ bool sr_complete(Rcpp::XPtr sr) { check_xptr_tag(sr); bool complt = sr->is_complete(true); bool initial = sr->is_initial_read(); - bool res = complt && !initial; // completed transfer if query status - // complete and query ran once - spdl::debug( - "[sr_complete] Complete query test {} (compl {} initial {})", - res, - complt, - initial); + bool res = complt && !initial; // completed transfer if query status complete and query ran once + spdl::debug("[sr_complete] Complete query test {} (compl {} initial {})", res, complt, initial); return res; } @@ -193,75 +170,68 @@ Rcpp::List create_empty_arrow_table() { schemaxp = schema_setup_struct(schemaxp, 0); arrayxp = array_setup_struct(arrayxp, 0); arrayxp->length = 0; - Rcpp::List as = Rcpp::List::create( - Rcpp::Named("array_data") = arrayxp, Rcpp::Named("schema") = schemaxp); + Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = arrayxp, + Rcpp::Named("schema") = schemaxp); return as; } + // [[Rcpp::export]] Rcpp::List sr_next(Rcpp::XPtr sr) { - check_xptr_tag(sr); - - if (sr_complete(sr)) { - spdl::trace( - "[sr_next] complete {} num_cells {}", - sr->is_complete(true), - sr->total_num_cells()); - return create_empty_arrow_table(); - } - - if (!sr->is_initial_read() && sr->total_num_cells() == 0) { - spdl::trace( - "[sr_next] is_initial_read {} num_cells {}", - sr->is_initial_read(), - sr->total_num_cells()); - return create_empty_arrow_table(); - } - - auto sr_data = sr->read_next(); - spdl::debug( - "[sr_next] Read {} rows and {} cols", - sr_data->get()->num_rows(), - sr_data->get()->names().size()); - - const std::vector names = sr_data->get()->names(); - auto ncol = names.size(); - Rcpp::XPtr schemaxp = schema_owning_xptr(); - Rcpp::XPtr arrayxp = array_owning_xptr(); - schemaxp = schema_setup_struct(schemaxp, ncol); - arrayxp = array_setup_struct(arrayxp, ncol); - arrayxp->length = 0; - - for (size_t i = 0; i < ncol; i++) { - // this allocates, and properly wraps as external pointers controlling - // lifetime - Rcpp::XPtr chldschemaxp = schema_owning_xptr(); - Rcpp::XPtr chldarrayxp = array_owning_xptr(); - - spdl::trace("[sr_next] Accessing {} at {}", names[i], i); - - // now buf is a shared_ptr to ColumnBuffer - auto buf = sr_data->get()->at(names[i]); - - // this is pair of array and schema pointer - auto pp = tdbs::ArrowAdapter::to_arrow(buf); - - memcpy((void*)chldschemaxp, pp.second.get(), sizeof(ArrowSchema)); - memcpy((void*)chldarrayxp, pp.first.get(), sizeof(ArrowArray)); - - schemaxp->children[i] = chldschemaxp; - arrayxp->children[i] = chldarrayxp; - - if (pp.first->length > arrayxp->length) { - spdl::debug( - "[soma_array_reader] Setting array length to {}", - pp.first->length); - arrayxp->length = pp.first->length; - } - } - - spdl::debug("[sr_next] Exporting chunk with {} rows", arrayxp->length); - Rcpp::List as = Rcpp::List::create( - Rcpp::Named("array_data") = arrayxp, Rcpp::Named("schema") = schemaxp); - return as; + check_xptr_tag(sr); + + if (sr_complete(sr)) { + spdl::trace("[sr_next] complete {} num_cells {}", + sr->is_complete(true), sr->total_num_cells()); + return create_empty_arrow_table(); + } + + if (!sr->is_initial_read() && sr->total_num_cells() == 0) { + spdl::trace("[sr_next] is_initial_read {} num_cells {}", + sr->is_initial_read(), sr->total_num_cells()); + return create_empty_arrow_table(); + } + + auto sr_data = sr->read_next(); + spdl::debug("[sr_next] Read {} rows and {} cols", + sr_data->get()->num_rows(), sr_data->get()->names().size()); + + const std::vector names = sr_data->get()->names(); + auto ncol = names.size(); + Rcpp::XPtr schemaxp = schema_owning_xptr(); + Rcpp::XPtr arrayxp = array_owning_xptr(); + schemaxp = schema_setup_struct(schemaxp, ncol); + arrayxp = array_setup_struct(arrayxp, ncol); + arrayxp->length = 0; + + for (size_t i=0; i chldschemaxp = schema_owning_xptr(); + Rcpp::XPtr chldarrayxp = array_owning_xptr(); + + spdl::trace("[sr_next] Accessing {} at {}", names[i], i); + + // now buf is a shared_ptr to ColumnBuffer + auto buf = sr_data->get()->at(names[i]); + + // this is pair of array and schema pointer + auto pp = tdbs::ArrowAdapter::to_arrow(buf); + + memcpy((void*) chldschemaxp, pp.second.get(), sizeof(ArrowSchema)); + memcpy((void*) chldarrayxp, pp.first.get(), sizeof(ArrowArray)); + + schemaxp->children[i] = chldschemaxp; + arrayxp->children[i] = chldarrayxp; + + if (pp.first->length > arrayxp->length) { + spdl::debug("[soma_array_reader] Setting array length to {}", pp.first->length); + arrayxp->length = pp.first->length; + } + + } + + spdl::debug("[sr_next] Exporting chunk with {} rows", arrayxp->length); + Rcpp::List as = Rcpp::List::create(Rcpp::Named("array_data") = arrayxp, + Rcpp::Named("schema") = schemaxp); + return as; } diff --git a/apis/r/src/rutilities.cpp b/apis/r/src/rutilities.cpp index ce1b4d5cd6..2430dd8d81 100644 --- a/apis/r/src/rutilities.cpp +++ b/apis/r/src/rutilities.cpp @@ -4,26 +4,24 @@ #define TILEDB_NO_API_DEPRECATION_WARNINGS #endif -#include // for R interface to C++ -#include // for C interface to Arrow -#include // for fromInteger64 +#include // for R interface to C++ +#include // for C interface to Arrow +#include // for fromInteger64 // We get these via nanoarrow and must cannot include carrow.h again #define ARROW_SCHEMA_AND_ARRAY_DEFINED 1 #include -#include "rutilities.h" // local declarations -#include "xptr-utils.h" // xptr taggging utilitie +#include "rutilities.h" // local declarations +#include "xptr-utils.h" // xptr taggging utilitie namespace tdbs = tiledbsoma; -void apply_dim_points( - tdbs::SOMAArray* sr, - std::unordered_map>& - name2dim, - Rcpp::List lst) { +void apply_dim_points(tdbs::SOMAArray *sr, + std::unordered_map>& name2dim, + Rcpp::List lst) { std::vector colnames = lst.attr("names"); - for (auto& nm : colnames) { + for (auto& nm: colnames) { auto dm = name2dim[nm]; auto tp = dm->type(); bool suitable = false; @@ -31,238 +29,154 @@ void apply_dim_points( Rcpp::NumericVector payload = lst[nm]; std::vector iv = Rcpp::fromInteger64(payload, false); std::vector uv(iv.size()); - const std::pair pr = dm->domain(); - for (size_t i = 0; i < iv.size(); i++) { + const std::pair pr = dm->domain(); + for (size_t i=0; i(iv[i]); if (uv[i] >= pr.first && uv[i] <= pr.second) { - sr->set_dim_point( - nm, uv[i]); // bonked when use with vector - spdl::info( - "[apply_dim_points] Applying dim point {} on {}", - uv[i], - nm); + sr->set_dim_point(nm, uv[i]); // bonked when use with vector + spdl::info("[apply_dim_points] Applying dim point {} on {}", uv[i], nm); suitable = true; } } } else if (tp == TILEDB_INT64) { Rcpp::NumericVector payload = lst[nm]; std::vector iv = Rcpp::fromInteger64(payload, false); - const std::pair pr = dm->domain(); - for (size_t i = 0; i < iv.size(); i++) { + const std::pair pr = dm->domain(); + for (size_t i=0; i= pr.first && iv[i] <= pr.second) { sr->set_dim_point(nm, iv[i]); - spdl::info( - "[apply_dim_points] Applying dim point {} on {}", - iv[i], - nm); + spdl::info("[apply_dim_points] Applying dim point {} on {}", iv[i], nm); suitable = true; } } } else if (tp == TILEDB_FLOAT32) { Rcpp::NumericVector payload = lst[nm]; - const std::pair pr = dm->domain(); - for (R_xlen_t i = 0; i < payload.size(); i++) { + const std::pair pr = dm->domain(); + for (R_xlen_t i=0; i(payload[i]); if (v >= pr.first && v <= pr.second) { sr->set_dim_point(nm, v); - spdl::info( - "[apply_dim_points] Applying dim point {} on {}", - v, - nm); + spdl::info("[apply_dim_points] Applying dim point {} on {}", v, nm); suitable = true; } } } else if (tp == TILEDB_FLOAT64) { Rcpp::NumericVector payload = lst[nm]; - const std::pair pr = dm->domain(); - for (R_xlen_t i = 0; i < payload.size(); i++) { + const std::pair pr = dm->domain(); + for (R_xlen_t i=0; i= pr.first && payload[i] <= pr.second) { - sr->set_dim_point(nm, payload[i]); - spdl::info( - "[apply_dim_points] Applying dim point {} on {}", - payload[i], - nm); + sr->set_dim_point(nm,payload[i]); + spdl::info("[apply_dim_points] Applying dim point {} on {}", payload[i], nm); suitable = true; } } } else if (tp == TILEDB_INT32) { Rcpp::IntegerVector payload = lst[nm]; - const std::pair pr = dm->domain(); - for (R_xlen_t i = 0; i < payload.size(); i++) { + const std::pair pr = dm->domain(); + for (R_xlen_t i=0; i= pr.first && payload[i] <= pr.second) { - sr->set_dim_point(nm, payload[i]); - spdl::info( - "[apply_dim_points] Applying dim point {} on {}", - payload[i], - nm); + sr->set_dim_point(nm,payload[i]); + spdl::info("[apply_dim_points] Applying dim point {} on {}", payload[i], nm); suitable = true; } } } else { - Rcpp::stop( - "Currently unsupported type: ", tiledb::impl::to_str(tp)); + Rcpp::stop("Currently unsupported type: ", tiledb::impl::to_str(tp)); } if (!suitable) { - Rcpp::stop( - "Unsuitable dim points on dimension '%s' with domain %s", - nm, - dm->domain_to_str()); + Rcpp::stop("Unsuitable dim points on dimension '%s' with domain %s", nm, dm->domain_to_str()); } } } -void apply_dim_ranges( - tdbs::SOMAArray* sr, - std::unordered_map>& - name2dim, - Rcpp::List lst) { +void apply_dim_ranges(tdbs::SOMAArray* sr, + std::unordered_map>& name2dim, + Rcpp::List lst) { std::vector colnames = lst.attr("names"); - for (auto& nm : colnames) { + for (auto& nm: colnames) { auto dm = name2dim[nm]; auto tp = dm->type(); bool suitable = false; if (tp == TILEDB_UINT64) { Rcpp::NumericMatrix mm = lst[nm]; - Rcpp::NumericMatrix::Column lo = mm.column( - 0); // works as proxy for int and float types - Rcpp::NumericMatrix::Column hi = mm.column( - 1); // works as proxy for int and float types + Rcpp::NumericMatrix::Column lo = mm.column(0); // works as proxy for int and float types + Rcpp::NumericMatrix::Column hi = mm.column(1); // works as proxy for int and float types std::vector> vp(mm.nrow()); - const std::pair pr = dm->domain(); - for (int i = 0; i < mm.nrow(); i++) { + const std::pair pr = dm->domain(); + for (int i=0; i(Rcpp::fromInteger64(lo[i])); uint64_t h = static_cast(Rcpp::fromInteger64(hi[i])); - vp[i] = std::make_pair( - std::max(l, pr.first), std::min(h, pr.second)); - spdl::info( - "[apply_dim_ranges] Applying dim point {} on {} with {} - " - "{}", - i, - nm, - l, - h); - suitable = l < pr.second && - h > pr.first; // lower must be less than max, higher - // more than min + vp[i] = std::make_pair(std::max(l,pr.first), std::min(h, pr.second)); + spdl::info("[apply_dim_ranges] Applying dim point {} on {} with {} - {}", i, nm, l, h) ; + suitable = l < pr.second && h > pr.first; // lower must be less than max, higher more than min } - if (suitable) - sr->set_dim_ranges(nm, vp); + if (suitable) sr->set_dim_ranges(nm, vp); } else if (tp == TILEDB_INT64) { Rcpp::NumericMatrix mm = lst[nm]; std::vector lo = Rcpp::fromInteger64(mm.column(0), false); std::vector hi = Rcpp::fromInteger64(mm.column(1), false); std::vector> vp(mm.nrow()); - const std::pair pr = dm->domain(); - for (int i = 0; i < mm.nrow(); i++) { - vp[i] = std::make_pair( - std::max(lo[i], pr.first), std::min(hi[i], pr.second)); - spdl::info( - "[apply_dim_ranges] Applying dim point {} on {} with {} - " - "{}", - i, - nm, - lo[i], - hi[i]); - suitable = lo[i] < pr.second && - hi[i] > pr.first; // lower must be less than max, - // higher more than min + const std::pair pr = dm->domain(); + for (int i=0; i pr.first; // lower must be less than max, higher more than min } - if (suitable) - sr->set_dim_ranges(nm, vp); + if (suitable) sr->set_dim_ranges(nm, vp); } else if (tp == TILEDB_FLOAT32) { Rcpp::NumericMatrix mm = lst[nm]; - Rcpp::NumericMatrix::Column lo = mm.column( - 0); // works as proxy for int and float types - Rcpp::NumericMatrix::Column hi = mm.column( - 1); // works as proxy for int and float types + Rcpp::NumericMatrix::Column lo = mm.column(0); // works as proxy for int and float types + Rcpp::NumericMatrix::Column hi = mm.column(1); // works as proxy for int and float types std::vector> vp(mm.nrow()); - const std::pair pr = dm->domain(); - for (int i = 0; i < mm.nrow(); i++) { + const std::pair pr = dm->domain(); + for (int i=0; i(lo[i]); float h = static_cast(hi[i]); - vp[i] = std::make_pair( - std::max(l, pr.first), std::min(h, pr.second)); - spdl::info( - "[apply_dim_ranges] Applying dim point {} on {} with {} - " - "{}", - i, - nm, - l, - h); - suitable = l < pr.second && - h > pr.first; // lower must be less than max, higher - // more than min + vp[i] = std::make_pair(std::max(l,pr.first), std::min(h, pr.second)); + spdl::info("[apply_dim_ranges] Applying dim point {} on {} with {} - {}", i, nm, l, h) ; + suitable = l < pr.second && h > pr.first; // lower must be less than max, higher more than min } - if (suitable) - sr->set_dim_ranges(nm, vp); + if (suitable) sr->set_dim_ranges(nm, vp); } else if (tp == TILEDB_FLOAT64) { Rcpp::NumericMatrix mm = lst[nm]; - Rcpp::NumericMatrix::Column lo = mm.column( - 0); // works as proxy for int and float types - Rcpp::NumericMatrix::Column hi = mm.column( - 1); // works as proxy for int and float types + Rcpp::NumericMatrix::Column lo = mm.column(0); // works as proxy for int and float types + Rcpp::NumericMatrix::Column hi = mm.column(1); // works as proxy for int and float types std::vector> vp(mm.nrow()); - const std::pair pr = dm->domain(); - for (int i = 0; i < mm.nrow(); i++) { - vp[i] = std::make_pair( - std::max(lo[i], pr.first), std::min(hi[i], pr.second)); - spdl::info( - "[apply_dim_ranges] Applying dim point {} on {} with {} - " - "{}", - i, - nm, - lo[i], - hi[i]); - suitable = lo[i] < pr.second && - hi[i] > pr.first; // lower must be less than max, - // higher more than min + const std::pair pr = dm->domain(); + for (int i=0; i pr.first; // lower must be less than max, higher more than min } - if (suitable) - sr->set_dim_ranges(nm, vp); + if (suitable) sr->set_dim_ranges(nm, vp); } else if (tp == TILEDB_INT32) { Rcpp::IntegerMatrix mm = lst[nm]; - Rcpp::IntegerMatrix::Column lo = mm.column( - 0); // works as proxy for int and float types - Rcpp::IntegerMatrix::Column hi = mm.column( - 1); // works as proxy for int and float types + Rcpp::IntegerMatrix::Column lo = mm.column(0); // works as proxy for int and float types + Rcpp::IntegerMatrix::Column hi = mm.column(1); // works as proxy for int and float types std::vector> vp(mm.nrow()); - const std::pair pr = dm->domain(); - for (int i = 0; i < mm.nrow(); i++) { - vp[i] = std::make_pair( - std::max(lo[i], pr.first), std::min(hi[i], pr.second)); - spdl::info( - "[apply_dim_ranges] Applying dim point {} on {} with {} - " - "{}", - i, - nm[i], - lo[i], - hi[i]); - suitable = lo[i] < pr.second && - hi[i] > pr.first; // lower must be less than max, - // higher more than min + const std::pair pr = dm->domain(); + for (int i=0; i pr.first; // lower must be less than max, higher more than min } - if (suitable) - sr->set_dim_ranges(nm, vp); + if (suitable) sr->set_dim_ranges(nm, vp); } else { - Rcpp::stop( - "Currently unsupported type: ", tiledb::impl::to_str(tp)); + Rcpp::stop("Currently unsupported type: ", tiledb::impl::to_str(tp)); } if (!suitable) { - Rcpp::stop( - "Unsuitable dim ranges on dimension '%s' with domain %s", - nm, - dm->domain_to_str()); + Rcpp::stop("Unsuitable dim ranges on dimension '%s' with domain %s", nm, dm->domain_to_str()); } } } + // initialize arrow schema and array, respectively -Rcpp::XPtr schema_setup_struct( - Rcpp::XPtr schxp, int64_t n_children) { +Rcpp::XPtr schema_setup_struct(Rcpp::XPtr schxp, int64_t n_children) { ArrowSchema* schema = schxp.get(); auto type = NANOARROW_TYPE_STRUCT; - ArrowSchemaInit(schema); // modified from ArrowSchemaInitFromType() + ArrowSchemaInit(schema); // modified from ArrowSchemaInitFromType() int result = ArrowSchemaSetType(schema, type); if (result != NANOARROW_OK) { schema->release(schema); @@ -270,24 +184,20 @@ Rcpp::XPtr schema_setup_struct( } // now adapted from ArrowSchemaAllocateChildren - if (schema->children != NULL) - Rcpp::stop("Error allocation as children not null"); + if (schema->children != NULL) Rcpp::stop("Error allocation as children not null"); if (n_children > 0) { - auto ptr = (struct ArrowSchema**)ArrowMalloc( - n_children * sizeof(struct ArrowSchema*)); + auto ptr = (struct ArrowSchema**) ArrowMalloc(n_children * sizeof(struct ArrowSchema*)); Rcpp::XPtr schema_ptrxp = make_xptr(ptr, false); schema->children = schema_ptrxp.get(); - if (schema->children == NULL) - Rcpp::stop("Failed to allocate ArrowSchema*"); + if (schema->children == NULL) Rcpp::stop("Failed to allocate ArrowSchema*"); schema->n_children = n_children; memset(schema->children, 0, n_children * sizeof(struct ArrowSchema*)); for (int64_t i = 0; i < n_children; i++) { schema->children[i] = schema_owning_xptr(); - if (schema->children[i] == NULL) - Rcpp::stop("Error allocation schema child %ld", i); + if (schema->children[i] == NULL) Rcpp::stop("Error allocation schema child %ld", i); schema->children[i]->release = NULL; } } @@ -295,15 +205,12 @@ Rcpp::XPtr schema_setup_struct( } extern "C" { -void ArrowArrayRelease( - struct ArrowArray* array); // made non-static in nanoarrow.c -ArrowErrorCode ArrowArraySetStorageType( - struct ArrowArray* array, // ditto - enum ArrowType storage_type); + void ArrowArrayRelease(struct ArrowArray *array); // made non-static in nanoarrow.c + ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray* array, // ditto + enum ArrowType storage_type); } -Rcpp::XPtr array_setup_struct( - Rcpp::XPtr arrxp, int64_t n_children) { +Rcpp::XPtr array_setup_struct(Rcpp::XPtr arrxp, int64_t n_children) { ArrowArray* array = arrxp.get(); auto storage_type = NANOARROW_TYPE_STRUCT; @@ -318,8 +225,7 @@ Rcpp::XPtr array_setup_struct( array->release = &ArrowArrayRelease; array->private_data = NULL; - auto private_data = (struct ArrowArrayPrivateData*)ArrowMalloc( - sizeof(struct ArrowArrayPrivateData)); + auto private_data = (struct ArrowArrayPrivateData*) ArrowMalloc(sizeof(struct ArrowArrayPrivateData)); if (private_data == NULL) { array->release = NULL; Rcpp::stop("Error allocating array private data"); @@ -339,49 +245,45 @@ Rcpp::XPtr array_setup_struct( } ArrowLayoutInit(&private_data->layout, storage_type); - // We can only know this not to be true when initializing based on a schema - // so assume this to be true. + // We can only know this not to be true when initializing based on a schema so assume this to be true. private_data->union_type_id_is_child_index = 1; + // remainder from ArrowArrayAllocateChildren() - if (array->children != NULL) - Rcpp::stop("Error allocating array children as pointer not null"); + if (array->children != NULL) Rcpp::stop("Error allocating array children as pointer not null"); if (n_children == 0) { return arrxp; } - auto ptr = (struct ArrowArray**)ArrowMalloc( - n_children * sizeof(struct ArrowArray*)); + auto ptr = (struct ArrowArray**) ArrowMalloc(n_children * sizeof(struct ArrowArray*)); Rcpp::XPtr array_ptrxp = make_xptr(ptr, false); array->children = array_ptrxp.get(); - if (array->children == NULL) - Rcpp::stop("Failed to allocated ArrayArray*"); + if (array->children == NULL) Rcpp::stop("Failed to allocated ArrayArray*"); memset(array->children, 0, n_children * sizeof(struct ArrowArray*)); for (int64_t i = 0; i < n_children; i++) { array->children[i] = array_owning_xptr(); - if (array->children[i] == NULL) - Rcpp::stop("Error allocation array child %ld", i); + if (array->children[i] == NULL) Rcpp::stop("Error allocation array child %ld", i); array->children[i]->release = NULL; } array->n_children = n_children; return arrxp; } + // formerly stats.cpp //' TileDB SOMA statistics //' -//' These functions expose the TileDB Core functionality for performance -//measurements ' and statistics. +//' These functions expose the TileDB Core functionality for performance measurements +//' and statistics. //' -//' - `tiledbsoma_stats_enable()`/`tiledbsoma_stats_disable()`: Enable and -//disable TileDB's internal statistics. ' - `tiledbsoma_stats_reset()`: Reset -//all statistics to 0. ' - `tiledbsoma_stats_dump()`: Dump all statistics to a -//JSON string. ' - `tiledbsoma_stats_show()`: Print all statistics to the -//console. +//' - `tiledbsoma_stats_enable()`/`tiledbsoma_stats_disable()`: Enable and disable TileDB's internal statistics. +//' - `tiledbsoma_stats_reset()`: Reset all statistics to 0. +//' - `tiledbsoma_stats_dump()`: Dump all statistics to a JSON string. +//' - `tiledbsoma_stats_show()`: Print all statistics to the console. //' //' @name tiledbsoma_stats //' @export @@ -415,9 +317,10 @@ std::string tiledbsoma_stats_dump() { //' libtiledbsoma version //' -//' Returns a string with version information for libtiledbsoma and the linked -//TileDB Embedded library. ' If argument `compact` is set to `TRUE`, a shorter -//version of just the TileDB Embedded library ' version is returned, ' @noRd +//' Returns a string with version information for libtiledbsoma and the linked TileDB Embedded library. +//' If argument `compact` is set to `TRUE`, a shorter version of just the TileDB Embedded library +//' version is returned, +//' @noRd // [[Rcpp::export]] std::string libtiledbsoma_version(const bool compact = false) { if (compact) { @@ -436,10 +339,8 @@ std::string libtiledbsoma_version(const bool compact = false) { //' @noRd // [[Rcpp::export]] Rcpp::IntegerVector tiledb_embedded_version() { - std::tuple - triple = tiledbsoma::version::embedded_version_triple(); - return Rcpp::IntegerVector::create( - std::get<0>(triple), std::get<1>(triple), std::get<2>(triple)); + std::tuple triple = tiledbsoma::version::embedded_version_triple(); + return Rcpp::IntegerVector::create(std::get<0>(triple), std::get<1>(triple), std::get<2>(triple)); } // Also present in tiledb-r but only after 0.23.0 so this can be removed (and @@ -447,22 +348,13 @@ Rcpp::IntegerVector tiledb_embedded_version() { //' @noRd // [[Rcpp::export]] size_t tiledb_datatype_max_value(const std::string& datatype) { - if (datatype == "INT8") - return std::numeric_limits::max(); - else if (datatype == "UINT8") - return std::numeric_limits::max(); - else if (datatype == "INT16") - return std::numeric_limits::max(); - else if (datatype == "UINT16") - return std::numeric_limits::max(); - else if (datatype == "INT32") - return std::numeric_limits::max(); - else if (datatype == "UINT32") - return std::numeric_limits::max(); - else if (datatype == "INT64") - return std::numeric_limits::max(); - else if (datatype == "UINT64") - return std::numeric_limits::max(); - else - Rcpp::stop("currently unsupported datatype (%s)", datatype); + if (datatype == "INT8") return std::numeric_limits::max(); + else if (datatype == "UINT8") return std::numeric_limits::max(); + else if (datatype == "INT16") return std::numeric_limits::max(); + else if (datatype == "UINT16") return std::numeric_limits::max(); + else if (datatype == "INT32") return std::numeric_limits::max(); + else if (datatype == "UINT32") return std::numeric_limits::max(); + else if (datatype == "INT64") return std::numeric_limits::max(); + else if (datatype == "UINT64") return std::numeric_limits::max(); + else Rcpp::stop("currently unsupported datatype (%s)", datatype); } diff --git a/apis/r/src/rutilities.h b/apis/r/src/rutilities.h index ada5398d8a..3669252ae9 100644 --- a/apis/r/src/rutilities.h +++ b/apis/r/src/rutilities.h @@ -12,9 +12,9 @@ namespace tdbs = tiledbsoma; #define TileDB_Version(v, m, p) (((v)*65536) + ((m)*256) + (p)) // current build is encoded in TILEDB_VERSION -#define TILEDB_VERSION \ - TileDB_Version( \ - TILEDB_VERSION_MAJOR, TILEDB_VERSION_MINOR, TILEDB_VERSION_PATCH) +#define TILEDB_VERSION TileDB_Version(TILEDB_VERSION_MAJOR, \ + TILEDB_VERSION_MINOR, \ + TILEDB_VERSION_PATCH) // Applies (named list of) vectors of points to the named dimensions void apply_dim_points( @@ -31,42 +31,34 @@ void apply_dim_ranges( Rcpp::List lst); // Convert R config vector to map suitable for SOMAArray -inline std::map config_vector_to_map( - Rcpp::Nullable config) { +inline std::map config_vector_to_map(Rcpp::Nullable config) { std::map platform_config; if (!config.isNull()) { Rcpp::CharacterVector confvec(config.get()); - Rcpp::CharacterVector namesvec = confvec.attr( - "names"); // extract names from named R vector + Rcpp::CharacterVector namesvec = confvec.attr("names"); // extract names from named R vector size_t n = confvec.length(); - for (size_t i = 0; i < n; i++) { - platform_config.emplace(std::make_pair( - std::string(namesvec[i]), std::string(confvec[i]))); - spdl::trace( - "[config_vector_to_map] adding '{}' = '{}'", - std::string(namesvec[i]), - std::string(confvec[i])); + for (size_t i = 0; i result_order_map{ - {"auto", ResultOrder::automatic}, - {"row-major", ResultOrder::rowmajor}, - {"column-major", ResultOrder::colmajor}}; - return result_order_map[result_order]; +inline ResultOrder get_tdb_result_order(std::string result_order){ + std::map result_order_map{ + {"auto", ResultOrder::automatic}, + {"row-major", ResultOrder::rowmajor}, + {"column-major", ResultOrder::colmajor} + }; + return result_order_map[result_order]; } struct ContextWrapper { - // ContextWrapper(std::shared_ptr ctx_ptr_) : - // ctxptr(std::move(ctx_ptr_)) {} - ContextWrapper(std::shared_ptr ctx_ptr_) - : ctxptr(ctx_ptr_) { - } + //ContextWrapper(std::shared_ptr ctx_ptr_) : ctxptr(std::move(ctx_ptr_)) {} + ContextWrapper(std::shared_ptr ctx_ptr_) : ctxptr(ctx_ptr_) {} std::shared_ptr ctxptr; }; typedef struct ContextWrapper ctx_wrap_t; diff --git a/apis/r/src/xptr-utils.h b/apis/r/src/xptr-utils.h index adaed2a856..d5610bc9ac 100644 --- a/apis/r/src/xptr-utils.h +++ b/apis/r/src/xptr-utils.h @@ -2,114 +2,78 @@ // enum for TileDB XPtr Object type using int32_t payload (for R) enum tiledb_xptr_object : int32_t {}; -const tiledb_xptr_object tiledb_xptr_default{0}; -const tiledb_xptr_object tiledb_xptr_object_array{10}; -const tiledb_xptr_object tiledb_xptr_object_arrayschema{20}; -const tiledb_xptr_object tiledb_xptr_object_arrayschemaevolution{30}; -const tiledb_xptr_object tiledb_xptr_object_attribute{40}; -const tiledb_xptr_object tiledb_xptr_object_config{50}; -const tiledb_xptr_object tiledb_xptr_object_context{60}; -const tiledb_xptr_object tiledb_xptr_object_dimension{70}; -const tiledb_xptr_object tiledb_xptr_object_domain{80}; -const tiledb_xptr_object tiledb_xptr_object_filter{90}; -const tiledb_xptr_object tiledb_xptr_object_filterlist{100}; -const tiledb_xptr_object tiledb_xptr_object_fragmentinfo{110}; -const tiledb_xptr_object tiledb_xptr_object_group{120}; -const tiledb_xptr_object tiledb_xptr_object_query{130}; -const tiledb_xptr_object tiledb_xptr_object_querycondition{140}; -const tiledb_xptr_object tiledb_xptr_object_vfs{150}; -const tiledb_xptr_object tiledb_xptr_vfs_fh_t{160}; -const tiledb_xptr_object tiledb_xptr_vlc_buf_t{170}; -const tiledb_xptr_object tiledb_xptr_vlv_buf_t{180}; -const tiledb_xptr_object tiledb_xptr_query_buf_t{190}; +const tiledb_xptr_object tiledb_xptr_default { 0 }; +const tiledb_xptr_object tiledb_xptr_object_array { 10 }; +const tiledb_xptr_object tiledb_xptr_object_arrayschema { 20 }; +const tiledb_xptr_object tiledb_xptr_object_arrayschemaevolution { 30 }; +const tiledb_xptr_object tiledb_xptr_object_attribute { 40 }; +const tiledb_xptr_object tiledb_xptr_object_config { 50 }; +const tiledb_xptr_object tiledb_xptr_object_context { 60 }; +const tiledb_xptr_object tiledb_xptr_object_dimension { 70 }; +const tiledb_xptr_object tiledb_xptr_object_domain { 80 }; +const tiledb_xptr_object tiledb_xptr_object_filter { 90 }; +const tiledb_xptr_object tiledb_xptr_object_filterlist { 100 }; +const tiledb_xptr_object tiledb_xptr_object_fragmentinfo { 110 }; +const tiledb_xptr_object tiledb_xptr_object_group { 120 }; +const tiledb_xptr_object tiledb_xptr_object_query { 130 }; +const tiledb_xptr_object tiledb_xptr_object_querycondition { 140 }; +const tiledb_xptr_object tiledb_xptr_object_vfs { 150 }; +const tiledb_xptr_object tiledb_xptr_vfs_fh_t { 160 }; +const tiledb_xptr_object tiledb_xptr_vlc_buf_t { 170 }; +const tiledb_xptr_object tiledb_xptr_vlv_buf_t { 180 }; +const tiledb_xptr_object tiledb_xptr_query_buf_t { 190 }; -// the definitions above are internal to tiledb-r but we need a new value here -// if we want tag the external pointer -const tiledb_xptr_object tiledb_arrow_array_t{300}; -const tiledb_xptr_object tiledb_arrow_schema_t{310}; +// the definitions above are internal to tiledb-r but we need a new value here if we want tag the external pointer +const tiledb_xptr_object tiledb_arrow_array_t { 300 }; +const tiledb_xptr_object tiledb_arrow_schema_t { 310 }; -const tiledb_xptr_object tiledb_soma_reader_t{500}; +const tiledb_xptr_object tiledb_soma_reader_t { 500 }; // templated checkers for external pointer tags -template -const int32_t XPtrTagType = tiledb_xptr_default; // clang++ wants a value -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_array; -template <> -inline const int32_t - XPtrTagType = tiledb_xptr_object_arrayschema; -template <> -inline const int32_t XPtrTagType = - tiledb_xptr_object_arrayschemaevolution; -template <> -inline const int32_t - XPtrTagType = tiledb_xptr_object_attribute; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_config; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_context; -template <> -inline const int32_t - XPtrTagType = tiledb_xptr_object_dimension; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_domain; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_filter; -template <> -inline const int32_t - XPtrTagType = tiledb_xptr_object_filterlist; -template <> -inline const int32_t - XPtrTagType = tiledb_xptr_object_fragmentinfo; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_group; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_query; -template <> -inline const int32_t - XPtrTagType = tiledb_xptr_object_query; -template <> -inline const int32_t XPtrTagType = tiledb_xptr_object_vfs; +template const int32_t XPtrTagType = tiledb_xptr_default; // clang++ wants a value +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_array; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_arrayschema; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_arrayschemaevolution; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_attribute; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_config; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_context; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_dimension; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_domain; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_filter; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_filterlist; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_fragmentinfo; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_group; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_query; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_query; +template <> inline const int32_t XPtrTagType = tiledb_xptr_object_vfs; // this need the C API for which we do not include a header -// template <> inline const int32_t XPtrTagType = -// tiledb_xptr_vfs_fh_t; template <> inline const int32_t XPtrTagType -// = tiledb_xptr_vlc_buf_t; template <> inline const int32_t -// XPtrTagType = tiledb_xptr_vlv_buf_t; template -// <> inline const int32_t XPtrTagType = -// tiledb_xptr_query_buf_t; +// template <> inline const int32_t XPtrTagType = tiledb_xptr_vfs_fh_t; +// template <> inline const int32_t XPtrTagType = tiledb_xptr_vlc_buf_t; +// template <> inline const int32_t XPtrTagType = tiledb_xptr_vlv_buf_t; +// template <> inline const int32_t XPtrTagType = tiledb_xptr_query_buf_t; -template <> -inline const int32_t XPtrTagType = tiledb_arrow_array_t; -template <> -inline const int32_t XPtrTagType = tiledb_arrow_schema_t; -template <> -inline const int32_t XPtrTagType = tiledb_soma_reader_t; +template <> inline const int32_t XPtrTagType = tiledb_arrow_array_t; +template <> inline const int32_t XPtrTagType = tiledb_arrow_schema_t; -template -Rcpp::XPtr make_xptr(T* p, bool finalize = true) { +template <> inline const int32_t XPtrTagType = tiledb_soma_reader_t; + +template Rcpp::XPtr make_xptr(T* p, bool finalize=true) { return Rcpp::XPtr(p, finalize, Rcpp::wrap(XPtrTagType), R_NilValue); } -template -Rcpp::XPtr make_xptr(SEXP p) { - return Rcpp::XPtr( - p); // the default XPtr ctor with deleter on and tag and prot nil +template Rcpp::XPtr make_xptr(SEXP p) { + return Rcpp::XPtr(p); // the default XPtr ctor with deleter on and tag and prot nil } -template -void check_xptr_tag(Rcpp::XPtr ptr) { +template void check_xptr_tag(Rcpp::XPtr ptr) { if (R_ExternalPtrTag(ptr) == R_NilValue) { - Rcpp::stop( - "External pointer without tag, expected tag %d\n", XPtrTagType); + Rcpp::stop("External pointer without tag, expected tag %d\n", XPtrTagType); } if (R_ExternalPtrTag(ptr) != R_NilValue) { int32_t tag = Rcpp::as(R_ExternalPtrTag(ptr)); if (XPtrTagType != tag) { - Rcpp::stop( - "Wrong tag type: expected %d but received %d\n", - XPtrTagType, - tag); + Rcpp::stop("Wrong tag type: expected %d but received %d\n", XPtrTagType, tag); } } } @@ -117,3 +81,4 @@ void check_xptr_tag(Rcpp::XPtr ptr) { // in rinterface.cpp Rcpp::XPtr schema_owning_xptr(void); Rcpp::XPtr array_owning_xptr(void); +