Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into cmakelint
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcoGorelli committed Feb 28, 2021
2 parents 6b28115 + b26488d commit b004bc6
Show file tree
Hide file tree
Showing 240 changed files with 9,747 additions and 3,319 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ jobs:
JDK: 11

allow_failures:
- arch: s390x
- name: "Go on s390x"
- name: "Java on s390x"

before_install:
- eval "$(python ci/detect-changes.py)"
Expand Down
27 changes: 4 additions & 23 deletions c_glib/arrow-dataset-glib/scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ gad_scan_options_class_init(GADScanOptionsClass *klass)
gobject_class->set_property = gad_scan_options_set_property;
gobject_class->get_property = gad_scan_options_get_property;

auto scan_options = arrow::dataset::ScanOptions::Make(arrow::schema({}));
auto scan_options = std::make_shared<arrow::dataset::ScanOptions>();

spec = g_param_spec_pointer("scan-options",
"ScanOptions",
Expand Down Expand Up @@ -307,7 +307,8 @@ GADScanOptions *
gad_scan_options_new(GArrowSchema *schema)
{
auto arrow_schema = garrow_schema_get_raw(schema);
auto arrow_scan_options = arrow::dataset::ScanOptions::Make(arrow_schema);
auto arrow_scan_options = std::make_shared<arrow::dataset::ScanOptions>();
arrow_scan_options->dataset_schema = arrow_schema;
return gad_scan_options_new_raw(&arrow_scan_options);
}

Expand All @@ -323,30 +324,10 @@ GArrowSchema *
gad_scan_options_get_schema(GADScanOptions *scan_options)
{
auto priv = GAD_SCAN_OPTIONS_GET_PRIVATE(scan_options);
auto arrow_schema = priv->scan_options->schema();
auto arrow_schema = priv->scan_options->dataset_schema;
return garrow_schema_new_raw(&arrow_schema);
}

/**
 * gad_scan_options_replace_schema:
 * @scan_options: A #GADScanOptions.
 * @schema: A #GArrowSchema.
 *
 * Returns: (transfer full):
 *   A new #GADScanOptions wrapping the result of replacing the schema of
 *   @scan_options with @schema.
 *
 * Since: 1.0.0
 */
GADScanOptions *
gad_scan_options_replace_schema(GADScanOptions *scan_options,
                                GArrowSchema *schema)
{
  /* Build the replaced Arrow options in one expression; the result must be
   * held in a named variable because the raw constructor takes its address. */
  auto replaced_arrow_scan_options =
    GAD_SCAN_OPTIONS_GET_PRIVATE(scan_options)->scan_options->ReplaceSchema(
      garrow_schema_get_raw(schema));
  return gad_scan_options_new_raw(&replaced_arrow_scan_options);
}

/* arrow::dataset::ScanTask */

typedef struct GADScanTaskPrivate_ {
Expand Down
3 changes: 0 additions & 3 deletions c_glib/arrow-dataset-glib/scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ GARROW_AVAILABLE_IN_1_0
GADScanOptions *gad_scan_options_new(GArrowSchema *schema);
GARROW_AVAILABLE_IN_1_0
GArrowSchema *gad_scan_options_get_schema(GADScanOptions *scan_options);
GARROW_AVAILABLE_IN_1_0
GADScanOptions *gad_scan_options_replace_schema(GADScanOptions *scan_options,
GArrowSchema *schema);

/* arrow::dataset::ScanTask */

Expand Down
3 changes: 1 addition & 2 deletions c_glib/arrow-glib/reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1591,8 +1591,7 @@ garrow_csv_reader_new(GArrowInputStream *input,
}

auto arrow_reader =
arrow::csv::TableReader::Make(arrow::default_memory_pool(),
arrow::io::AsyncContext(),
arrow::csv::TableReader::Make(arrow::io::default_io_context(),
arrow_input,
read_options,
parse_options,
Expand Down
7 changes: 0 additions & 7 deletions c_glib/test/dataset/test-scan-options.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,4 @@ def test_batch_size
assert_equal(42,
@scan_options.batch_size)
end

# replace_schema must hand back scan options that carry the new schema
# instead of the one the fixture was created with.
def test_replace_schema
  visible_field = Arrow::Field.new("visible", Arrow::BooleanDataType.new)
  other_schema = Arrow::Schema.new([visible_field])
  other_scan_options = @scan_options.replace_schema(other_schema)
  assert_not_equal(@schema, other_scan_options.schema)
  assert_equal(other_schema, other_scan_options.schema)
end
end
4 changes: 2 additions & 2 deletions c_glib/test/test-decimal128-data-type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ def test_type

def test_name
data_type = Arrow::Decimal128DataType.new(2, 0)
assert_equal("decimal", data_type.name)
assert_equal("decimal128", data_type.name)
end

def test_to_s
data_type = Arrow::Decimal128DataType.new(2, 0)
assert_equal("decimal(2, 0)", data_type.to_s)
assert_equal("decimal128(2, 0)", data_type.to_s)
end

def test_precision
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/minimal_build/example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ Status RunMain(int argc, char** argv) {
ARROW_ASSIGN_OR_RAISE(
auto csv_reader,
arrow::csv::TableReader::Make(arrow::default_memory_pool(),
arrow::io::AsyncContext(),
arrow::io::default_io_context(),
input_file,
arrow::csv::ReadOptions::Defaults(),
arrow::csv::ParseOptions::Defaults(),
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,9 @@ if(ARROW_CSV)
csv/options.cc
csv/parser.cc
csv/reader.cc)
if(ARROW_COMPUTE)
list(APPEND ARROW_SRCS csv/writer.cc)
endif()

list(APPEND ARROW_TESTING_SRCS csv/test_common.cc)
endif()
Expand All @@ -366,6 +369,7 @@ if(ARROW_COMPUTE)
compute/kernels/aggregate_basic.cc
compute/kernels/aggregate_mode.cc
compute/kernels/aggregate_quantile.cc
compute/kernels/aggregate_tdigest.cc
compute/kernels/aggregate_var_std.cc
compute/kernels/codegen_internal.cc
compute/kernels/scalar_arithmetic.cc
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/compute/api_aggregate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,5 +68,10 @@ Result<Datum> Quantile(const Datum& value, const QuantileOptions& options,
return CallFunction("quantile", {value}, &options, ctx);
}

// Convenience wrapper: calls the function named "tdigest" with `value` as its
// only argument, forwarding the caller's options and execution context.
Result<Datum> TDigest(const Datum& value, const TDigestOptions& options,
                      ExecContext* ctx) {
  const std::vector<Datum> args = {value};
  return CallFunction("tdigest", args, &options, ctx);
}

} // namespace compute
} // namespace arrow
36 changes: 36 additions & 0 deletions cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,28 @@ struct ARROW_EXPORT QuantileOptions : public FunctionOptions {
enum Interpolation interpolation;
};

/// \brief Options for the TDigest approximate quantile kernel
///
/// A default-constructed instance requests a single quantile, 0.5 (the median).
struct ARROW_EXPORT TDigestOptions : public FunctionOptions {
  /// Request a single quantile; it is stored as a one-element vector.
  explicit TDigestOptions(double q = 0.5, uint32_t delta = 100,
                          uint32_t buffer_size = 500)
      : TDigestOptions(std::vector<double>(1, q), delta, buffer_size) {}

  /// Request several quantiles at once.
  explicit TDigestOptions(std::vector<double> q, uint32_t delta = 100,
                          uint32_t buffer_size = 500)
      : q(std::move(q)), delta(delta), buffer_size(buffer_size) {}

  static TDigestOptions Defaults() { return TDigestOptions(); }

  /// requested quantiles; each must be between 0 and 1 inclusive
  std::vector<double> q;
  /// compression parameter, default 100
  uint32_t delta;
  /// input buffer size, default 500
  uint32_t buffer_size;
};

/// @}

/// \brief Count non-null (or null) values in an array.
Expand Down Expand Up @@ -270,5 +292,19 @@ Result<Datum> Quantile(const Datum& value,
const QuantileOptions& options = QuantileOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Calculate approximate quantiles of a numeric array with the T-Digest
/// algorithm
///
/// \param[in] value input datum, expecting Array or ChunkedArray
/// \param[in] options see TDigestOptions for more information; defaults to
/// TDigestOptions::Defaults()
/// \param[in] ctx the function execution context, optional
/// \return resulting datum as an array
///
/// \since 4.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> TDigest(const Datum& value,
const TDigestOptions& options = TDigestOptions::Defaults(),
ExecContext* ctx = NULLPTR);

} // namespace compute
} // namespace arrow
17 changes: 16 additions & 1 deletion cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,25 @@ struct CompareOptions : public FunctionOptions {
};

struct ARROW_EXPORT ProjectOptions : public FunctionOptions {
explicit ProjectOptions(std::vector<std::string> n) : field_names(std::move(n)) {}
ProjectOptions(std::vector<std::string> n, std::vector<bool> r,
std::vector<std::shared_ptr<const KeyValueMetadata>> m)
: field_names(std::move(n)),
field_nullability(std::move(r)),
field_metadata(std::move(m)) {}

explicit ProjectOptions(std::vector<std::string> n)
: field_names(std::move(n)),
field_nullability(field_names.size(), true),
field_metadata(field_names.size(), NULLPTR) {}

/// Names for wrapped columns
std::vector<std::string> field_names;

/// Nullability bits for wrapped columns
std::vector<bool> field_nullability;

/// Metadata attached to wrapped columns
std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
};

/// @}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@ Result<std::shared_ptr<Array>> Unique(const Datum& value, ExecContext* ctx) {
return result.make_array();
}

Result<Datum> DictionaryEncode(const Datum& value, ExecContext* ctx) {
return CallFunction("dictionary_encode", {value}, ctx);
Result<Datum> DictionaryEncode(const Datum& value, const DictionaryEncodeOptions& options,
ExecContext* ctx) {
return CallFunction("dictionary_encode", {value}, &options, ctx);
}

const char kValuesFieldName[] = "values";
Expand Down
35 changes: 34 additions & 1 deletion cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,24 @@ enum class SortOrder {
Descending,
};

/// \brief Options for the dictionary encode function
///
/// The only setting is how null values are represented in the encoded output.
struct DictionaryEncodeOptions : public FunctionOptions {
  /// Selects the representation used for null values
  enum NullEncodingBehavior {
    /// the null value is added to the dictionary and receives a proper index
    ENCODE,
    /// the null value is masked in the indices array
    MASK
  };

  explicit DictionaryEncodeOptions(NullEncodingBehavior null_encoding = MASK)
      : null_encoding_behavior{null_encoding} {}

  static DictionaryEncodeOptions Defaults() { return DictionaryEncodeOptions{}; }

  /// chosen null handling; MASK unless the constructor is told otherwise
  NullEncodingBehavior null_encoding_behavior = MASK;
};

/// \brief One sort key for PartitionNthIndices (TODO) and SortIndices
struct ARROW_EXPORT SortKey {
explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending)
Expand Down Expand Up @@ -289,14 +307,29 @@ Result<std::shared_ptr<StructArray>> ValueCounts(const Datum& value,
ExecContext* ctx = NULLPTR);

/// \brief Dictionary-encode values in an array-like object
///
/// Any nulls encountered in the dictionary will be handled according to the
/// specified null encoding behavior.
///
/// For example, given values ["a", "b", null, "a", null] the output will be
/// (null_encoding == ENCODE) Indices: [0, 1, 2, 0, 2] / Dict: ["a", "b", null]
/// (null_encoding == MASK) Indices: [0, 1, null, 0, null] / Dict: ["a", "b"]
///
/// If the input is already dictionary encoded this function is a no-op unless
/// it needs to modify the null_encoding (TODO)
///
/// \param[in] data array-like input
/// \param[in] options configures null encoding behavior
/// \param[in] ctx the function execution context, optional
/// \return result with same shape and type as input
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> DictionaryEncode(const Datum& data, ExecContext* ctx = NULLPTR);
Result<Datum> DictionaryEncode(
const Datum& data,
const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
ExecContext* ctx = NULLPTR);

// ----------------------------------------------------------------------
// Deprecated functions
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ class KernelExecutorImpl : public KernelExecutor {
if (validity_preallocated_) {
ARROW_ASSIGN_OR_RAISE(out->buffers[0], kernel_ctx_->AllocateBitmap(length));
}
if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) {
out->null_count = 0;
}
for (size_t i = 0; i < data_preallocated_.size(); ++i) {
const auto& prealloc = data_preallocated_[i];
if (prealloc.bit_width >= 0) {
Expand Down
Loading

0 comments on commit b004bc6

Please sign in to comment.