From 88aaf24d6f37079054151ed4da706fbbd33022a0 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 16 Jan 2025 10:40:38 +0100 Subject: [PATCH] GH-45279: [C++][Compute] Move all Grouper tests to grouper_test.cc --- cpp/src/arrow/acero/CMakeLists.txt | 3 +- cpp/src/arrow/acero/aggregate_node.h | 1 + cpp/src/arrow/acero/aggregate_node_test.cc | 1 + cpp/src/arrow/acero/asof_join_node_test.cc | 3 +- cpp/src/arrow/acero/hash_aggregate_test.cc | 927 ----------------- cpp/src/arrow/acero/hash_join_node_test.cc | 12 +- cpp/src/arrow/acero/plan_test.cc | 1 + cpp/src/arrow/acero/test_util_internal.cc | 87 +- cpp/src/arrow/acero/test_util_internal.h | 12 - cpp/src/arrow/acero/tpch_node_test.cc | 1 - cpp/src/arrow/compute/CMakeLists.txt | 23 +- cpp/src/arrow/compute/kernels/CMakeLists.txt | 35 +- .../arrow/compute/kernels/aggregate_test.cc | 2 +- .../compute/kernels/scalar_arithmetic_test.cc | 2 +- .../compute/kernels/scalar_boolean_test.cc | 2 +- .../arrow/compute/kernels/scalar_cast_test.cc | 2 +- .../compute/kernels/scalar_compare_test.cc | 2 +- .../compute/kernels/scalar_if_else_test.cc | 2 +- .../compute/kernels/scalar_nested_test.cc | 2 +- .../compute/kernels/scalar_random_test.cc | 2 +- .../kernels/scalar_round_arithmetic_test.cc | 2 +- .../compute/kernels/scalar_set_lookup_test.cc | 2 +- .../compute/kernels/scalar_string_test.cc | 2 +- .../compute/kernels/scalar_temporal_test.cc | 2 +- .../compute/kernels/scalar_validity_test.cc | 2 +- .../arrow/compute/kernels/select_k_test.cc | 2 +- .../{test_util.cc => test_util_internal.cc} | 58 +- .../{test_util.h => test_util_internal.h} | 3 +- .../kernels/vector_cumulative_ops_test.cc | 2 +- .../arrow/compute/kernels/vector_hash_test.cc | 2 +- .../compute/kernels/vector_nested_test.cc | 2 +- .../compute/kernels/vector_pairwise_test.cc | 2 +- .../compute/kernels/vector_replace_test.cc | 2 +- .../compute/kernels/vector_selection_test.cc | 2 +- .../arrow/compute/kernels/vector_sort_test.cc | 2 +- .../compute/kernels/vector_swizzle_test.cc | 2 +- cpp/src/arrow/compute/row/grouper_test.cc | 940 +++++++++++++++++- cpp/src/arrow/compute/test_util_internal.cc | 123 +++ cpp/src/arrow/compute/test_util_internal.h | 42 + cpp/src/arrow/dataset/file_test.cc | 6 +- cpp/src/arrow/dataset/test_util_internal.cc | 30 - cpp/src/arrow/dataset/test_util_internal.h | 10 - 42 files changed, 1185 insertions(+), 1177 deletions(-) rename cpp/src/arrow/compute/kernels/{test_util.cc => test_util_internal.cc} (89%) rename cpp/src/arrow/compute/kernels/{test_util.h => test_util_internal.h} (99%) create mode 100644 cpp/src/arrow/compute/test_util_internal.cc create mode 100644 cpp/src/arrow/compute/test_util_internal.h diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index 0a2536b11e33c..7c58df7285648 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -109,7 +109,8 @@ endforeach() if(ARROW_TESTING) # test_nodes.cc isn't used by all tests but link to it for simple # CMakeLists.txt. - add_library(arrow_acero_testing OBJECT test_nodes.cc test_util_internal.cc) + add_library(arrow_acero_testing OBJECT test_nodes.cc test_util_internal.cc + ../compute/test_util_internal.cc) # Even though this is still just an object library we still need to "link" our # dependencies so that include paths are configured correctly target_link_libraries(arrow_acero_testing PRIVATE ${ARROW_ACERO_TEST_LINK_LIBS}) diff --git a/cpp/src/arrow/acero/aggregate_node.h b/cpp/src/arrow/acero/aggregate_node.h index 790264b208305..0c6fea16a8acc 100644 --- a/cpp/src/arrow/acero/aggregate_node.h +++ b/cpp/src/arrow/acero/aggregate_node.h @@ -24,6 +24,7 @@ #include "arrow/acero/visibility.h" #include "arrow/compute/api_aggregate.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc b/cpp/src/arrow/acero/aggregate_node_test.cc index c623271db9fb4..83f877106b6df 100644 --- a/cpp/src/arrow/acero/aggregate_node_test.cc +++ b/cpp/src/arrow/acero/aggregate_node_test.cc @@ -24,6 +24,7 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/compute/api_aggregate.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/result.h" #include "arrow/table.h" #include "arrow/testing/gtest_util.h" diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index 64d41ccb1ab20..52d58ec4e74e9 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -41,8 +41,9 @@ #include "arrow/acero/util.h" #include "arrow/api.h" #include "arrow/compute/api_scalar.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/cast.h" #include "arrow/compute/row/row_encoder_internal.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 1e2975afc91b3..769ac2164b348 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -42,7 +42,6 @@ #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" -#include "arrow/compute/row/grouper_internal.h" #include "arrow/table.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" @@ -159,8 +158,6 @@ TEST(AggregateSchema, SingleKeyAndSegmentKey) { output_schema); } -namespace { - using GroupByFunction = std::function( const std::vector&, const std::vector&, const std::vector&, const std::vector&, bool, bool)>; @@ -538,930 +535,6 @@ Result GroupByTest(GroupByFunction group_by, const std::vector& ar return GroupByTest(group_by, arguments, keys, {}, aggregates, use_threads); } -template -void TestGroupClassSupportedKeys( - std::function>(const std::vector&)> - make_func) { - ASSERT_OK(make_func({boolean()})); - - ASSERT_OK(make_func({int8(), uint16(), int32(), uint64()})); - - ASSERT_OK(make_func({dictionary(int64(), utf8())})); - - ASSERT_OK(make_func({float16(), float32(), float64()})); - - ASSERT_OK(make_func({utf8(), binary(), large_utf8(), large_binary()})); - - ASSERT_OK(make_func({fixed_size_binary(16), fixed_size_binary(32)})); - - ASSERT_OK(make_func({decimal128(32, 10), decimal256(76, 20)})); - - ASSERT_OK(make_func({date32(), date64()})); - - for (auto unit : { - TimeUnit::SECOND, - TimeUnit::MILLI, - TimeUnit::MICRO, - TimeUnit::NANO, - }) { - ASSERT_OK(make_func({timestamp(unit), duration(unit)})); - } - - ASSERT_OK( - make_func({day_time_interval(), month_interval(), month_day_nano_interval()})); - - ASSERT_OK(make_func({null()})); - - ASSERT_RAISES(NotImplemented, make_func({struct_({field("", int64())})})); - - ASSERT_RAISES(NotImplemented, make_func({struct_({})})); - - ASSERT_RAISES(NotImplemented, make_func({list(int32())})); - - ASSERT_RAISES(NotImplemented, make_func({fixed_size_list(int32(), 5)})); - - ASSERT_RAISES(NotImplemented, make_func({dense_union({field("", int32())})})); -} - -void TestSegments(std::unique_ptr& segmenter, const ExecSpan& batch, - std::vector expected_segments) { - ASSERT_OK_AND_ASSIGN(auto actual_segments, segmenter->GetSegments(batch)); - ASSERT_EQ(actual_segments.size(), expected_segments.size()); - for (size_t i = 0; i < actual_segments.size(); ++i) { - SCOPED_TRACE("segment #" + ToChars(i)); - ASSERT_EQ(actual_segments[i], expected_segments[i]); - } -} - -Result> MakeGrouper(const std::vector& key_types) { - return Grouper::Make(key_types, default_exec_context()); -} - -Result> MakeRowSegmenter( - const std::vector& key_types) { - return RowSegmenter::Make(key_types, /*nullable_leys=*/false, default_exec_context()); -} - -Result> MakeGenericSegmenter( - const std::vector& key_types) { - return MakeAnyKeysSegmenter(key_types, default_exec_context()); -} - -} // namespace - -TEST(RowSegmenter, SupportedKeys) { - TestGroupClassSupportedKeys(MakeRowSegmenter); -} - -TEST(RowSegmenter, Basics) { - std::vector bad_types2 = {int32(), float32()}; - std::vector types2 = {int32(), int32()}; - std::vector bad_types1 = {float32()}; - std::vector types1 = {int32()}; - std::vector types0 = {}; - auto batch2 = ExecBatchFromJSON(types2, "[[1, 1], [1, 2], [2, 2]]"); - auto batch1 = ExecBatchFromJSON(types1, "[[1], [1], [2]]"); - ExecBatch batch0({}, 3); - { - SCOPED_TRACE("types0 segmenting of batch2"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types0)); - ExecSpan span2(batch2); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 0 "), - segmenter->GetSegments(span2)); - ExecSpan span0(batch0); - TestSegments(segmenter, span0, {{0, 3, true, true}}); - } - { - SCOPED_TRACE("bad_types1 segmenting of batch1"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(bad_types1)); - ExecSpan span1(batch1); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch value 0 of type "), - segmenter->GetSegments(span1)); - } - { - SCOPED_TRACE("types1 segmenting of batch2"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types1)); - ExecSpan span2(batch2); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 1 "), - segmenter->GetSegments(span2)); - ExecSpan span1(batch1); - TestSegments(segmenter, span1, {{0, 2, false, true}, {2, 1, true, false}}); - } - { - SCOPED_TRACE("bad_types2 segmenting of batch2"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(bad_types2)); - ExecSpan span2(batch2); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch value 1 of type "), - segmenter->GetSegments(span2)); - } - { - SCOPED_TRACE("types2 segmenting of batch1"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types2)); - ExecSpan span1(batch1); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("expected batch size 2 "), - segmenter->GetSegments(span1)); - ExecSpan span2(batch2); - TestSegments(segmenter, span2, - {{0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}}); - } -} - -TEST(RowSegmenter, NonOrdered) { - for (int num_keys = 1; num_keys <= 2; ++num_keys) { - SCOPED_TRACE("non-ordered " + ToChars(num_keys) + " int32(s)"); - std::vector types(num_keys, int32()); - std::vector values(num_keys, ArrayFromJSON(int32(), "[1, 1, 2, 1, 2]")); - ExecBatch batch(std::move(values), 5); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 1, true, false}}); - } -} - -TEST(RowSegmenter, EmptyBatches) { - { - SCOPED_TRACE("empty batches {int32}"); - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), {}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); - } - { - SCOPED_TRACE("empty batches {int32, int32}"); - std::vector types = {int32(), int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1, 1]]"), - ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1, 1]]"), - ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2, 2], [2, 2]]"), - ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), {}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); - } -} - -TEST(RowSegmenter, MultipleSegments) { - auto test_with_keys = [](int num_keys, const std::shared_ptr& key) { - SCOPED_TRACE("multiple segments " + ToChars(num_keys) + " " + - key->type()->ToString()); - std::vector types(num_keys, key->type()); - std::vector values(num_keys, key); - ExecBatch batch(std::move(values), key->length()); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 2, false, false}, - {6, 2, false, false}, - {8, 1, true, false}}); - }; - for (int num_keys = 1; num_keys <= 2; ++num_keys) { - test_with_keys(num_keys, ArrayFromJSON(int32(), "[1, 1, 2, 5, 3, 3, 5, 5, 4]")); - test_with_keys( - num_keys, - ArrayFromJSON(fixed_size_binary(2), - R"(["aa", "aa", "bb", "ee", "cc", "cc", "ee", "ee", "dd"])")); - test_with_keys(num_keys, DictArrayFromJSON(dictionary(int8(), utf8()), - "[0, 0, 1, 4, 2, 2, 4, 4, 3]", - R"(["a", "b", "c", "d", "e"])")); - } -} - -TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { - { - SCOPED_TRACE("multiple segments multiple batches {int32}"); - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[[1], [2]]"), - ExecBatchFromJSON(types, "[[5], [3]]"), - ExecBatchFromJSON(types, "[[3], [5], [5]]"), ExecBatchFromJSON(types, "[[4]]")}; - - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[1]), - {{0, 1, false, true}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[2]), - {{0, 1, false, false}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[3]), - {{0, 1, false, true}, {1, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); - } - { - SCOPED_TRACE("multiple segments multiple batches {int32, int32}"); - std::vector types = {int32(), int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[[1, 1]]"), - ExecBatchFromJSON(types, "[[1, 1], [2, 2]]"), - ExecBatchFromJSON(types, "[[5, 5], [3, 3]]"), - ExecBatchFromJSON(types, "[[3, 3], [5, 5], [5, 5]]"), - ExecBatchFromJSON(types, "[[4, 4]]")}; - - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[1]), - {{0, 1, false, true}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[2]), - {{0, 1, false, false}, {1, 1, true, false}}); - TestSegments(segmenter, ExecSpan(batches[3]), - {{0, 1, false, true}, {1, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); - } -} - -namespace { - -void TestRowSegmenterConstantBatch( - const std::shared_ptr& type, - std::function shape_func, - std::function>(int64_t key)> value_func, - std::function>(const std::vector&)> - make_segmenter) { - constexpr int64_t n_keys = 3, n_rows = 3, repetitions = 3; - std::vector types(n_keys, type); - std::vector full_values(n_keys); - for (int64_t i = 0; i < n_keys; i++) { - auto shape = shape_func(i); - ASSERT_OK_AND_ASSIGN(auto scalar, value_func(i)); - if (shape == ArgShape::SCALAR) { - full_values[i] = std::move(scalar); - } else { - ASSERT_OK_AND_ASSIGN(full_values[i], MakeArrayFromScalar(*scalar, n_rows)); - } - } - auto test_with_keys = [&](int64_t keys) -> Status { - SCOPED_TRACE("constant-batch with " + ToChars(keys) + " key(s)"); - std::vector values(full_values.begin(), full_values.begin() + keys); - ExecBatch batch(values, n_rows); - std::vector key_types(types.begin(), types.begin() + keys); - ARROW_ASSIGN_OR_RAISE(auto segmenter, make_segmenter(key_types)); - for (int64_t i = 0; i < repetitions; i++) { - TestSegments(segmenter, ExecSpan(batch), {{0, n_rows, true, true}}); - ARROW_RETURN_NOT_OK(segmenter->Reset()); - } - return Status::OK(); - }; - for (int64_t i = 0; i <= n_keys; i++) { - ASSERT_OK(test_with_keys(i)); - } -} - -} // namespace - -TEST(RowSegmenter, ConstantArrayBatch) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantScalarBatch) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::SCALAR; }, - [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantMixedBatch) { - TestRowSegmenterConstantBatch( - int32(), - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantArrayBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantScalarBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch( - int32(), [](int64_t key) { return ArgShape::SCALAR; }, - [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantMixedBatchWithAnyKeysSegmenter) { - TestRowSegmenterConstantBatch( - int32(), - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [](int64_t key) { return MakeScalar(1); }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatch) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryScalarBatch) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryMixedBatch) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryArrayBatchWithAnyKeysSegmenter) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryScalarBatchWithAnyKeysSegmenter) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantFixedSizeBinaryMixedBatchWithAnyKeysSegmenter) { - constexpr int fsb = 8; - auto type = fixed_size_binary(fsb); - ASSERT_OK_AND_ASSIGN(auto value, MakeScalar(type, std::string(fsb, 'X'))); - TestRowSegmenterConstantBatch( - fixed_size_binary(8), - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryArrayBatch) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryScalarBatch) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return dict_value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryMixedBatch) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeRowSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryArrayBatchWithAnyKeysSegmenter) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryScalarBatchWithAnyKeysSegmenter) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, [](int64_t key) { return ArgShape::SCALAR; }, - [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, ConstantDictionaryMixedBatchWithAnyKeysSegmenter) { - auto index_type = int32(); - auto value_type = utf8(); - auto dict_type = dictionary(index_type, value_type); - auto dict = ArrayFromJSON(value_type, R"(["alpha", null, "gamma"])"); - ASSERT_OK_AND_ASSIGN(auto index_value, MakeScalar(index_type, 0)); - auto dict_value = DictionaryScalar::Make(std::move(index_value), dict); - TestRowSegmenterConstantBatch( - dict_type, - [](int64_t key) { return key % 2 == 0 ? ArgShape::SCALAR : ArgShape::ARRAY; }, - [&](int64_t key) { return dict_value; }, MakeGenericSegmenter); -} - -TEST(RowSegmenter, RowConstantBatch) { - constexpr size_t n = 3; - std::vector types = {int32(), int32(), int32()}; - auto full_batch = ExecBatchFromJSON(types, "[[1, 1, 1], [2, 2, 2], [3, 3, 3]]"); - std::vector expected_segments_for_size_0 = {{0, 3, true, true}}; - std::vector expected_segments = { - {0, 1, false, true}, {1, 1, false, false}, {2, 1, true, false}}; - auto test_by_size = [&](size_t size) -> Status { - SCOPED_TRACE("constant-batch with " + ToChars(size) + " key(s)"); - std::vector values(full_batch.values.begin(), - full_batch.values.begin() + size); - ExecBatch batch(values, full_batch.length); - std::vector key_types(types.begin(), types.begin() + size); - ARROW_ASSIGN_OR_RAISE(auto segmenter, MakeRowSegmenter(key_types)); - TestSegments(segmenter, ExecSpan(batch), - size == 0 ? expected_segments_for_size_0 : expected_segments); - return Status::OK(); - }; - for (size_t i = 0; i <= n; i++) { - ASSERT_OK(test_by_size(i)); - } -} - -TEST(Grouper, SupportedKeys) { TestGroupClassSupportedKeys(MakeGrouper); } - -struct TestGrouper { - explicit TestGrouper(std::vector types, std::vector shapes = {}) - : types_(std::move(types)), shapes_(std::move(shapes)) { - grouper_ = Grouper::Make(types_).ValueOrDie(); - - FieldVector fields; - for (const auto& type : types_) { - fields.push_back(field("", type.GetSharedPtr())); - } - key_schema_ = schema(std::move(fields)); - } - - void ExpectConsume(const std::string& key_json, const std::string& expected) { - auto expected_arr = ArrayFromJSON(uint32(), expected); - if (shapes_.size() > 0) { - ExpectConsume(ExecBatchFromJSON(types_, shapes_, key_json), expected_arr); - } else { - ExpectConsume(ExecBatchFromJSON(types_, key_json), expected_arr); - } - } - - void ExpectConsume(const std::vector& key_values, Datum expected) { - ASSERT_OK_AND_ASSIGN(auto key_batch, ExecBatch::Make(key_values)); - ExpectConsume(key_batch, expected); - } - - void ExpectConsume(const ExecBatch& key_batch, Datum expected) { - Datum ids; - ConsumeAndValidate(key_batch, &ids); - AssertEquivalentIds(expected, ids); - } - - void ExpectUniques(const ExecBatch& uniques) { - EXPECT_THAT(grouper_->GetUniques(), ResultWith(Eq(uniques))); - } - - void ExpectUniques(const std::string& uniques_json) { - if (shapes_.size() > 0) { - ExpectUniques(ExecBatchFromJSON(types_, shapes_, uniques_json)); - } else { - ExpectUniques(ExecBatchFromJSON(types_, uniques_json)); - } - } - - void AssertEquivalentIds(const Datum& expected, const Datum& actual) { - auto left = expected.make_array(); - auto right = actual.make_array(); - ASSERT_EQ(left->length(), right->length()) << "#ids unequal"; - int64_t num_ids = left->length(); - auto left_data = left->data(); - auto right_data = right->data(); - auto left_ids = reinterpret_cast(left_data->buffers[1]->data()); - auto right_ids = reinterpret_cast(right_data->buffers[1]->data()); - uint32_t max_left_id = 0; - uint32_t max_right_id = 0; - for (int64_t i = 0; i < num_ids; ++i) { - if (left_ids[i] > max_left_id) { - max_left_id = left_ids[i]; - } - if (right_ids[i] > max_right_id) { - max_right_id = right_ids[i]; - } - } - std::vector right_to_left_present(max_right_id + 1, false); - std::vector left_to_right_present(max_left_id + 1, false); - std::vector right_to_left(max_right_id + 1); - std::vector left_to_right(max_left_id + 1); - for (int64_t i = 0; i < num_ids; ++i) { - uint32_t left_id = left_ids[i]; - uint32_t right_id = right_ids[i]; - if (!left_to_right_present[left_id]) { - left_to_right[left_id] = right_id; - left_to_right_present[left_id] = true; - } - if (!right_to_left_present[right_id]) { - right_to_left[right_id] = left_id; - right_to_left_present[right_id] = true; - } - ASSERT_EQ(left_id, right_to_left[right_id]); - ASSERT_EQ(right_id, left_to_right[left_id]); - } - } - - void ConsumeAndValidate(const ExecBatch& key_batch, Datum* ids = nullptr) { - ASSERT_OK_AND_ASSIGN(Datum id_batch, grouper_->Consume(ExecSpan(key_batch))); - - ValidateConsume(key_batch, id_batch); - - if (ids) { - *ids = std::move(id_batch); - } - } - - void ValidateConsume(const ExecBatch& key_batch, const Datum& id_batch) { - if (uniques_.length == -1) { - ASSERT_OK_AND_ASSIGN(uniques_, grouper_->GetUniques()); - } else if (static_cast(grouper_->num_groups()) > uniques_.length) { - ASSERT_OK_AND_ASSIGN(ExecBatch new_uniques, grouper_->GetUniques()); - - // check that uniques_ are prefixes of new_uniques - for (int i = 0; i < uniques_.num_values(); ++i) { - auto new_unique = new_uniques[i].make_array(); - ValidateOutput(*new_unique); - - AssertDatumsEqual(uniques_[i], new_unique->Slice(0, uniques_.length), - /*verbose=*/true); - } - - uniques_ = std::move(new_uniques); - } - - // check that the ids encode an equivalent key sequence - auto ids = id_batch.make_array(); - ValidateOutput(*ids); - - for (int i = 0; i < key_batch.num_values(); ++i) { - SCOPED_TRACE(ToChars(i) + "th key array"); - auto original = - key_batch[i].is_array() - ? key_batch[i].make_array() - : *MakeArrayFromScalar(*key_batch[i].scalar(), key_batch.length); - ASSERT_OK_AND_ASSIGN(auto encoded, Take(*uniques_[i].make_array(), *ids)); - AssertArraysEqual(*original, *encoded, /*verbose=*/true, - EqualOptions().nans_equal(true)); - } - } - - std::vector types_; - std::vector shapes_; - std::shared_ptr key_schema_; - std::unique_ptr grouper_; - ExecBatch uniques_ = ExecBatch({}, -1); -}; - -TEST(Grouper, BooleanKey) { - TestGrouper g({boolean()}); - - g.ExpectConsume("[[true], [true]]", "[0, 0]"); - - g.ExpectConsume("[[true], [true]]", "[0, 0]"); - - g.ExpectConsume("[[false], [null]]", "[1, 2]"); - - g.ExpectConsume("[[true], [false], [true], [false], [null], [false], [null]]", - "[0, 1, 0, 1, 2, 1, 2]"); -} - -TEST(Grouper, NumericKey) { - for (auto ty : { - uint8(), - int8(), - uint16(), - int16(), - uint32(), - int32(), - uint64(), - int64(), - float16(), - float32(), - float64(), - }) { - SCOPED_TRACE("key type: " + ty->ToString()); - - TestGrouper g({ty}); - - g.ExpectConsume("[[3], [3]]", "[0, 0]"); - g.ExpectUniques("[[3]]"); - - g.ExpectConsume("[[3], [3]]", "[0, 0]"); - g.ExpectUniques("[[3]]"); - - g.ExpectConsume("[[27], [81], [81]]", "[1, 2, 2]"); - g.ExpectUniques("[[3], [27], [81]]"); - - g.ExpectConsume("[[3], [27], [3], [27], [null], [81], [27], [81]]", - "[0, 1, 0, 1, 3, 2, 1, 2]"); - g.ExpectUniques("[[3], [27], [81], [null]]"); - } -} - -TEST(Grouper, FloatingPointKey) { - TestGrouper g({float32()}); - - // -0.0 hashes differently from 0.0 - g.ExpectConsume("[[0.0], [-0.0]]", "[0, 1]"); - - g.ExpectConsume("[[Inf], [-Inf]]", "[2, 3]"); - - // assert(!(NaN == NaN)) does not cause spurious new groups - g.ExpectConsume("[[NaN], [NaN]]", "[4, 4]"); - - // TODO(bkietz) test denormal numbers, more NaNs -} - -TEST(Grouper, StringKey) { - for (auto ty : {utf8(), large_utf8(), fixed_size_binary(2)}) { - SCOPED_TRACE("key type: " + ty->ToString()); - - TestGrouper g({ty}); - - g.ExpectConsume(R"([["eh"], ["eh"]])", "[0, 0]"); - - g.ExpectConsume(R"([["eh"], ["eh"]])", "[0, 0]"); - - g.ExpectConsume(R"([["be"], [null]])", "[1, 2]"); - } -} - -TEST(Grouper, DictKey) { - TestGrouper g({dictionary(int32(), utf8())}); - - // For dictionary keys, all batches must share a single dictionary. - // Eventually, differing dictionaries will be unified and indices transposed - // during encoding to relieve this restriction. - const auto dict = ArrayFromJSON(utf8(), R"(["ex", "why", "zee", null])"); - - auto WithIndices = [&](const std::string& indices) { - return Datum(*DictionaryArray::FromArrays(ArrayFromJSON(int32(), indices), dict)); - }; - - // NB: null index is not considered equivalent to index=3 (which encodes null in dict) - g.ExpectConsume({WithIndices(" [3, 1, null, 0, 2]")}, - ArrayFromJSON(uint32(), "[0, 1, 2, 3, 4]")); - - g = TestGrouper({dictionary(int32(), utf8())}); - - g.ExpectConsume({WithIndices(" [0, 1, 2, 3, null]")}, - ArrayFromJSON(uint32(), "[0, 1, 2, 3, 4]")); - - g.ExpectConsume({WithIndices(" [3, 1, null, 0, 2]")}, - ArrayFromJSON(uint32(), "[3, 1, 4, 0, 2]")); - - auto dict_arr = *DictionaryArray::FromArrays( - ArrayFromJSON(int32(), "[0, 1]"), - ArrayFromJSON(utf8(), R"(["different", "dictionary"])")); - ExecSpan dict_span({*dict_arr->data()}, 2); - EXPECT_RAISES_WITH_MESSAGE_THAT(NotImplemented, - HasSubstr("Unifying differing dictionaries"), - g.grouper_->Consume(dict_span)); -} - -TEST(Grouper, StringInt64Key) { - TestGrouper g({utf8(), int64()}); - - g.ExpectConsume(R"([["eh", 0], ["eh", 0]])", "[0, 0]"); - - g.ExpectConsume(R"([["eh", 0], ["eh", null]])", "[0, 1]"); - - g.ExpectConsume(R"([["eh", 1], ["bee", 1]])", "[2, 3]"); - - g.ExpectConsume(R"([["eh", null], ["bee", 1]])", "[1, 3]"); - - g = TestGrouper({utf8(), int64()}); - - g.ExpectConsume(R"([ - ["ex", 0], - ["ex", 0], - ["why", 0], - ["ex", 1], - ["why", 0], - ["ex", 1], - ["ex", 0], - ["why", 1] - ])", - "[0, 0, 1, 2, 1, 2, 0, 3]"); - - g.ExpectConsume(R"([ - ["ex", 0], - [null, 0], - [null, 0], - ["ex", 1], - [null, null], - ["ex", 1], - ["ex", 0], - ["why", null] - ])", - "[0, 4, 4, 2, 5, 2, 0, 6]"); -} - -TEST(Grouper, DoubleStringInt64Key) { - TestGrouper g({float64(), utf8(), int64()}); - - g.ExpectConsume(R"([[1.5, "eh", 0], [1.5, "eh", 0]])", "[0, 0]"); - - g.ExpectConsume(R"([[1.5, "eh", 0], [1.5, "eh", 0]])", "[0, 0]"); - - g.ExpectConsume(R"([[1.0, "eh", 0], [1.0, "be", null]])", "[1, 2]"); - - // note: -0 and +0 hash differently - g.ExpectConsume(R"([[-0.0, "be", 7], [0.0, "be", 7]])", "[3, 4]"); -} - -TEST(Grouper, RandomInt64Keys) { - TestGrouper g({int64()}); - for (int i = 0; i < 4; ++i) { - SCOPED_TRACE(ToChars(i) + "th key batch"); - - ExecBatch key_batch{ - *random::GenerateBatch(g.key_schema_->fields(), 1 << 12, 0xDEADBEEF)}; - g.ConsumeAndValidate(key_batch); - } -} - -TEST(Grouper, RandomStringInt64Keys) { - TestGrouper g({utf8(), int64()}); - for (int i = 0; i < 4; ++i) { - SCOPED_TRACE(ToChars(i) + "th key batch"); - - ExecBatch key_batch{ - *random::GenerateBatch(g.key_schema_->fields(), 1 << 12, 0xDEADBEEF)}; - g.ConsumeAndValidate(key_batch); - } -} - -TEST(Grouper, RandomStringInt64DoubleInt32Keys) { - TestGrouper g({utf8(), int64(), float64(), int32()}); - for (int i = 0; i < 4; ++i) { - SCOPED_TRACE(ToChars(i) + "th key batch"); - - ExecBatch key_batch{ - *random::GenerateBatch(g.key_schema_->fields(), 1 << 12, 0xDEADBEEF)}; - g.ConsumeAndValidate(key_batch); - } -} - -TEST(Grouper, NullKeys) { - TestGrouper g({null()}); - g.ExpectConsume("[[null], [null]]", "[0, 0]"); -} - -TEST(Grouper, MultipleNullKeys) { - TestGrouper g({null(), null(), null(), null()}); - g.ExpectConsume("[[null, null, null, null], [null, null, null, null]]", "[0, 0]"); -} - -TEST(Grouper, Int64NullKeys) { - TestGrouper g({int64(), null()}); - g.ExpectConsume("[[1, null], [2, null], [1, null]]", "[0, 1, 0]"); -} - -TEST(Grouper, StringNullKeys) { - TestGrouper g({utf8(), null()}); - g.ExpectConsume(R"([["be", null], ["eh", null]])", "[0, 1]"); -} - -TEST(Grouper, DoubleNullStringKey) { - TestGrouper g({float64(), null(), utf8()}); - - g.ExpectConsume(R"([[1.5, null, "eh"], [1.5, null, "eh"]])", "[0, 0]"); - g.ExpectConsume(R"([[null, null, "eh"], [1.0, null, null]])", "[1, 2]"); - g.ExpectConsume(R"([ - [1.0, null, "wh"], - [4.4, null, null], - [5.2, null, "eh"], - [6.5, null, "be"], - [7.3, null, null], - [1.0, null, "wh"], - [9.1, null, "eh"], - [10.2, null, "be"], - [1.0, null, null] - ])", - "[3, 4, 5, 6, 7, 3, 8, 9, 2]"); -} - -TEST(Grouper, EmptyNullKeys) { - TestGrouper g({null()}); - g.ExpectConsume("[]", "[]"); -} - -TEST(Grouper, MakeGroupings) { - auto ExpectGroupings = [](std::string ids_json, std::string expected_json) { - auto ids = checked_pointer_cast(ArrayFromJSON(uint32(), ids_json)); - auto expected = ArrayFromJSON(list(int32()), expected_json); - - auto num_groups = static_cast(expected->length()); - ASSERT_OK_AND_ASSIGN(auto actual, Grouper::MakeGroupings(*ids, num_groups)); - AssertArraysEqual(*expected, *actual, /*verbose=*/true); - - // validate ApplyGroupings - ASSERT_OK_AND_ASSIGN(auto grouped_ids, Grouper::ApplyGroupings(*actual, *ids)); - - for (uint32_t group = 0; group < num_groups; ++group) { - auto ids_slice = checked_pointer_cast(grouped_ids->value_slice(group)); - for (auto slot : *ids_slice) { - EXPECT_EQ(slot, group); - } - } - }; - - ExpectGroupings("[]", "[[]]"); - - ExpectGroupings("[0, 0, 0]", "[[0, 1, 2]]"); - - ExpectGroupings("[0, 0, 0, 1, 1, 2]", "[[0, 1, 2], [3, 4], [5], []]"); - - ExpectGroupings("[2, 1, 2, 1, 1, 2]", "[[], [1, 3, 4], [0, 2, 5], [], []]"); - - ExpectGroupings("[2, 2, 5, 5, 2, 3]", "[[], [], [0, 1, 4], [5], [], [2, 3], [], []]"); - - auto ids = checked_pointer_cast(ArrayFromJSON(uint32(), "[0, null, 1]")); - EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, HasSubstr("MakeGroupings with null ids"), - Grouper::MakeGroupings(*ids, 5)); -} - -TEST(Grouper, ScalarValues) { - // large_utf8 forces GrouperImpl over GrouperFastImpl - for (const auto& str_type : {utf8(), large_utf8()}) { - { - TestGrouper g( - {boolean(), int32(), decimal128(3, 2), decimal256(3, 2), fixed_size_binary(2), - str_type, int32()}, - {ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::SCALAR, - ArgShape::SCALAR, ArgShape::SCALAR, ArgShape::ARRAY}); - g.ExpectConsume( - R"([ -[true, 1, "1.00", "2.00", "ab", "foo", 2], -[true, 1, "1.00", "2.00", "ab", "foo", 2], -[true, 1, "1.00", "2.00", "ab", "foo", 3] -])", - "[0, 0, 1]"); - } - { - auto dict_type = dictionary(int32(), utf8()); - TestGrouper g({dict_type, str_type}, {ArgShape::SCALAR, ArgShape::SCALAR}); - const auto dict = R"(["foo", null])"; - g.ExpectConsume( - {DictScalarFromJSON(dict_type, "0", dict), ScalarFromJSON(str_type, R"("")")}, - ArrayFromJSON(uint32(), "[0]")); - g.ExpectConsume( - {DictScalarFromJSON(dict_type, "1", dict), ScalarFromJSON(str_type, R"("")")}, - ArrayFromJSON(uint32(), "[1]")); - } - } -} - void TestSegmentKey(GroupByFunction group_by, const std::shared_ptr& table, Datum output, const std::vector& segment_keys); diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 7dbed7163daca..671257b2e27af 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -26,9 +26,9 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" #include "arrow/api.h" -#include "arrow/compute/kernels/test_util.h" #include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/row_encoder_internal.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/extension/uuid.h" #include "arrow/testing/extension_type.h" #include "arrow/testing/generator.h" @@ -2350,7 +2350,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both2", null, "r_payload"], ["right_only", null, "r_payload"], @@ -2519,7 +2519,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both1", 0, "r_payload"], ["both1", 42, "r_payload"], @@ -2704,7 +2704,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["right_only", null, "r_payload"], ["right_only", 0, "r_payload"], ["right_only", 42, "r_payload"]])"); @@ -2879,7 +2879,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both1", 0, "r_payload"], ["both2", null, "r_payload"], @@ -3054,7 +3054,7 @@ TEST(HashJoin, FineGrainedResidualFilter) { auto expected = ExecBatchFromJSON({utf8(), int32(), utf8()}, R"([ [null, null, "r_payload"], [null, 0, "r_payload"], - [null, 42, "r_payload"], + [null, 42, "r_payload"], ["both1", null, "r_payload"], ["both2", null, "r_payload"], ["right_only", null, "r_payload"], diff --git a/cpp/src/arrow/acero/plan_test.cc b/cpp/src/arrow/acero/plan_test.cc index e74ad6a6665a4..9b0ea8c564ad9 100644 --- a/cpp/src/arrow/acero/plan_test.cc +++ b/cpp/src/arrow/acero/plan_test.cc @@ -27,6 +27,7 @@ #include "arrow/acero/util.h" #include "arrow/compute/exec.h" #include "arrow/compute/expression.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/io/util_internal.h" #include "arrow/record_batch.h" #include "arrow/table.h" diff --git a/cpp/src/arrow/acero/test_util_internal.cc b/cpp/src/arrow/acero/test_util_internal.cc index 107a20354c0e7..312a78fec2324 100644 --- a/cpp/src/arrow/acero/test_util_internal.cc +++ b/cpp/src/arrow/acero/test_util_internal.cc @@ -38,6 +38,7 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/datum.h" #include "arrow/io/interfaces.h" #include "arrow/record_batch.h" @@ -64,62 +65,6 @@ using compute::Take; namespace acero { -namespace { - -void ValidateOutputImpl(const ArrayData& output) { - ASSERT_OK(::arrow::internal::ValidateArrayFull(output)); - TestInitialized(output); -} - -void ValidateOutputImpl(const ChunkedArray& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& chunk : output.chunks()) { - TestInitialized(*chunk); - } -} - -void ValidateOutputImpl(const RecordBatch& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& column : output.column_data()) { - TestInitialized(*column); - } -} - -void ValidateOutputImpl(const Table& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& column : output.columns()) { - for (const auto& chunk : column->chunks()) { - TestInitialized(*chunk); - } - } -} - -void ValidateOutputImpl(const Scalar& output) { ASSERT_OK(output.ValidateFull()); } - -} // namespace - -void ValidateOutput(const Datum& output) { - switch (output.kind()) { - case Datum::ARRAY: - ValidateOutputImpl(*output.array()); - break; - case Datum::CHUNKED_ARRAY: - ValidateOutputImpl(*output.chunked_array()); - break; - case Datum::RECORD_BATCH: - ValidateOutputImpl(*output.record_batch()); - break; - case Datum::TABLE: - ValidateOutputImpl(*output.table()); - break; - case Datum::SCALAR: - ValidateOutputImpl(*output.scalar()); - break; - default: - break; - } -} - std::vector HardwareFlagsForTesting() { // Acero currently only has AVX2 optimizations return arrow::GetSupportedHardwareFlags({CpuInfo::AVX2}); @@ -199,36 +144,6 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& types, std::string_view json) { - auto fields = ::arrow::internal::MapVector( - [](const TypeHolder& th) { return field("", th.GetSharedPtr()); }, types); - - ExecBatch batch{*RecordBatchFromJSON(schema(std::move(fields)), json)}; - - return batch; -} - -ExecBatch ExecBatchFromJSON(const std::vector& types, - const std::vector& shapes, std::string_view json) { - DCHECK_EQ(types.size(), shapes.size()); - - ExecBatch batch = ExecBatchFromJSON(types, json); - - auto value_it = batch.values.begin(); - for (ArgShape shape : shapes) { - if (shape == ArgShape::SCALAR) { - if (batch.length == 0) { - *value_it = MakeNullScalar(value_it->type()); - } else { - *value_it = value_it->make_array()->GetScalar(0).ValueOrDie(); - } - } - ++value_it; - } - - return batch; -} - Future<> StartAndFinish(ExecPlan* plan) { RETURN_NOT_OK(plan->Validate()); plan->StartProducing(); diff --git a/cpp/src/arrow/acero/test_util_internal.h b/cpp/src/arrow/acero/test_util_internal.h index 569fb1254db4a..2367524a5600c 100644 --- a/cpp/src/arrow/acero/test_util_internal.h +++ b/cpp/src/arrow/acero/test_util_internal.h @@ -36,8 +36,6 @@ namespace arrow::acero { -void ValidateOutput(const Datum& output); - // Enumerate all hardware flags that can be tested on this platform // and would lead to different code paths being tested in Acero. std::vector HardwareFlagsForTesting(); @@ -50,16 +48,6 @@ ExecNode* MakeDummyNode(ExecPlan* plan, std::string label, std::vector& types, std::string_view json); - -/// \brief Shape qualifier for value types. In certain instances -/// (e.g. "map_lookup" kernel), an argument may only be a scalar, where in -/// other kernels arguments can be arrays or scalars -enum class ArgShape { ANY, ARRAY, SCALAR }; - -ExecBatch ExecBatchFromJSON(const std::vector& types, - const std::vector& shapes, std::string_view json); - struct BatchesWithSchema { std::vector batches; std::shared_ptr schema; diff --git a/cpp/src/arrow/acero/tpch_node_test.cc b/cpp/src/arrow/acero/tpch_node_test.cc index 17fb43452bc58..f484d6c9d523e 100644 --- a/cpp/src/arrow/acero/tpch_node_test.cc +++ b/cpp/src/arrow/acero/tpch_node_test.cc @@ -27,7 +27,6 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/tpch_node.h" #include "arrow/acero/util.h" -#include "arrow/compute/kernels/test_util.h" #include "arrow/compute/row/row_encoder_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index ca811dac041fe..6deb2cbad8cb3 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -28,6 +28,14 @@ endif() # Unit tests # +# Define arrow_compute_testing object library for common test files +if(ARROW_TESTING) + add_library(arrow_compute_testing OBJECT test_util_internal.cc) + # Even though this is still just an object library we still need to "link" our + # dependencies so that include paths are configured correctly + target_link_libraries(arrow_compute_testing PUBLIC ${ARROW_GTEST_GMOCK}) +endif() + set(ARROW_COMPUTE_TEST_PREFIX "arrow-compute") set(ARROW_COMPUTE_TEST_LABELS "arrow-compute-tests") set(ARROW_COMPUTE_TEST_ARGS PREFIX ${ARROW_COMPUTE_TEST_PREFIX} LABELS @@ -87,9 +95,16 @@ add_arrow_test(internals_test function_test.cc exec_test.cc kernel_test.cc - registry_test.cc) + registry_test.cc + EXTRA_LINK_LIBS + arrow_compute_testing) + +add_arrow_compute_test(expression_test + SOURCES + expression_test.cc + EXTRA_LINK_LIBS + arrow_compute_testing) -add_arrow_compute_test(expression_test SOURCES expression_test.cc) add_arrow_compute_test(row_test SOURCES key_hash_test.cc @@ -98,7 +113,9 @@ add_arrow_compute_test(row_test row/grouper_test.cc row/row_encoder_internal_test.cc row/row_test.cc - util_internal_test.cc) + util_internal_test.cc + EXTRA_LINK_LIBS + arrow_compute_testing) add_arrow_benchmark(function_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 84b508f5d9be4..4dedd1f23e090 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -18,9 +18,9 @@ # ---------------------------------------------------------------------- # Tests that don't require the full kernel library -# Define arrow_compute_testing object library for common test files +# Define arrow_compute_kernels_testing object library for common test files if(ARROW_TESTING) - add_library(arrow_compute_kernels_testing OBJECT test_util.cc) + add_library(arrow_compute_kernels_testing OBJECT test_util_internal.cc) # Even though this is still just an object library we still need to "link" our # dependencies so that include paths are configured correctly target_link_libraries(arrow_compute_kernels_testing PUBLIC ${ARROW_GTEST_GMOCK}) @@ -31,12 +31,14 @@ add_arrow_test(scalar_cast_test SOURCES scalar_cast_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) # ---------------------------------------------------------------------- # Scalar kernels -set(ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS arrow_compute_kernels_testing) +set(ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS arrow_compute_kernels_testing + arrow_compute_testing) if(ARROW_WITH_UTF8PROC) list(APPEND ARROW_COMPUTE_SCALAR_TYPE_TEST_LINK_LIBS utf8proc::utf8proc) endif() @@ -52,13 +54,15 @@ add_arrow_compute_test(scalar_if_else_test SOURCES scalar_if_else_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(scalar_temporal_test SOURCES scalar_temporal_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(scalar_math_test SOURCES @@ -66,7 +70,8 @@ add_arrow_compute_test(scalar_math_test scalar_compare_test.cc scalar_round_arithmetic_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(scalar_utility_test SOURCES @@ -74,7 +79,8 @@ add_arrow_compute_test(scalar_utility_test scalar_set_lookup_test.cc scalar_validity_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute") @@ -101,25 +107,29 @@ add_arrow_compute_test(vector_test vector_run_end_encode_test.cc select_k_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(vector_sort_test SOURCES vector_sort_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(vector_selection_test SOURCES vector_selection_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_compute_test(vector_swizzle_test SOURCES vector_swizzle_test.cc EXTRA_LINK_LIBS - arrow_compute_kernels_testing) + arrow_compute_kernels_testing + arrow_compute_testing) add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute") add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute") @@ -138,6 +148,7 @@ add_arrow_compute_test(aggregate_test aggregate_test.cc EXTRA_LINK_LIBS arrow_compute_kernels_testing + arrow_compute_testing Boost::headers) # ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index 65439af2748b5..e6ad915fd5667 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -32,7 +32,7 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernels/aggregate_internal.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/type.h" #include "arrow/type_traits.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index 9a1a569081d9a..1162dad855da8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -29,7 +29,7 @@ #include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/compute/api.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/datum.h" #include "arrow/type.h" #include "arrow/type_traits.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc b/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc index fe6ca527bfd70..a8d7cab5f5ad9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc @@ -24,7 +24,7 @@ #include "arrow/chunked_array.h" #include "arrow/compute/api_scalar.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index 80d5b3c46cae1..d7f73e2bb7bfe 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -46,7 +46,7 @@ #include "arrow/compute/cast.h" #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/codegen_internal.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc index 8f5952b40500a..b505e11a27d53 100644 --- a/cpp/src/arrow/compute/kernels/scalar_compare_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_compare_test.cc @@ -26,7 +26,7 @@ #include "arrow/array.h" #include "arrow/compute/api.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/builder.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 9a0ca325277dc..76ad19f3c4833 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -23,7 +23,7 @@ #include "arrow/array/concatenate.h" #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc index b6a6cac1b4382..f199f56aa2f0c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc @@ -20,7 +20,7 @@ #include "arrow/chunked_array.h" #include "arrow/compute/api.h" #include "arrow/compute/api_scalar.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/result.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_random_test.cc b/cpp/src/arrow/compute/kernels/scalar_random_test.cc index 81c0c90cb6b43..ff90d0c332ab4 100644 --- a/cpp/src/arrow/compute/kernels/scalar_random_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_random_test.cc @@ -18,7 +18,7 @@ #include #include "arrow/compute/api.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/thread_pool.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_round_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_round_arithmetic_test.cc index 3d205b0451dc2..a572af4195c60 100644 --- a/cpp/src/arrow/compute/kernels/scalar_round_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_round_arithmetic_test.cc @@ -26,7 +26,7 @@ #include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/compute/api.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/bit_util.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc index 89e10d1b54103..dbd8948992041 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_test.cc @@ -33,7 +33,7 @@ #include "arrow/array/builder_primitive.h" #include "arrow/chunked_array.h" #include "arrow/compute/api.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/testing/gtest_compat.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 59a22b9926456..38455dc146711 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -26,7 +26,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/exec.h" #include "arrow/compute/kernels/codegen_internal.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index c36fbf77eafb1..6f92036f55b44 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -21,7 +21,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/testing/util.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc index d1462838f3be6..2d1167a18137d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc @@ -19,7 +19,7 @@ #include "arrow/array.h" #include "arrow/compute/api.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/compute/kernels/select_k_test.cc b/cpp/src/arrow/compute/kernels/select_k_test.cc index c9dbe0bd4c075..05813ae6e70b5 100644 --- a/cpp/src/arrow/compute/kernels/select_k_test.cc +++ b/cpp/src/arrow/compute/kernels/select_k_test.cc @@ -25,7 +25,7 @@ #include "arrow/array/array_decimal.h" #include "arrow/array/concatenate.h" #include "arrow/compute/api_vector.h" -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/kernels/util_internal.h" #include "arrow/table.h" #include "arrow/testing/gtest_util.h" diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util_internal.cc similarity index 89% rename from cpp/src/arrow/compute/kernels/test_util.cc rename to cpp/src/arrow/compute/kernels/test_util_internal.cc index 2217787663a63..d48b3b0781e7a 100644 --- a/cpp/src/arrow/compute/kernels/test_util.cc +++ b/cpp/src/arrow/compute/kernels/test_util_internal.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/kernels/test_util_internal.h" #include #include @@ -281,62 +281,6 @@ void CheckScalarBinaryCommutative(std::string func_name, Datum left_input, CheckScalar(func_name, {right_input, left_input}, expected, options); } -namespace { - -void ValidateOutputImpl(const ArrayData& output) { - ASSERT_OK(::arrow::internal::ValidateArrayFull(output)); - TestInitialized(output); -} - -void ValidateOutputImpl(const ChunkedArray& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& chunk : output.chunks()) { - TestInitialized(*chunk); - } -} - -void ValidateOutputImpl(const RecordBatch& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& column : output.column_data()) { - TestInitialized(*column); - } -} - -void ValidateOutputImpl(const Table& output) { - ASSERT_OK(output.ValidateFull()); - for (const auto& column : output.columns()) { - for (const auto& chunk : column->chunks()) { - TestInitialized(*chunk); - } - } -} - -void ValidateOutputImpl(const Scalar& output) { ASSERT_OK(output.ValidateFull()); } - -} // namespace - -void ValidateOutput(const Datum& output) { - switch (output.kind()) { - case Datum::ARRAY: - ValidateOutputImpl(*output.array()); - break; - case Datum::CHUNKED_ARRAY: - ValidateOutputImpl(*output.chunked_array()); - break; - case Datum::RECORD_BATCH: - ValidateOutputImpl(*output.record_batch()); - break; - case Datum::TABLE: - ValidateOutputImpl(*output.table()); - break; - case Datum::SCALAR: - ValidateOutputImpl(*output.scalar()); - break; - default: - break; - } -} - void CheckDispatchBest(std::string func_name, std::vector original_values, std::vector expected_equivalent_values) { ASSERT_OK_AND_ASSIGN(auto function, GetFunctionRegistry()->GetFunction(func_name)); diff --git a/cpp/src/arrow/compute/kernels/test_util.h b/cpp/src/arrow/compute/kernels/test_util_internal.h similarity index 99% rename from cpp/src/arrow/compute/kernels/test_util.h rename to cpp/src/arrow/compute/kernels/test_util_internal.h index 11e77caeff861..e3a27ab9addbc 100644 --- a/cpp/src/arrow/compute/kernels/test_util.h +++ b/cpp/src/arrow/compute/kernels/test_util_internal.h @@ -29,6 +29,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernel.h" +#include "arrow/compute/test_util_internal.h" #include "arrow/datum.h" #include "arrow/memory_pool.h" #include "arrow/pretty_print.h" @@ -131,8 +132,6 @@ void CheckScalarBinaryCommutative(std::string func_name, Datum left_input, void CheckVectorUnary(std::string func_name, Datum input, Datum expected, const FunctionOptions* options = nullptr); -void ValidateOutput(const Datum& output); - static constexpr random::SeedType kRandomSeed = 0x0ff1ce; template