From 878c0f4ff41e11e358b7f7278302ddac8713b032 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Mon, 16 Aug 2021 20:07:57 +0800 Subject: [PATCH 1/8] support nested types for nth_element reduction Signed-off-by: sperlingxx --- cpp/src/reductions/reductions.cpp | 29 +++- cpp/tests/groupby/nth_element_tests.cpp | 57 +++++++ cpp/tests/reductions/reduction_tests.cpp | 194 +++++++++++++++++++++++ 3 files changed, 274 insertions(+), 6 deletions(-) diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index a8117373ca4..720641fa569 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -25,6 +25,7 @@ #include #include +#include #include namespace cudf { @@ -112,13 +113,29 @@ std::unique_ptr reduce( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - std::unique_ptr result = make_default_constructed_scalar(output_dtype, stream, mr); - result->set_valid_async(false, stream); - - // check if input column is empty - if (col.size() <= col.null_count()) return result; + // Returns default scalar if input column is non-valid. In terms of nested columns, we need to + // handcraft the default scalar with input column. + if (col.size() <= col.null_count()) { + if (!is_nested(output_dtype)) { + auto result = make_default_constructed_scalar(output_dtype, stream, mr); + result->set_valid_async(false, stream); + return result; + } else if (col.type().id() == type_id::LIST) { + auto result = make_list_scalar(empty_like(col)->view(), stream, mr); + result->set_valid_async(false, stream); + return result; + } else if (col.type().id() == type_id::STRUCT) { + // Struct scalar inputs must have exactly 1 row. + CUDF_EXPECTS(!col.is_empty(), "Can not create empty struct scalar"); + auto result = get_element(col, 1, stream, mr); + result->set_valid_async(false, stream); + return result; + } else { + CUDF_FAIL("Unsupported data type for default scalar"); + } + } - result = + std::unique_ptr result = aggregation_dispatcher(agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); return result; } diff --git a/cpp/tests/groupby/nth_element_tests.cpp b/cpp/tests/groupby/nth_element_tests.cpp index d5029147906..92916aa1f90 100644 --- a/cpp/tests/groupby/nth_element_tests.cpp +++ b/cpp/tests/groupby/nth_element_tests.cpp @@ -405,5 +405,62 @@ TYPED_TEST(groupby_nth_element_lists_test, EmptyInput) keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(2)); } +struct groupby_nth_element_structs_test : BaseFixture { +}; + +TEST_F(groupby_nth_element_structs_test, Basics) +{ + using structs = cudf::test::structs_column_wrapper; + using ints = cudf::test::fixed_width_column_wrapper; + using doubles = cudf::test::fixed_width_column_wrapper; + using strings = cudf::test::strings_column_wrapper; + + auto keys = ints{0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + auto child0 = ints{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto child1 = doubles{0.1, 1.2, 2.3, 3.4, 4.51, 5.3e4, 6.3231, -0.07, 832.1, 9.999}; + auto child2 = strings{"", "a", "b", "c", "d", "e", "f", "g", "HH", "JJJ"}; + auto values = structs{{child0, child1, child2}, {1, 0, 1, 0, 1, 1, 1, 1, 0, 1}}; + + auto expected_keys = ints{0, 1, 2, 3}; + auto expected_ch0 = ints{1, 4, 7, 0}; + auto expected_ch1 = doubles{1.2, 4.51, -0.07, 0.0}; + auto expected_ch2 = strings{"a", "d", "g", ""}; + auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {0, 1, 1, 0}}; + test_single_agg( + keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(1)); + + expected_keys = ints{0, 1, 2, 3}; + expected_ch0 = ints{0, 4, 6, 9}; + expected_ch1 = doubles{0.1, 4.51, 6.3231, 9.999}; + expected_ch2 = strings{"", "d", "f", "JJJ"}; + expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {1, 1, 1, 1}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); +} + +TEST_F(groupby_nth_element_structs_test, EmptyInput) +{ + using structs = cudf::test::structs_column_wrapper; + using ints = cudf::test::fixed_width_column_wrapper; + using doubles = cudf::test::fixed_width_column_wrapper; + using strings = cudf::test::strings_column_wrapper; + + auto keys = ints{}; + auto child0 = ints{}; + auto child1 = doubles{}; + auto child2 = strings{}; + auto values = structs{{child0, child1, child2}}; + + auto expected_keys = ints{}; + auto expected_ch0 = ints{}; + auto expected_ch1 = doubles{}; + auto expected_ch2 = strings{}; + auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}}; + test_single_agg( + keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(0)); +} } // namespace test } // namespace cudf diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index da9032737f2..5d7aa2651b3 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -24,8 +24,10 @@ #include #include #include +#include #include #include +#include #include @@ -1872,4 +1874,196 @@ TYPED_TEST(DictionaryReductionTest, Quantile) output_type); } +//------------------------------------------------------------------- +struct ListReductionTest : public cudf::test::BaseFixture { + void reduction_test(cudf::column_view const& input_data, + cudf::column_view const& expected_value, + bool succeeded_condition, + bool is_valid, + std::unique_ptr const& agg) + { + auto statement = [&]() { + std::unique_ptr result = + cudf::reduce(input_data, agg, cudf::data_type(cudf::type_id::LIST)); + auto list_result = dynamic_cast(result.get()); + EXPECT_EQ(is_valid, list_result->is_valid()); + if (is_valid) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_value, list_result->view()); } + }; + + if (succeeded_condition) { + CUDF_EXPECT_NO_THROW(statement()); + } else { + EXPECT_ANY_THROW(statement()); + } + } +}; + +TEST_F(ListReductionTest, ListReductionNthElement) +{ + using ListCol = cudf::test::lists_column_wrapper; + using ElementCol = cudf::test::fixed_width_column_wrapper; + + // test without nulls + ListCol col{{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}; + this->reduction_test(col, + ElementCol{0, 5, -3}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); + + // test with null-include + std::vector validity{1, 0, 0, 1, 1, 0}; + ListCol col_nulls({{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}, validity.begin()); + this->reduction_test(col_nulls, + ElementCol{-2}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(1, cudf::null_policy::EXCLUDE)); + + // test with null-include + this->reduction_test(col_nulls, + ElementCol{}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE)); +} + +TEST_F(ListReductionTest, NonValidListReductionNthElement) +{ + using ListCol = cudf::test::lists_column_wrapper; + using ElementCol = cudf::test::fixed_width_column_wrapper; + + // test against col.size() <= col.null_count() + std::vector validity{0}; + this->reduction_test(ListCol{{{1, 2}}, validity.begin()}, + ElementCol{}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); + + // test against empty input + this->reduction_test(ListCol{}, + ElementCol{{0}, {0}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); +} + +//------------------------------------------------------------------- +struct StructReductionTest : public cudf::test::BaseFixture { + using StructCol = cudf::test::structs_column_wrapper; + + void reduction_test(StructCol const& struct_column, + cudf::table_view const& expected_value, + bool succeeded_condition, + bool is_valid, + std::unique_ptr const& agg) + { + auto statement = [&]() { + std::unique_ptr result = + cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::LIST)); + auto struct_result = dynamic_cast(result.get()); + EXPECT_EQ(is_valid, struct_result->is_valid()); + if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUAL(expected_value, struct_result->view()); } + }; + + if (succeeded_condition) { + CUDF_EXPECT_NO_THROW(statement()); + } else { + EXPECT_ANY_THROW(statement()); + } + } +}; + +TEST_F(StructReductionTest, StructReductionNthElement) +{ + using ChildCol = cudf::test::fixed_width_column_wrapper; + + // test without nulls + auto child0 = *ChildCol{-3, 2, 1, 0, 5, -3, -2, 28}.release(); + auto child1 = *ChildCol{0, 1, 2, 3, 4, 5, 6, 7}.release(); + auto child2 = + *ChildCol{{-10, 10, -100, 100, -1000, 1000, -10000, 10000}, {1, 0, 0, 1, 1, 1, 0, 1}}.release(); + std::vector> input_vector; + input_vector.push_back(std::make_unique(child0)); + input_vector.push_back(std::make_unique(child1)); + input_vector.push_back(std::make_unique(child2)); + StructCol struct_col(std::move(input_vector)); + auto result_col0 = ChildCol{1}; + auto result_col1 = ChildCol{2}; + auto result_col2 = ChildCol{{0}, {0}}; + this->reduction_test( + struct_col, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); + + // test with null-include + std::vector> input_vector_null_include; + input_vector_null_include.push_back(std::make_unique(child0)); + input_vector_null_include.push_back(std::make_unique(child1)); + input_vector_null_include.push_back(std::make_unique(child2)); + std::vector validity{1, 1, 1, 0, 1, 0, 0, 1}; + StructCol struct_col_null_include(std::move(input_vector_null_include), validity); + result_col0 = ChildCol{{0}, {0}}; + result_col1 = ChildCol{{0}, {0}}; + result_col2 = ChildCol{{0}, {0}}; + this->reduction_test( + struct_col_null_include, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(6, cudf::null_policy::INCLUDE)); + + // test with null-exclude + std::vector> input_vector_null_exclude; + input_vector_null_exclude.push_back(std::make_unique(child0)); + input_vector_null_exclude.push_back(std::make_unique(child1)); + input_vector_null_exclude.push_back(std::make_unique(child2)); + StructCol struct_col_with_null_exclude(std::move(input_vector_null_exclude), validity); + result_col0 = ChildCol{{28}, {1}}; + result_col1 = ChildCol{{7}, {1}}; + result_col2 = ChildCol{{10000}, {1}}; + this->reduction_test( + struct_col_with_null_exclude, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(4, cudf::null_policy::EXCLUDE)); +} + +TEST_F(StructReductionTest, NonValidStructReductionNthElement) +{ + using ChildCol = cudf::test::fixed_width_column_wrapper; + + // test against col.size() <= col.null_count() + auto child0 = ChildCol{-3, 3}; + auto child1 = ChildCol{0, 0}; + auto child2 = ChildCol{{-10, 10}, {0, 1}}; + auto struct_col = StructCol{{child0, child1, child2}, {0, 0}}; + auto ret_col0 = ChildCol{{0}, {0}}; + auto ret_col1 = ChildCol{{0}, {0}}; + auto ret_col2 = ChildCol{{0}, {0}}; + this->reduction_test(struct_col, + cudf::table_view{{ret_col0, ret_col1, ret_col2}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); + + // test against empty input (would fail because we can not create empty struct scalar) + child0 = ChildCol{}; + child1 = ChildCol{}; + child2 = ChildCol{}; + struct_col = StructCol{{child0, child1, child2}}; + ret_col0 = ChildCol{}; + ret_col1 = ChildCol{}; + ret_col2 = ChildCol{}; + this->reduction_test(struct_col, + cudf::table_view{{ret_col0, ret_col1, ret_col2}}, // expected_value, + false, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); +} + CUDF_TEST_PROGRAM_MAIN() From a360e0d68efa712969538530a1402e1acf72588d Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Tue, 17 Aug 2021 18:21:53 +0800 Subject: [PATCH 2/8] update Signed-off-by: sperlingxx --- cpp/include/cudf/scalar/scalar_factories.hpp | 14 ++ cpp/src/reductions/reductions.cpp | 19 +- cpp/src/scalar/scalar_factories.cpp | 23 +++ cpp/tests/groupby/nth_element_tests.cpp | 39 +++++ cpp/tests/reductions/reduction_tests.cpp | 174 +++++++++++++------ 5 files changed, 202 insertions(+), 67 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index b96a8c65a04..5db0257660a 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -121,6 +121,20 @@ std::unique_ptr make_default_constructed_scalar( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Creates an empty (invalid) scalar of the same type as the `input` column_view. + * + * @throws std::bad_alloc if device memory allocation fails + * + * @param input Immutable view of input column to emulate + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_empty_scalar_like( + column_view const& input, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Construct scalar using the given value of fixed width type * diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 720641fa569..7d73a591eb8 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -116,23 +116,10 @@ std::unique_ptr reduce( // Returns default scalar if input column is non-valid. In terms of nested columns, we need to // handcraft the default scalar with input column. if (col.size() <= col.null_count()) { - if (!is_nested(output_dtype)) { - auto result = make_default_constructed_scalar(output_dtype, stream, mr); - result->set_valid_async(false, stream); - return result; - } else if (col.type().id() == type_id::LIST) { - auto result = make_list_scalar(empty_like(col)->view(), stream, mr); - result->set_valid_async(false, stream); - return result; - } else if (col.type().id() == type_id::STRUCT) { - // Struct scalar inputs must have exactly 1 row. - CUDF_EXPECTS(!col.is_empty(), "Can not create empty struct scalar"); - auto result = get_element(col, 1, stream, mr); - result->set_valid_async(false, stream); - return result; - } else { - CUDF_FAIL("Unsupported data type for default scalar"); + if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) { + return make_default_constructed_scalar(output_dtype, stream, mr); } + return make_empty_scalar_like(col, stream, mr); } std::unique_ptr result = diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index af78d84d874..d257b4c9955 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -20,6 +20,8 @@ #include #include +#include +#include #include namespace cudf { @@ -165,4 +167,25 @@ std::unique_ptr make_default_constructed_scalar(data_type type, return type_dispatcher(type, default_scalar_functor{}, stream, mr); } +std::unique_ptr make_empty_scalar_like(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + std::unique_ptr result; + switch (column.type().id()) { + case type_id::LIST: + result = make_list_scalar(empty_like(column)->view(), stream, mr); + result->set_valid_async(false, stream); + break; + case type_id::STRUCT: + // Struct scalar inputs must have exactly 1 row. + CUDF_EXPECTS(!column.is_empty(), "Can not create empty struct scalar"); + result = detail::get_element(column, 1, stream, mr); + result->set_valid_async(false, stream); + break; + default: result = make_default_constructed_scalar(column.type(), stream, mr); + } + return result; +} + } // namespace cudf diff --git a/cpp/tests/groupby/nth_element_tests.cpp b/cpp/tests/groupby/nth_element_tests.cpp index 92916aa1f90..95a466ae0ff 100644 --- a/cpp/tests/groupby/nth_element_tests.cpp +++ b/cpp/tests/groupby/nth_element_tests.cpp @@ -441,6 +441,45 @@ TEST_F(groupby_nth_element_structs_test, Basics) cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); } +TEST_F(groupby_nth_element_structs_test, NestedStructs) +{ + using structs = cudf::test::structs_column_wrapper; + using ints = cudf::test::fixed_width_column_wrapper; + using doubles = cudf::test::fixed_width_column_wrapper; + using lists = cudf::test::lists_column_wrapper; + + auto keys = ints{0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + auto child0 = ints{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto child0_of_child1 = ints{0, -1, -2, -3, -4, -5, -6, -7, -8, -9}; + auto child1_of_child1 = doubles{0.1, 1.2, 2.3, 3.4, 4.51, 5.3e4, 6.3231, -0.07, 832.1, 9.999}; + auto child1 = structs{child0_of_child1, child1_of_child1}; + auto child2 = lists{{0}, {1, 2, 3}, {}, {4}, {5, 6}, {}, {}, {7}, {8, 9}, {}}; + auto values = structs{{child0, child1, child2}, {1, 0, 1, 0, 1, 1, 1, 1, 0, 1}}; + + auto expected_keys = ints{0, 1, 2, 3}; + auto expected_ch0 = ints{1, 4, 7, 0}; + auto expected_ch0_of_ch1 = ints{-1, -4, -7, 0}; + auto expected_ch1_of_ch1 = doubles{1.2, 4.51, -0.07, 0.0}; + auto expected_ch1 = structs{expected_ch0_of_ch1, expected_ch1_of_ch1}; + auto expected_ch2 = lists{{1, 2, 3}, {5, 6}, {7}, {}}; + auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {0, 1, 1, 0}}; + test_single_agg( + keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(1)); + + expected_keys = ints{0, 1, 2, 3}; + expected_ch0 = ints{0, 4, 6, 9}; + expected_ch0_of_ch1 = ints{0, -4, -6, -9}; + expected_ch1_of_ch1 = doubles{0.1, 4.51, 6.3231, 9.999}; + expected_ch1 = structs{expected_ch0_of_ch1, expected_ch1_of_ch1}; + expected_ch2 = lists{{0}, {5, 6}, {}, {}}; + expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {1, 1, 1, 1}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); +} + TEST_F(groupby_nth_element_structs_test, EmptyInput) { using structs = cudf::test::structs_column_wrapper; diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 5d7aa2651b3..5fb9d6ce62b 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1900,20 +1900,20 @@ struct ListReductionTest : public cudf::test::BaseFixture { TEST_F(ListReductionTest, ListReductionNthElement) { - using ListCol = cudf::test::lists_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; using ElementCol = cudf::test::fixed_width_column_wrapper; // test without nulls - ListCol col{{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}; + LCW col{{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}; this->reduction_test(col, ElementCol{0, 5, -3}, // expected_value, true, true, cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); - // test with null-include + // test with null-exclude std::vector validity{1, 0, 0, 1, 1, 0}; - ListCol col_nulls({{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}, validity.begin()); + LCW col_nulls({{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}, validity.begin()); this->reduction_test(col_nulls, ElementCol{-2}, // expected_value, true, @@ -1928,21 +1928,51 @@ TEST_F(ListReductionTest, ListReductionNthElement) cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE)); } +TEST_F(ListReductionTest, NestedListReductionNthElement) +{ + using LCW = cudf::test::lists_column_wrapper; + + // test without nulls + auto validity = std::vector{1, 0, 0, 1, 1}; + auto nested_list = LCW( + {{LCW{}, LCW{2, 3, 4}}, {}, {LCW{5}, LCW{6}, LCW{7, 8}}, {LCW{9, 10}}, {LCW{11}, LCW{12, 13}}}, + validity.begin()); + this->reduction_test(nested_list, + LCW{{}, {2, 3, 4}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); + + // test with null-include + this->reduction_test(nested_list, + LCW{}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); + + // test with null-exclude + this->reduction_test(nested_list, + LCW{{11}, {12, 13}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(2, cudf::null_policy::EXCLUDE)); +} + TEST_F(ListReductionTest, NonValidListReductionNthElement) { - using ListCol = cudf::test::lists_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; using ElementCol = cudf::test::fixed_width_column_wrapper; // test against col.size() <= col.null_count() std::vector validity{0}; - this->reduction_test(ListCol{{{1, 2}}, validity.begin()}, + this->reduction_test(LCW{{{1, 2}}, validity.begin()}, ElementCol{}, // expected_value, true, false, cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); // test against empty input - this->reduction_test(ListCol{}, + this->reduction_test(LCW{}, ElementCol{{0}, {0}}, // expected_value, true, false, @@ -1951,9 +1981,9 @@ TEST_F(ListReductionTest, NonValidListReductionNthElement) //------------------------------------------------------------------- struct StructReductionTest : public cudf::test::BaseFixture { - using StructCol = cudf::test::structs_column_wrapper; + using SCW = cudf::test::structs_column_wrapper; - void reduction_test(StructCol const& struct_column, + void reduction_test(SCW const& struct_column, cudf::table_view const& expected_value, bool succeeded_condition, bool is_valid, @@ -1961,7 +1991,7 @@ struct StructReductionTest : public cudf::test::BaseFixture { { auto statement = [&]() { std::unique_ptr result = - cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::LIST)); + cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::STRUCT)); auto struct_result = dynamic_cast(result.get()); EXPECT_EQ(is_valid, struct_result->is_valid()); if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUAL(expected_value, struct_result->view()); } @@ -1977,21 +2007,21 @@ struct StructReductionTest : public cudf::test::BaseFixture { TEST_F(StructReductionTest, StructReductionNthElement) { - using ChildCol = cudf::test::fixed_width_column_wrapper; + using ICW = cudf::test::fixed_width_column_wrapper; // test without nulls - auto child0 = *ChildCol{-3, 2, 1, 0, 5, -3, -2, 28}.release(); - auto child1 = *ChildCol{0, 1, 2, 3, 4, 5, 6, 7}.release(); + auto child0 = *ICW{-3, 2, 1, 0, 5, -3, -2, 28}.release(); + auto child1 = *ICW{0, 1, 2, 3, 4, 5, 6, 7}.release(); auto child2 = - *ChildCol{{-10, 10, -100, 100, -1000, 1000, -10000, 10000}, {1, 0, 0, 1, 1, 1, 0, 1}}.release(); + *ICW{{-10, 10, -100, 100, -1000, 1000, -10000, 10000}, {1, 0, 0, 1, 1, 1, 0, 1}}.release(); std::vector> input_vector; input_vector.push_back(std::make_unique(child0)); input_vector.push_back(std::make_unique(child1)); input_vector.push_back(std::make_unique(child2)); - StructCol struct_col(std::move(input_vector)); - auto result_col0 = ChildCol{1}; - auto result_col1 = ChildCol{2}; - auto result_col2 = ChildCol{{0}, {0}}; + auto struct_col = SCW(std::move(input_vector)); + auto result_col0 = ICW{1}; + auto result_col1 = ICW{2}; + auto result_col2 = ICW{{0}, {0}}; this->reduction_test( struct_col, cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, @@ -2000,51 +2030,93 @@ TEST_F(StructReductionTest, StructReductionNthElement) cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); // test with null-include - std::vector> input_vector_null_include; - input_vector_null_include.push_back(std::make_unique(child0)); - input_vector_null_include.push_back(std::make_unique(child1)); - input_vector_null_include.push_back(std::make_unique(child2)); std::vector validity{1, 1, 1, 0, 1, 0, 0, 1}; - StructCol struct_col_null_include(std::move(input_vector_null_include), validity); - result_col0 = ChildCol{{0}, {0}}; - result_col1 = ChildCol{{0}, {0}}; - result_col2 = ChildCol{{0}, {0}}; + input_vector.clear(); + input_vector.push_back(std::make_unique(child0)); + input_vector.push_back(std::make_unique(child1)); + input_vector.push_back(std::make_unique(child2)); + struct_col = SCW(std::move(input_vector), validity); + result_col0 = ICW{{0}, {0}}; + result_col1 = ICW{{0}, {0}}; + result_col2 = ICW{{0}, {0}}; this->reduction_test( - struct_col_null_include, + struct_col, cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, true, false, cudf::make_nth_element_aggregation(6, cudf::null_policy::INCLUDE)); // test with null-exclude - std::vector> input_vector_null_exclude; - input_vector_null_exclude.push_back(std::make_unique(child0)); - input_vector_null_exclude.push_back(std::make_unique(child1)); - input_vector_null_exclude.push_back(std::make_unique(child2)); - StructCol struct_col_with_null_exclude(std::move(input_vector_null_exclude), validity); - result_col0 = ChildCol{{28}, {1}}; - result_col1 = ChildCol{{7}, {1}}; - result_col2 = ChildCol{{10000}, {1}}; + result_col0 = ICW{{28}, {1}}; + result_col1 = ICW{{7}, {1}}; + result_col2 = ICW{{10000}, {1}}; this->reduction_test( - struct_col_with_null_exclude, + struct_col, cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, true, true, cudf::make_nth_element_aggregation(4, cudf::null_policy::EXCLUDE)); } +TEST_F(StructReductionTest, NestedStructReductionNthElement) +{ + using ICW = cudf::test::fixed_width_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; + + auto int_col0 = ICW{-4, -3, -2, -1, 0}; + auto struct_col0 = SCW({int_col0}, std::vector{1, 0, 0, 1, 1}); + auto int_col1 = ICW{0, 1, 2, 3, 4}; + auto list_col = LCW{{0}, {}, {1, 2}, {3}, {4}}; + auto struct_col1 = SCW({struct_col0, int_col1, list_col}, std::vector{1, 1, 1, 0, 1}); + auto result_child0 = ICW{0}; + auto result_col0 = SCW({result_child0}, std::vector{0}); + auto result_col1 = ICW{{1}, {1}}; + auto result_col2 = LCW({LCW{}}, std::vector{1}.begin()); + // test without nulls + this->reduction_test( + struct_col1, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE)); + + // test with null-include + result_child0 = ICW{0}; + result_col0 = SCW({result_child0}, std::vector{0}); + result_col1 = ICW{{0}, {0}}; + result_col2 = LCW({LCW{3}}, std::vector{0}.begin()); + this->reduction_test( + struct_col1, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(3, cudf::null_policy::INCLUDE)); + + // test with null-exclude + result_child0 = ICW{0}; + result_col0 = SCW({result_child0}, std::vector{1}); + result_col1 = ICW{{4}, {1}}; + result_col2 = LCW({LCW{4}}, std::vector{1}.begin()); + this->reduction_test( + struct_col1, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(3, cudf::null_policy::EXCLUDE)); +} + TEST_F(StructReductionTest, NonValidStructReductionNthElement) { - using ChildCol = cudf::test::fixed_width_column_wrapper; + using ICW = cudf::test::fixed_width_column_wrapper; // test against col.size() <= col.null_count() - auto child0 = ChildCol{-3, 3}; - auto child1 = ChildCol{0, 0}; - auto child2 = ChildCol{{-10, 10}, {0, 1}}; - auto struct_col = StructCol{{child0, child1, child2}, {0, 0}}; - auto ret_col0 = ChildCol{{0}, {0}}; - auto ret_col1 = ChildCol{{0}, {0}}; - auto ret_col2 = ChildCol{{0}, {0}}; + auto child0 = ICW{-3, 3}; + auto child1 = ICW{0, 0}; + auto child2 = ICW{{-10, 10}, {0, 1}}; + auto struct_col = SCW{{child0, child1, child2}, {0, 0}}; + auto ret_col0 = ICW{{0}, {0}}; + auto ret_col1 = ICW{{0}, {0}}; + auto ret_col2 = ICW{{0}, {0}}; this->reduction_test(struct_col, cudf::table_view{{ret_col0, ret_col1, ret_col2}}, // expected_value, true, @@ -2052,13 +2124,13 @@ TEST_F(StructReductionTest, NonValidStructReductionNthElement) cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); // test against empty input (would fail because we can not create empty struct scalar) - child0 = ChildCol{}; - child1 = ChildCol{}; - child2 = ChildCol{}; - struct_col = StructCol{{child0, child1, child2}}; - ret_col0 = ChildCol{}; - ret_col1 = ChildCol{}; - ret_col2 = ChildCol{}; + child0 = ICW{}; + child1 = ICW{}; + child2 = ICW{}; + struct_col = SCW{{child0, child1, child2}}; + ret_col0 = ICW{}; + ret_col1 = ICW{}; + ret_col2 = ICW{}; this->reduction_test(struct_col, cudf::table_view{{ret_col0, ret_col1, ret_col2}}, // expected_value, false, From fbd1039ac849e2fa92fac62538c5edd1f4a4e2b2 Mon Sep 17 00:00:00 2001 From: Alfred Xu Date: Wed, 18 Aug 2021 10:51:46 +0800 Subject: [PATCH 3/8] Apply suggestions from code review Co-authored-by: Nghia Truong --- cpp/include/cudf/scalar/scalar_factories.hpp | 2 +- cpp/src/reductions/reductions.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index 5db0257660a..cc49c8ec982 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -124,7 +124,7 @@ std::unique_ptr make_default_constructed_scalar( /** * @brief Creates an empty (invalid) scalar of the same type as the `input` column_view. * - * @throws std::bad_alloc if device memory allocation fails + * @throw std::bad_alloc if device memory allocation fails * * @param input Immutable view of input column to emulate * @param stream CUDA stream used for device memory operations. diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 7d73a591eb8..bc8319628bd 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -122,9 +122,7 @@ std::unique_ptr reduce( return make_empty_scalar_like(col, stream, mr); } - std::unique_ptr result = - aggregation_dispatcher(agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); - return result; + return aggregation_dispatcher(agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); } } // namespace detail From cdd1a0cc7a90012998aa5fd2672ced9d19ffdce3 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Wed, 18 Aug 2021 11:30:38 +0800 Subject: [PATCH 4/8] fix style --- cpp/src/reductions/reductions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index bc8319628bd..699494c49c5 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -122,7 +122,8 @@ std::unique_ptr reduce( return make_empty_scalar_like(col, stream, mr); } - return aggregation_dispatcher(agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); + return aggregation_dispatcher( + agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); } } // namespace detail From ae5f633cb4e00b71a99bd75a699443a0ccfa6978 Mon Sep 17 00:00:00 2001 From: Alfred Xu Date: Thu, 19 Aug 2021 10:03:09 +0800 Subject: [PATCH 5/8] Apply suggestions from code review Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> Co-authored-by: Nghia Truong --- cpp/include/cudf/scalar/scalar_factories.hpp | 2 +- cpp/tests/reductions/reduction_tests.cpp | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index cc49c8ec982..b949f8d542f 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -124,7 +124,7 @@ std::unique_ptr make_default_constructed_scalar( /** * @brief Creates an empty (invalid) scalar of the same type as the `input` column_view. * - * @throw std::bad_alloc if device memory allocation fails + * @throw cudf::logic_error if the `input` column is struct type and empty * * @param input Immutable view of input column to emulate * @param stream CUDA stream used for device memory operations. diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 5fb9d6ce62b..88318a41882 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -1874,7 +1874,6 @@ TYPED_TEST(DictionaryReductionTest, Quantile) output_type); } -//------------------------------------------------------------------- struct ListReductionTest : public cudf::test::BaseFixture { void reduction_test(cudf::column_view const& input_data, cudf::column_view const& expected_value, @@ -1979,7 +1978,6 @@ TEST_F(ListReductionTest, NonValidListReductionNthElement) cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); } -//------------------------------------------------------------------- struct StructReductionTest : public cudf::test::BaseFixture { using SCW = cudf::test::structs_column_wrapper; From 5d54751c55c397aec8351c4c6e11c2bb35b9e2ce Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Fri, 20 Aug 2021 12:01:25 +0800 Subject: [PATCH 6/8] update --- cpp/src/scalar/scalar_factories.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index d257b4c9955..88234d19d46 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -20,7 +20,6 @@ #include #include -#include #include #include @@ -178,9 +177,7 @@ std::unique_ptr make_empty_scalar_like(column_view const& column, result->set_valid_async(false, stream); break; case type_id::STRUCT: - // Struct scalar inputs must have exactly 1 row. - CUDF_EXPECTS(!column.is_empty(), "Can not create empty struct scalar"); - result = detail::get_element(column, 1, stream, mr); + result = detail::get_element(column, 0, stream, mr); result->set_valid_async(false, stream); break; default: result = make_default_constructed_scalar(column.type(), stream, mr); From 1077e70ccb7915a20428d9362e3112849600c5c3 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Fri, 20 Aug 2021 12:03:24 +0800 Subject: [PATCH 7/8] update --- cpp/src/scalar/scalar_factories.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index 88234d19d46..25418cf0f7e 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -177,6 +177,7 @@ std::unique_ptr make_empty_scalar_like(column_view const& column, result->set_valid_async(false, stream); break; case type_id::STRUCT: + // The input column must have at least 1 row to extract a scalar (row) from it. result = detail::get_element(column, 0, stream, mr); result->set_valid_async(false, stream); break; From 608565a317a4f5f5594a818d5fed3d8f3a8d7177 Mon Sep 17 00:00:00 2001 From: sperlingxx Date: Fri, 20 Aug 2021 15:36:13 +0800 Subject: [PATCH 8/8] fix --- cpp/tests/groupby/nth_element_tests.cpp | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/cpp/tests/groupby/nth_element_tests.cpp b/cpp/tests/groupby/nth_element_tests.cpp index 8895600fefb..47dfa2426eb 100644 --- a/cpp/tests/groupby/nth_element_tests.cpp +++ b/cpp/tests/groupby/nth_element_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -435,8 +435,11 @@ TEST_F(groupby_nth_element_structs_test, Basics) auto expected_ch1 = doubles{1.2, 4.51, -0.07, 0.0}; auto expected_ch2 = strings{"a", "d", "g", ""}; auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {0, 1, 1, 0}}; - test_single_agg( - keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(1)); + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(1)); expected_keys = ints{0, 1, 2, 3}; expected_ch0 = ints{0, 4, 6, 9}; @@ -447,7 +450,7 @@ TEST_F(groupby_nth_element_structs_test, Basics) values, expected_keys, expected_values, - cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); + cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); } TEST_F(groupby_nth_element_structs_test, NestedStructs) @@ -472,8 +475,11 @@ TEST_F(groupby_nth_element_structs_test, NestedStructs) auto expected_ch1 = structs{expected_ch0_of_ch1, expected_ch1_of_ch1}; auto expected_ch2 = lists{{1, 2, 3}, {5, 6}, {7}, {}}; auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {0, 1, 1, 0}}; - test_single_agg( - keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(1)); + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(1)); expected_keys = ints{0, 1, 2, 3}; expected_ch0 = ints{0, 4, 6, 9}; @@ -486,7 +492,7 @@ TEST_F(groupby_nth_element_structs_test, NestedStructs) values, expected_keys, expected_values, - cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); + cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); } TEST_F(groupby_nth_element_structs_test, EmptyInput) @@ -507,8 +513,11 @@ TEST_F(groupby_nth_element_structs_test, EmptyInput) auto expected_ch1 = doubles{}; auto expected_ch2 = strings{}; auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}}; - test_single_agg( - keys, values, expected_keys, expected_values, cudf::make_nth_element_aggregation(0)); + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0)); } } // namespace test } // namespace cudf