diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp index b96a8c65a04..b949f8d542f 100644 --- a/cpp/include/cudf/scalar/scalar_factories.hpp +++ b/cpp/include/cudf/scalar/scalar_factories.hpp @@ -121,6 +121,20 @@ std::unique_ptr make_default_constructed_scalar( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); +/** + * @brief Creates an empty (invalid) scalar of the same type as the `input` column_view. + * + * @throw cudf::logic_error if the `input` column is struct type and empty + * + * @param input Immutable view of input column to emulate + * @param stream CUDA stream used for device memory operations. + * @param mr Device memory resource used to allocate the scalar's `data` and `is_valid` bool. + */ +std::unique_ptr make_empty_scalar_like( + column_view const& input, + rmm::cuda_stream_view stream = rmm::cuda_stream_default, + rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + /** * @brief Construct scalar using the given value of fixed width type * diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index a8117373ca4..699494c49c5 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -25,6 +25,7 @@ #include #include +#include #include namespace cudf { @@ -112,15 +113,17 @@ std::unique_ptr reduce( rmm::cuda_stream_view stream = rmm::cuda_stream_default, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - std::unique_ptr result = make_default_constructed_scalar(output_dtype, stream, mr); - result->set_valid_async(false, stream); - - // check if input column is empty - if (col.size() <= col.null_count()) return result; + // Return a default scalar when the input column has no valid rows (empty or all-null). For + // nested columns the scalar must be constructed from the input column itself. 
+ if (col.size() <= col.null_count()) { + if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) { + return make_default_constructed_scalar(output_dtype, stream, mr); + } + return make_empty_scalar_like(col, stream, mr); + } - result = - aggregation_dispatcher(agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); - return result; + return aggregation_dispatcher( + agg->kind, reduce_dispatch_functor{col, output_dtype, stream, mr}, agg); } } // namespace detail diff --git a/cpp/src/scalar/scalar_factories.cpp b/cpp/src/scalar/scalar_factories.cpp index af78d84d874..25418cf0f7e 100644 --- a/cpp/src/scalar/scalar_factories.cpp +++ b/cpp/src/scalar/scalar_factories.cpp @@ -20,6 +20,7 @@ #include #include +#include #include namespace cudf { @@ -165,4 +166,24 @@ std::unique_ptr make_default_constructed_scalar(data_type type, return type_dispatcher(type, default_scalar_functor{}, stream, mr); } +std::unique_ptr make_empty_scalar_like(column_view const& column, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) +{ + std::unique_ptr result; + switch (column.type().id()) { + case type_id::LIST: + result = make_list_scalar(empty_like(column)->view(), stream, mr); + result->set_valid_async(false, stream); + break; + case type_id::STRUCT: + // The input column must have at least 1 row to extract a scalar (row) from it. + result = detail::get_element(column, 0, stream, mr); + result->set_valid_async(false, stream); + break; + default: result = make_default_constructed_scalar(column.type(), stream, mr); + } + return result; +} + } // namespace cudf diff --git a/cpp/tests/groupby/nth_element_tests.cpp b/cpp/tests/groupby/nth_element_tests.cpp index 22f1e14815f..47dfa2426eb 100644 --- a/cpp/tests/groupby/nth_element_tests.cpp +++ b/cpp/tests/groupby/nth_element_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -414,5 +414,110 @@ TYPED_TEST(groupby_nth_element_lists_test, EmptyInput) cudf::make_nth_element_aggregation(2)); } +struct groupby_nth_element_structs_test : BaseFixture { +}; + +TEST_F(groupby_nth_element_structs_test, Basics) +{ + using structs = cudf::test::structs_column_wrapper; + using ints = cudf::test::fixed_width_column_wrapper; + using doubles = cudf::test::fixed_width_column_wrapper; + using strings = cudf::test::strings_column_wrapper; + + auto keys = ints{0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + auto child0 = ints{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto child1 = doubles{0.1, 1.2, 2.3, 3.4, 4.51, 5.3e4, 6.3231, -0.07, 832.1, 9.999}; + auto child2 = strings{"", "a", "b", "c", "d", "e", "f", "g", "HH", "JJJ"}; + auto values = structs{{child0, child1, child2}, {1, 0, 1, 0, 1, 1, 1, 1, 0, 1}}; + + auto expected_keys = ints{0, 1, 2, 3}; + auto expected_ch0 = ints{1, 4, 7, 0}; + auto expected_ch1 = doubles{1.2, 4.51, -0.07, 0.0}; + auto expected_ch2 = strings{"a", "d", "g", ""}; + auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {0, 1, 1, 0}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(1)); + + expected_keys = ints{0, 1, 2, 3}; + expected_ch0 = ints{0, 4, 6, 9}; + expected_ch1 = doubles{0.1, 4.51, 6.3231, 9.999}; + expected_ch2 = strings{"", "d", "f", "JJJ"}; + expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {1, 1, 1, 1}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); +} + +TEST_F(groupby_nth_element_structs_test, NestedStructs) +{ + using structs = cudf::test::structs_column_wrapper; + using ints = cudf::test::fixed_width_column_wrapper; + using doubles = cudf::test::fixed_width_column_wrapper; + using lists = 
cudf::test::lists_column_wrapper; + + auto keys = ints{0, 0, 0, 1, 1, 1, 2, 2, 2, 3}; + auto child0 = ints{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto child0_of_child1 = ints{0, -1, -2, -3, -4, -5, -6, -7, -8, -9}; + auto child1_of_child1 = doubles{0.1, 1.2, 2.3, 3.4, 4.51, 5.3e4, 6.3231, -0.07, 832.1, 9.999}; + auto child1 = structs{child0_of_child1, child1_of_child1}; + auto child2 = lists{{0}, {1, 2, 3}, {}, {4}, {5, 6}, {}, {}, {7}, {8, 9}, {}}; + auto values = structs{{child0, child1, child2}, {1, 0, 1, 0, 1, 1, 1, 1, 0, 1}}; + + auto expected_keys = ints{0, 1, 2, 3}; + auto expected_ch0 = ints{1, 4, 7, 0}; + auto expected_ch0_of_ch1 = ints{-1, -4, -7, 0}; + auto expected_ch1_of_ch1 = doubles{1.2, 4.51, -0.07, 0.0}; + auto expected_ch1 = structs{expected_ch0_of_ch1, expected_ch1_of_ch1}; + auto expected_ch2 = lists{{1, 2, 3}, {5, 6}, {7}, {}}; + auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {0, 1, 1, 0}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(1)); + + expected_keys = ints{0, 1, 2, 3}; + expected_ch0 = ints{0, 4, 6, 9}; + expected_ch0_of_ch1 = ints{0, -4, -6, -9}; + expected_ch1_of_ch1 = doubles{0.1, 4.51, 6.3231, 9.999}; + expected_ch1 = structs{expected_ch0_of_ch1, expected_ch1_of_ch1}; + expected_ch2 = lists{{0}, {5, 6}, {}, {}}; + expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}, {1, 1, 1, 1}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0, null_policy::EXCLUDE)); +} + +TEST_F(groupby_nth_element_structs_test, EmptyInput) +{ + using structs = cudf::test::structs_column_wrapper; + using ints = cudf::test::fixed_width_column_wrapper; + using doubles = cudf::test::fixed_width_column_wrapper; + using strings = cudf::test::strings_column_wrapper; + + auto keys = ints{}; + auto child0 = ints{}; + auto child1 = doubles{}; + auto child2 = strings{}; + auto values = structs{{child0, child1, 
child2}}; + + auto expected_keys = ints{}; + auto expected_ch0 = ints{}; + auto expected_ch1 = doubles{}; + auto expected_ch2 = strings{}; + auto expected_values = structs{{expected_ch0, expected_ch1, expected_ch2}}; + test_single_agg(keys, + values, + expected_keys, + expected_values, + cudf::make_nth_element_aggregation(0)); +} } // namespace test } // namespace cudf diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index da9032737f2..88318a41882 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -24,8 +24,10 @@ #include #include #include +#include #include #include +#include #include @@ -1872,4 +1874,266 @@ TYPED_TEST(DictionaryReductionTest, Quantile) output_type); } +struct ListReductionTest : public cudf::test::BaseFixture { + void reduction_test(cudf::column_view const& input_data, + cudf::column_view const& expected_value, + bool succeeded_condition, + bool is_valid, + std::unique_ptr const& agg) + { + auto statement = [&]() { + std::unique_ptr result = + cudf::reduce(input_data, agg, cudf::data_type(cudf::type_id::LIST)); + auto list_result = dynamic_cast(result.get()); + EXPECT_EQ(is_valid, list_result->is_valid()); + if (is_valid) { CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_value, list_result->view()); } + }; + + if (succeeded_condition) { + CUDF_EXPECT_NO_THROW(statement()); + } else { + EXPECT_ANY_THROW(statement()); + } + } +}; + +TEST_F(ListReductionTest, ListReductionNthElement) +{ + using LCW = cudf::test::lists_column_wrapper; + using ElementCol = cudf::test::fixed_width_column_wrapper; + + // test without nulls + LCW col{{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}; + this->reduction_test(col, + ElementCol{0, 5, -3}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); + + // test with null-exclude + std::vector validity{1, 0, 0, 1, 1, 0}; + LCW col_nulls({{-3}, {2, 1}, {0, 5, -3}, {-2}, {}, {28}}, 
validity.begin()); + this->reduction_test(col_nulls, + ElementCol{-2}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(1, cudf::null_policy::EXCLUDE)); + + // test with null-include + this->reduction_test(col_nulls, + ElementCol{}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE)); +} + +TEST_F(ListReductionTest, NestedListReductionNthElement) +{ + using LCW = cudf::test::lists_column_wrapper; + + // test a valid row (input column contains nulls) + auto validity = std::vector{1, 0, 0, 1, 1}; + auto nested_list = LCW( + {{LCW{}, LCW{2, 3, 4}}, {}, {LCW{5}, LCW{6}, LCW{7, 8}}, {LCW{9, 10}}, {LCW{11}, LCW{12, 13}}}, + validity.begin()); + this->reduction_test(nested_list, + LCW{{}, {2, 3, 4}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); + + // test with null-include + this->reduction_test(nested_list, + LCW{}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); + + // test with null-exclude + this->reduction_test(nested_list, + LCW{{11}, {12, 13}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(2, cudf::null_policy::EXCLUDE)); +} + +TEST_F(ListReductionTest, NonValidListReductionNthElement) +{ + using LCW = cudf::test::lists_column_wrapper; + using ElementCol = cudf::test::fixed_width_column_wrapper; + + // test against col.size() <= col.null_count() + std::vector validity{0}; + this->reduction_test(LCW{{{1, 2}}, validity.begin()}, + ElementCol{}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); + + // test against empty input + this->reduction_test(LCW{}, + ElementCol{{0}, {0}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); +} + +struct StructReductionTest : public cudf::test::BaseFixture { + using SCW = cudf::test::structs_column_wrapper; + + void 
reduction_test(SCW const& struct_column, + cudf::table_view const& expected_value, + bool succeeded_condition, + bool is_valid, + std::unique_ptr const& agg) + { + auto statement = [&]() { + std::unique_ptr result = + cudf::reduce(struct_column, agg, cudf::data_type(cudf::type_id::STRUCT)); + auto struct_result = dynamic_cast(result.get()); + EXPECT_EQ(is_valid, struct_result->is_valid()); + if (is_valid) { CUDF_TEST_EXPECT_TABLES_EQUAL(expected_value, struct_result->view()); } + }; + + if (succeeded_condition) { + CUDF_EXPECT_NO_THROW(statement()); + } else { + EXPECT_ANY_THROW(statement()); + } + } +}; + +TEST_F(StructReductionTest, StructReductionNthElement) +{ + using ICW = cudf::test::fixed_width_column_wrapper; + + // test without nulls + auto child0 = *ICW{-3, 2, 1, 0, 5, -3, -2, 28}.release(); + auto child1 = *ICW{0, 1, 2, 3, 4, 5, 6, 7}.release(); + auto child2 = + *ICW{{-10, 10, -100, 100, -1000, 1000, -10000, 10000}, {1, 0, 0, 1, 1, 1, 0, 1}}.release(); + std::vector> input_vector; + input_vector.push_back(std::make_unique(child0)); + input_vector.push_back(std::make_unique(child1)); + input_vector.push_back(std::make_unique(child2)); + auto struct_col = SCW(std::move(input_vector)); + auto result_col0 = ICW{1}; + auto result_col1 = ICW{2}; + auto result_col2 = ICW{{0}, {0}}; + this->reduction_test( + struct_col, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(2, cudf::null_policy::INCLUDE)); + + // test with null-include + std::vector validity{1, 1, 1, 0, 1, 0, 0, 1}; + input_vector.clear(); + input_vector.push_back(std::make_unique(child0)); + input_vector.push_back(std::make_unique(child1)); + input_vector.push_back(std::make_unique(child2)); + struct_col = SCW(std::move(input_vector), validity); + result_col0 = ICW{{0}, {0}}; + result_col1 = ICW{{0}, {0}}; + result_col2 = ICW{{0}, {0}}; + this->reduction_test( + struct_col, + cudf::table_view{{result_col0, 
result_col1, result_col2}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(6, cudf::null_policy::INCLUDE)); + + // test with null-exclude + result_col0 = ICW{{28}, {1}}; + result_col1 = ICW{{7}, {1}}; + result_col2 = ICW{{10000}, {1}}; + this->reduction_test( + struct_col, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(4, cudf::null_policy::EXCLUDE)); +} + +TEST_F(StructReductionTest, NestedStructReductionNthElement) +{ + using ICW = cudf::test::fixed_width_column_wrapper; + using LCW = cudf::test::lists_column_wrapper; + + auto int_col0 = ICW{-4, -3, -2, -1, 0}; + auto struct_col0 = SCW({int_col0}, std::vector{1, 0, 0, 1, 1}); + auto int_col1 = ICW{0, 1, 2, 3, 4}; + auto list_col = LCW{{0}, {}, {1, 2}, {3}, {4}}; + auto struct_col1 = SCW({struct_col0, int_col1, list_col}, std::vector{1, 1, 1, 0, 1}); + auto result_child0 = ICW{0}; + auto result_col0 = SCW({result_child0}, std::vector{0}); + auto result_col1 = ICW{{1}, {1}}; + auto result_col2 = LCW({LCW{}}, std::vector{1}.begin()); + // test a valid row (input column contains nulls) + this->reduction_test( + struct_col1, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(1, cudf::null_policy::INCLUDE)); + + // test with null-include + result_child0 = ICW{0}; + result_col0 = SCW({result_child0}, std::vector{0}); + result_col1 = ICW{{0}, {0}}; + result_col2 = LCW({LCW{3}}, std::vector{0}.begin()); + this->reduction_test( + struct_col1, + cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(3, cudf::null_policy::INCLUDE)); + + // test with null-exclude + result_child0 = ICW{0}; + result_col0 = SCW({result_child0}, std::vector{1}); + result_col1 = ICW{{4}, {1}}; + result_col2 = LCW({LCW{4}}, std::vector{1}.begin()); + this->reduction_test( + struct_col1, + 
cudf::table_view{{result_col0, result_col1, result_col2}}, // expected_value, + true, + true, + cudf::make_nth_element_aggregation(3, cudf::null_policy::EXCLUDE)); +} + +TEST_F(StructReductionTest, NonValidStructReductionNthElement) +{ + using ICW = cudf::test::fixed_width_column_wrapper; + + // test against col.size() <= col.null_count() + auto child0 = ICW{-3, 3}; + auto child1 = ICW{0, 0}; + auto child2 = ICW{{-10, 10}, {0, 1}}; + auto struct_col = SCW{{child0, child1, child2}, {0, 0}}; + auto ret_col0 = ICW{{0}, {0}}; + auto ret_col1 = ICW{{0}, {0}}; + auto ret_col2 = ICW{{0}, {0}}; + this->reduction_test(struct_col, + cudf::table_view{{ret_col0, ret_col1, ret_col2}}, // expected_value, + true, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); + + // test against empty input (expected to throw: an empty struct scalar cannot be created) + child0 = ICW{}; + child1 = ICW{}; + child2 = ICW{}; + struct_col = SCW{{child0, child1, child2}}; + ret_col0 = ICW{}; + ret_col1 = ICW{}; + ret_col2 = ICW{}; + this->reduction_test(struct_col, + cudf::table_view{{ret_col0, ret_col1, ret_col2}}, // expected_value, + false, + false, + cudf::make_nth_element_aggregation(0, cudf::null_policy::INCLUDE)); +} + CUDF_TEST_PROGRAM_MAIN()