diff --git a/cpp/src/groupby/sort/group_scan_util.cuh b/cpp/src/groupby/sort/group_scan_util.cuh index 2efe14f70ca..14e5195bb79 100644 --- a/cpp/src/groupby/sort/group_scan_util.cuh +++ b/cpp/src/groupby/sort/group_scan_util.cuh @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include @@ -26,8 +26,6 @@ #include #include #include -#include -#include #include #include #include @@ -192,43 +190,18 @@ struct group_scan_functor{null_precedence}); - auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); - auto const flattened_null_precedences = - (K == aggregation::MIN) - ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) - : rmm::device_uvector(0, stream); + // Create a gather map containing indices of the prefix min/max elements within each group. + auto gather_map = rmm::device_uvector(values.size(), stream); - // Create a gather map contaning indices of the prefix min/max elements. - auto gather_map = rmm::device_uvector(values.size(), stream); - auto const map_begin = gather_map.begin(); - - // Perform segmented scan. - auto const do_scan = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { - thrust::inclusive_scan_by_key(rmm::exec_policy(stream), - group_labels.begin(), - group_labels.end(), - inp_iter, - out_iter, - thrust::equal_to{}, - binop); - }; - - // Find the indices of the prefix min/max elements within each group. - auto const count_iter = thrust::make_counting_iterator(0); - auto const binop = cudf::reduction::detail::row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - values.has_nulls(), - flattened_null_precedences.data(), - K == aggregation::MIN); - do_scan(count_iter, map_begin, binop); - - auto gather_map_view = - column_view(data_type{type_to_id()}, gather_map.size(), gather_map.data()); + auto const binop_generator = + cudf::reduction::detail::comparison_binop_generator::create(values, stream); + thrust::inclusive_scan_by_key(rmm::exec_policy(stream), + group_labels.begin(), + group_labels.end(), + thrust::make_counting_iterator(0), + gather_map.begin(), + thrust::equal_to{}, + binop_generator.binop()); // // Gather the children elements of the prefix min/max struct elements first. @@ -240,7 +213,7 @@ struct group_scan_functor{values.child_begin(), values.child_end()}), - gather_map_view, + gather_map, cudf::out_of_bounds_policy::DONT_CHECK, cudf::detail::negative_index_policy::NOT_ALLOWED, stream, diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh index 4fde825c0e0..ffc6032dfa1 100644 --- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh +++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh @@ -16,15 +16,13 @@ #pragma once -#include +#include #include #include #include #include #include -#include -#include #include #include #include @@ -244,18 +242,6 @@ struct group_reduction_functor< if (values.is_empty()) { return result; } - // When finding ARGMIN, we need to consider nulls as larger than non-null elements. - // Thing is opposite for ARGMAX. - auto const null_precedence = - (K == aggregation::ARGMIN) ? null_order::AFTER : null_order::BEFORE; - auto const flattened_values = structs::detail::flatten_nested_columns( - table_view{{values}}, {}, std::vector{null_precedence}); - auto const d_flattened_values_ptr = table_device_view::create(flattened_values, stream); - auto const flattened_null_precedences = - (K == aggregation::ARGMIN) - ? cudf::detail::make_device_uvector_async(flattened_values.null_orders(), stream) - : rmm::device_uvector(0, stream); - // Perform segmented reduction to find ARGMIN/ARGMAX. auto const do_reduction = [&](auto const& inp_iter, auto const& out_iter, auto const& binop) { thrust::reduce_by_key(rmm::exec_policy(stream), @@ -270,12 +256,9 @@ struct group_reduction_functor< auto const count_iter = thrust::make_counting_iterator(0); auto const result_begin = result->mutable_view().template begin(); - auto const binop = cudf::reduction::detail::row_arg_minmax_fn(values.size(), - *d_flattened_values_ptr, - values.has_nulls(), - flattened_null_precedences.data(), - K == aggregation::ARGMIN); - do_reduction(count_iter, result_begin, binop); + auto const binop_generator = + cudf::reduction::detail::comparison_binop_generator::create(values, stream); + do_reduction(count_iter, result_begin, binop_generator.binop()); if (values.has_nulls()) { // Generate bitmask for the output by segmented reduction of the input bitmask. diff --git a/cpp/src/reductions/arg_minmax_util.cuh b/cpp/src/reductions/arg_minmax_util.cuh deleted file mode 100644 index 5694d0ed0fa..00000000000 --- a/cpp/src/reductions/arg_minmax_util.cuh +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -namespace cudf { -namespace reduction { -namespace detail { - -/** - * @brief Binary operator ArgMin/ArgMax with index values into the input table. - */ -struct row_arg_minmax_fn { - size_type const num_rows; - row_lexicographic_comparator const comp; - bool const arg_min; - - row_arg_minmax_fn(size_type const num_rows, - table_device_view const& table, - bool has_nulls, - null_order const* null_precedence, - bool const arg_min) - : num_rows(num_rows), - comp(nullate::DYNAMIC{has_nulls}, table, table, nullptr, null_precedence), - arg_min(arg_min) - { - } - - // This function is explicitly prevented from inlining, because it calls to - // `row_lexicographic_comparator::operator()` which is inlined and very heavy-weight. As a result, - // instantiating this functor will result in huge code, and objects of this functor used with - // `thrust::reduce_by_key` or `thrust::scan_by_key` will result in significant compile time. - __attribute__((noinline)) __device__ auto operator()(size_type lhs_idx, size_type rhs_idx) const - { - // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and - // github.com/NVIDIA/thrust/issues/1525 - // where invalid random values may be passed here by thrust::reduce_by_key - if (lhs_idx < 0 || lhs_idx >= num_rows) { return rhs_idx; } - if (rhs_idx < 0 || rhs_idx >= num_rows) { return lhs_idx; } - - // Return `lhs_idx` iff: - // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or - // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. - return comp(lhs_idx, rhs_idx) == arg_min ? lhs_idx : rhs_idx; - } -}; - -} // namespace detail -} // namespace reduction -} // namespace cudf diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu index 5c2b686fd9c..809f3506c67 100644 --- a/cpp/src/reductions/scan/scan_inclusive.cu +++ b/cpp/src/reductions/scan/scan_inclusive.cu @@ -14,8 +14,8 @@ * limitations under the License. */ -#include #include +#include #include #include @@ -23,8 +23,6 @@ #include #include #include -#include -#include #include #include @@ -159,35 +157,15 @@ struct scan_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - // Op is used only to determined if we want to find the min or max element. - auto constexpr is_min_op = std::is_same_v; - - // Build indices of the scan operation results (ARGMIN/ARGMAX). - // When finding ARGMIN, we need to consider nulls as larger than non-null elements, and the - // opposite for ARGMAX. - auto gather_map = rmm::device_uvector(input.size(), stream); - auto const do_scan = [&](auto const& binop) { - thrust::inclusive_scan(rmm::exec_policy(stream), - thrust::counting_iterator(0), - thrust::counting_iterator(input.size()), - gather_map.begin(), - binop); - }; - - auto constexpr null_precedence = is_min_op ? cudf::null_order::AFTER : cudf::null_order::BEFORE; - auto const flattened_input = cudf::structs::detail::flatten_nested_columns( - table_view{{input}}, {}, std::vector{null_precedence}); - auto const d_flattened_input_ptr = table_device_view::create(flattened_input, stream); - auto const flattened_null_precedences = - is_min_op ? cudf::detail::make_device_uvector_async(flattened_input.null_orders(), stream) - : rmm::device_uvector(0, stream); - - auto const binop = cudf::reduction::detail::row_arg_minmax_fn(input.size(), - *d_flattened_input_ptr, - input.has_nulls(), - flattened_null_precedences.data(), - is_min_op); - do_scan(binop); + // Create a gather map contaning indices of the prefix min/max elements. + auto gather_map = rmm::device_uvector(input.size(), stream); + auto const binop_generator = + cudf::reduction::detail::comparison_binop_generator::create(input, stream); + thrust::inclusive_scan(rmm::exec_policy(stream), + thrust::counting_iterator(0), + thrust::counting_iterator(input.size()), + gather_map.begin(), + binop_generator.binop()); // Gather the children columns of the input column. Must use `get_sliced_child` to properly // handle input in case it is a sliced view. diff --git a/cpp/src/reductions/simple.cuh b/cpp/src/reductions/simple.cuh index 642531434ae..8f76a320b7e 100644 --- a/cpp/src/reductions/simple.cuh +++ b/cpp/src/reductions/simple.cuh @@ -16,13 +16,12 @@ #pragma once -#include +#include #include #include #include #include -#include #include #include #include @@ -294,37 +293,14 @@ struct same_element_type_dispatcher { { if (input.is_empty()) { return cudf::make_empty_scalar_like(input, stream, mr); } - auto constexpr is_min_op = std::is_same_v; - // We will do reduction to find the ARGMIN/ARGMAX index, then return the element at that index. - // When finding ARGMIN, we need to consider nulls as larger than non-null elements, and the - // opposite for ARGMAX. - auto constexpr null_precedence = is_min_op ? cudf::null_order::AFTER : cudf::null_order::BEFORE; - auto const flattened_input = cudf::structs::detail::flatten_nested_columns( - table_view{{input}}, {}, std::vector{null_precedence}); - auto const d_flattened_input_ptr = table_device_view::create(flattened_input, stream); - auto const flattened_null_precedences = - is_min_op ? cudf::detail::make_device_uvector_async(flattened_input.null_orders(), stream) - : rmm::device_uvector(0, stream); - - // Perform reduction to find ARGMIN/ARGMAX. - auto const do_reduction = [&](auto const& binop) { - return thrust::reduce(rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.size()), - size_type{0}, - binop); - }; - - auto const minmax_idx = [&] { - auto const binop = - cudf::reduction::detail::row_arg_minmax_fn(input.size(), - *d_flattened_input_ptr, - input.has_nulls(), - flattened_null_precedences.data(), - is_min_op); - return do_reduction(binop); - }(); + auto const binop_generator = + cudf::reduction::detail::comparison_binop_generator::create(input, stream); + auto const minmax_idx = thrust::reduce(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(input.size()), + size_type{0}, + binop_generator.binop()); return cudf::detail::get_element(input, minmax_idx, stream, mr); } diff --git a/cpp/src/reductions/struct_minmax_util.cuh b/cpp/src/reductions/struct_minmax_util.cuh new file mode 100644 index 00000000000..8a7e94ea4ca --- /dev/null +++ b/cpp/src/reductions/struct_minmax_util.cuh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace cudf { +namespace reduction { +namespace detail { + +/** + * @brief Binary operator ArgMin/ArgMax with index values into the input table. + */ +struct row_arg_minmax_fn { + size_type const num_rows; + row_lexicographic_comparator const comp; + bool const arg_min; + + row_arg_minmax_fn(table_device_view const& table, + bool has_nulls, + null_order const* null_precedence, + bool const arg_min) + : num_rows(table.num_rows()), + comp(nullate::DYNAMIC{has_nulls}, table, table, nullptr, null_precedence), + arg_min(arg_min) + { + } + + // This function is explicitly prevented from inlining, because it calls to + // `row_lexicographic_comparator::operator()` which is inlined and very heavy-weight. As a result, + // instantiating this functor will result in huge code, and objects of this functor used with + // `thrust::reduce_by_key` or `thrust::scan_by_key` will result in significant compile time. + __attribute__((noinline)) __device__ auto operator()(size_type lhs_idx, size_type rhs_idx) const + { + // The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and + // github.com/NVIDIA/thrust/issues/1525 + // where invalid random values may be passed here by thrust::reduce_by_key + if (lhs_idx < 0 || lhs_idx >= num_rows) { return rhs_idx; } + if (rhs_idx < 0 || rhs_idx >= num_rows) { return lhs_idx; } + + // Return `lhs_idx` iff: + // row(lhs_idx) < row(rhs_idx) and finding ArgMin, or + // row(lhs_idx) >= row(rhs_idx) and finding ArgMax. + return comp(lhs_idx, rhs_idx) == arg_min ? lhs_idx : rhs_idx; + } +}; + +/** + * @brief The null order when comparing a null with non-null elements. Currently support only the + * default null order: nulls are compared as LESS than any other non-null elements. + */ +auto static constexpr DEFAULT_NULL_ORDER = cudf::null_order::BEFORE; + +/** + * @brief The utility class to provide a binary operator object for lexicographic comparison of + * struct elements. + * + * The input of this class is a structs column. Using the binary operator provided by this class, + * nulls STRUCT are compared as larger than all other non-null STRUCT elements - if finding for + * ARGMIN, or smaller than all other non-null STRUCT elements - if finding for ARGMAX. This helps + * achieve the results of finding the min or max STRUCT element when nulls are excluded from the + * operations, returning null only when all the input elements are nulls. + */ +class comparison_binop_generator { + private: + cudf::structs::detail::flattened_table const flattened_input; + std::unique_ptr> const + d_flattened_input_ptr; + bool const is_min_op; + bool const has_nulls; + + std::vector null_orders; + rmm::device_uvector null_orders_dvec; + + comparison_binop_generator(column_view const& input, rmm::cuda_stream_view stream, bool is_min_op) + : flattened_input{cudf::structs::detail::flatten_nested_columns( + table_view{{input}}, {}, std::vector{DEFAULT_NULL_ORDER})}, + d_flattened_input_ptr{table_device_view::create(flattened_input, stream)}, + is_min_op(is_min_op), + has_nulls{input.has_nulls()}, + null_orders_dvec(0, stream) + { + if (is_min_op) { + null_orders = flattened_input.null_orders(); + // Null structs are excluded from the operations, and that is equivalent to considering + // nulls as larger than all other non-null STRUCT elements (if finding for ARGMIN), or + // smaller than all other non-null STRUCT elements (if finding for ARGMAX). + // Thus, we need to set a separate null order for the top level structs column (which is + // stored at the first position in the null_orders array) to achieve this purpose. + null_orders.front() = cudf::null_order::AFTER; + null_orders_dvec = cudf::detail::make_device_uvector_async(null_orders, stream); + } + // else: Don't need to generate nulls order to copy to device memory if we have all null orders + // are BEFORE (that happens when we have is_min_op == false). + } + + public: + auto binop() const + { + return row_arg_minmax_fn(*d_flattened_input_ptr, has_nulls, null_orders_dvec.data(), is_min_op); + } + + template + static auto create(column_view const& input, rmm::cuda_stream_view stream) + { + return comparison_binop_generator( + input, + stream, + std::is_same_v || std::is_same_v); + } + + template + static auto create(column_view const& input, rmm::cuda_stream_view stream) + + { + return comparison_binop_generator( + input, stream, K == cudf::aggregation::MIN || K == cudf::aggregation::ARGMIN); + } +}; + +} // namespace detail +} // namespace reduction +} // namespace cudf diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index e138cd6f68e..e1c426990eb 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -2301,28 +2301,32 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls) using INTS_CW = cudf::test::fixed_width_column_wrapper; using STRINGS_CW = cudf::test::strings_column_wrapper; using STRUCTS_CW = cudf::test::structs_column_wrapper; + using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; + // `null` means null at child column. + // `NULL` means null at parent column. auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*NULL*/, + "₹1" /*null*/, "aaa" /*NULL*/, "zit", "bat", "aab", - "$1" /*NULL*/, + "$1" /*null*/, "€1" /*NULL*/, "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*NULL*/, 4 /*NULL*/, 5, 6, 7, 8 /*NULL*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); { - auto const expected_child1 = STRINGS_CW{"aab"}; - auto const expected_child2 = INTS_CW{7}; + // In the structs column, the min struct is {null, null}. + auto const expected_child1 = STRINGS_CW{{""}, null_at(0)}; + auto const expected_child2 = INTS_CW{{8}, null_at(0)}; this->reduction_test(input, cudf::table_view{{expected_child1, expected_child2}}, true, diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp index 0892436eb47..8dee5160fd7 100644 --- a/cpp/tests/reductions/scan_tests.cpp +++ b/cpp/tests/reductions/scan_tests.cpp @@ -488,30 +488,52 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) using INTS_CW = cudf::test::fixed_width_column_wrapper; using STRINGS_CW = cudf::test::strings_column_wrapper; using STRUCTS_CW = cudf::test::structs_column_wrapper; + using cudf::test::iterators::null_at; using cudf::test::iterators::nulls_at; + // `null` means null at child column. + // `NULL` means null at parent column. auto const input = [] { auto child1 = STRINGS_CW{{"año", "bit", - "₹1" /*NULL*/, + "₹1" /*null*/, "aaa" /*NULL*/, "zit", "bat", "aab", - "$1" /*NULL*/, + "$1" /*null*/, "€1" /*NULL*/, "wut"}, nulls_at({2, 7})}; - auto child2 = INTS_CW{{1, 2, 3 /*NULL*/, 4 /*NULL*/, 5, 6, 7, 8 /*NULL*/, 9 /*NULL*/, 10}, + auto child2 = INTS_CW{{1, 2, 3 /*null*/, 4 /*NULL*/, 5, 6, 7, 8 /*null*/, 9 /*NULL*/, 10}, nulls_at({2, 7})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); { auto const expected = [] { - auto child1 = STRINGS_CW{ - "año", "año", "año", "" /*NULL*/, "año", "año", "aab", "aab", "" /*NULL*/, "aab"}; - auto child2 = INTS_CW{1, 1, 1, 0 /*NULL*/, 1, 1, 7, 7, 0 /*NULL*/, 7}; + auto child1 = STRINGS_CW{{"año", + "año", + "" /*null*/, + "" /*null*/, + "" /*null*/, + "" /*null*/, + "" /*null*/, + "" /*null*/, + "" /*null*/, + "" /*null*/}, + nulls_at({2, 3, 4, 5, 6, 7, 8, 9})}; + auto child2 = INTS_CW{{1, + 1, + 0 /*null*/, + 0 /*null*/, + 0 /*null*/, + 0 /*null*/, + 0 /*null*/, + 0 /*null*/, + 0 /*null*/, + 0 /*null*/}, + nulls_at({2, 3, 4, 5, 6, 7, 8, 9})}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 8})}; }(); @@ -535,26 +557,28 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls) { auto const expected = [] { - auto child1 = STRINGS_CW{"año", - "año", - "año", - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/, - "" /*NULL*/}; - auto child2 = INTS_CW{1, - 1, - 1, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/, - 0 /*NULL*/}; + auto child1 = STRINGS_CW{{"año", + "año", + "" /*null*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/, + "" /*NULL*/}, + null_at(2)}; + auto child2 = INTS_CW{{1, + 1, + 0 /*null*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/, + 0 /*NULL*/}, + null_at(2)}; return STRUCTS_CW{{child1, child2}, nulls_at({3, 4, 5, 6, 7, 8, 9})}; }();