From bedcfdaf87bc1919f67ca289ae66bb20e484846d Mon Sep 17 00:00:00 2001 From: davidwendt Date: Tue, 3 Nov 2020 11:49:27 -0500 Subject: [PATCH 1/6] Add cudf::dictionary::make_dictionary_pair_iterator --- CHANGELOG.md | 1 + .../cudf/dictionary/detail/iterator.cuh | 54 ++++++++++++++ cpp/include/cudf/utilities/traits.hpp | 73 +++++++++++++------ 3 files changed, 107 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbb06db444..543ba376c50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,7 @@ - PR #6614 Add support for conversion to Pandas nullable dtypes and fix related issue in `cudf.to_json` - PR #6622 Update `to_pandas` api docs - PR #6623 Add operator overloading to column and clean up error messages +- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator ## Bug Fixes diff --git a/cpp/include/cudf/dictionary/detail/iterator.cuh b/cpp/include/cudf/dictionary/detail/iterator.cuh index 070862a667c..f3f167ec7ca 100644 --- a/cpp/include/cudf/dictionary/detail/iterator.cuh +++ b/cpp/include/cudf/dictionary/detail/iterator.cuh @@ -15,6 +15,7 @@ */ #include +#include namespace cudf { namespace dictionary { @@ -61,6 +62,59 @@ auto make_dictionary_iterator(column_device_view const& dictionary_column) dictionary_access_fn{dictionary_column}); } +/** + * @brief Accessor functor for returning a dictionary pair iterator. + * + * @tparam KeyType The type of the dictionary's key element. + * @tparam has_nulls Set to true if `d_dictionary` has nulls. + * + * @throw cudf::logic_error if `has_nulls==true` and `d_dictionary` is not nullable. + */ +template +struct dictionary_access_pair_fn { + dictionary_access_pair_fn(column_device_view const& d_dictionary) : d_dictionary{d_dictionary} + { + if (has_nulls) { CUDF_EXPECTS(d_dictionary.nullable(), "unexpected non-nullable column"); } + } + + __device__ thrust::pair operator()(size_type idx) const + { + if (has_nulls && d_dictionary.is_null(idx)) return {KeyType{}, false}; + auto keys = d_dictionary.child(1); + return {keys.element(static_cast(d_dictionary.element(idx))), + true}; + }; + + private: + column_device_view const d_dictionary; +}; + +/** + * @brief Create dictionary iterator that produces key and valid element pair. + * + * The iterator returns a pair where the `first` value is + * `dictionary_column.keys[dictionary_column.indices[i]]` + * The `second` pair member is a `bool` which is set to + * `dictionary_column.is_valid(i)`. + * + * @throw cudf::logic_error if `dictionary_column` is not a dictionary column. + * + * @tparam KeyType The type of the dictionary's key element. + * @tparam has_nulls Set to true if the dictionary_column has nulls. + * + * @param dictionary_column The dictionary device view to iterate. + * @return Pair iterator with `{value,valid}` + */ +template +auto make_dictionary_pair_iterator(column_device_view const& dictionary_column) +{ + CUDF_EXPECTS(dictionary_column.type().id() == type_id::DICTIONARY32, + "Dictionary iterator is only for dictionary columns"); + return thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + dictionary_access_pair_fn{dictionary_column}); +} + } // namespace detail } // namespace dictionary } // namespace cudf diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 78dbbce517a..6903c86b16b 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -136,27 +136,6 @@ struct is_numeric_impl { } }; -/** - * @brief Indicates whether the type `T` is a unsigned numeric type. - * - * @tparam T The type to verify - * @return true `T` is unsigned numeric - * @return false `T` is signed numeric - **/ -template -constexpr inline bool is_unsigned() -{ - return std::is_unsigned::value; -} - -struct is_unsigned_impl { - template - bool operator()() - { - return is_unsigned(); - } -}; - /** * @brief Indicates whether `type` is a numeric `data_type`. * @@ -214,6 +193,26 @@ constexpr inline bool is_index_type(data_type type) return cudf::type_dispatcher(type, is_index_type_impl{}); } +/** + * @brief Indicates whether the type `T` is a unsigned numeric type. + * + * @tparam T The type to verify + * @return true `T` is unsigned numeric + * @return false `T` is signed numeric + **/ +template +constexpr inline bool is_unsigned() +{ + return std::is_unsigned::value; +} + +struct is_unsigned_impl { + template + bool operator()() + { + return is_unsigned(); + } +}; /** * @brief Indicates whether `type` is a unsigned numeric `data_type`. * @@ -435,6 +434,38 @@ constexpr inline bool is_chrono(data_type type) return cudf::type_dispatcher(type, is_chrono_impl{}); } +/** + * @brief Indicates whether the type `T` is a dictionary type. + * + * @tparam T The type to verify + * @return true `T` is a dictionary-type + * @return false `T` is not dictionary-type + **/ +template +constexpr inline bool is_dictionary() +{ + return std::is_same::value; +} + +struct is_dictionary_impl { + template + bool operator()() + { + return is_dictionary(); + } +}; + +/** + * @brief Indicates whether `type` is a dictionary `data_type`. + * + * @param type The `data_type` to verify + * @return true `type` is a dictionary type + * @return false `type` is not a dictionary type + **/ +constexpr inline bool is_dictionary(data_type type) +{ + return cudf::type_dispatcher(type, is_dictionary_impl{}); +} /** * @brief Indicates whether elements of type `T` are fixed-width. * From 32eabe42f108fb722a15c2309752ad20483d0815 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Wed, 4 Nov 2020 16:21:50 -0500 Subject: [PATCH 2/6] Add dictionary support to cudf::quantile --- CHANGELOG.md | 1 + cpp/src/quantiles/quantile.cu | 36 +++++++++++++++++++-------- cpp/tests/quantiles/quantile_test.cpp | 22 ++++++++++++++++ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 95ce1433243..35e8df52b62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -54,6 +54,7 @@ - PR #6623 Add operator overloading to column and clean up error messages - PR #6635 Add cudf::test::dictionary_column_wrapper class - PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator +- PR #6676 Add dictionary support to `cudf::quantile` ## Bug Fixes diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu index 09a8b714819..0b1ae2560b8 100644 --- a/cpp/src/quantiles/quantile.cu +++ b/cpp/src/quantiles/quantile.cu @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include #include #include @@ -62,19 +64,29 @@ struct quantile_functor { return output; } - auto d_input = column_device_view::create(input); + auto d_input = column_device_view::create(input, stream); auto d_output = mutable_column_device_view::create(output->mutable_view()); rmm::device_vector q_device{q}; - auto sorted_data = thrust::make_permutation_iterator(input.data(), ordered_indices); - - thrust::transform(q_device.begin(), - q_device.end(), - d_output->template begin(), - [sorted_data, interp = interp, size = size] __device__(double q) { - return select_quantile_data(sorted_data, size, q, interp); - }); + if (!cudf::is_dictionary(input.type())) { + auto sorted_data = thrust::make_permutation_iterator(input.data(), ordered_indices); + thrust::transform(q_device.begin(), + q_device.end(), + d_output->template begin(), + [sorted_data, interp = interp, size = size] __device__(double q) { + return select_quantile_data(sorted_data, size, q, interp); + }); + } else { + auto sorted_data = thrust::make_permutation_iterator( + dictionary::detail::make_dictionary_iterator(*d_input), ordered_indices); + thrust::transform(q_device.begin(), + q_device.end(), + d_output->template begin(), + [sorted_data, interp = interp, size = size] __device__(double q) { + return select_quantile_data(sorted_data, size, q, interp); + }); + } if (input.nullable()) { auto sorted_validity = thrust::make_transform_iterator( @@ -113,7 +125,11 @@ std::unique_ptr quantile(column_view const& input, auto functor = quantile_functor{ ordered_indices, size, q, interp, retain_types, mr, stream}; - return type_dispatcher(input.type(), functor, input); + auto input_type = cudf::is_dictionary(input.type()) && !input.is_empty() + ? dictionary_column_view(input).keys().type() + : input.type(); + + return type_dispatcher(input_type, functor, input); } } // namespace detail diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp index 9014ab27d18..76a8f6530f3 100644 --- a/cpp/tests/quantiles/quantile_test.cpp +++ b/cpp/tests/quantiles/quantile_test.cpp @@ -448,6 +448,28 @@ TYPED_TEST(QuantileUnsupportedTypesTest, TestMultipleElements) EXPECT_THROW(cudf::quantile(input, {0}), cudf::logic_error); } +struct QuantileDictionaryTest : public BaseFixture { +}; + +TEST_F(QuantileDictionaryTest, TestValid) +{ + fixed_width_column_wrapper col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + fixed_width_column_wrapper indices{0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; + + auto result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{5.5}); + + result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR, indices); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{5.5}); + + result = cudf::quantile(col, {0.1, 0.2}, cudf::interpolation::HIGHER); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{2.0, 3.0}); + + result = cudf::quantile(col, {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), + fixed_width_column_wrapper{3.5, 5.5, 7.5}); +}; + } // anonymous namespace CUDF_TEST_PROGRAM_MAIN() From cb87204115fc4df624b39b48625de58eb5ef1942 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 5 Nov 2020 12:56:02 -0500 Subject: [PATCH 3/6] use is_dictionary in make_dictionary_iterator checks --- cpp/include/cudf/dictionary/detail/iterator.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/include/cudf/dictionary/detail/iterator.cuh b/cpp/include/cudf/dictionary/detail/iterator.cuh index f3f167ec7ca..88563f2334b 100644 --- a/cpp/include/cudf/dictionary/detail/iterator.cuh +++ b/cpp/include/cudf/dictionary/detail/iterator.cuh @@ -56,7 +56,7 @@ struct dictionary_access_fn { template auto make_dictionary_iterator(column_device_view const& dictionary_column) { - CUDF_EXPECTS(dictionary_column.type().id() == type_id::DICTIONARY32, + CUDF_EXPECTS(is_dictionary(dictionary_column.type()), "Dictionary iterator is only for dictionary columns"); return thrust::make_transform_iterator(thrust::make_counting_iterator(0), dictionary_access_fn{dictionary_column}); @@ -66,7 +66,7 @@ auto make_dictionary_iterator(column_device_view const& dictionary_column) * @brief Accessor functor for returning a dictionary pair iterator. * * @tparam KeyType The type of the dictionary's key element. - * @tparam has_nulls Set to true if `d_dictionary` has nulls. + * @tparam has_nulls Set to `true` if `d_dictionary` has nulls. * * @throw cudf::logic_error if `has_nulls==true` and `d_dictionary` is not nullable. */ @@ -100,7 +100,7 @@ struct dictionary_access_pair_fn { * @throw cudf::logic_error if `dictionary_column` is not a dictionary column. * * @tparam KeyType The type of the dictionary's key element. - * @tparam has_nulls Set to true if the dictionary_column has nulls. + * @tparam has_nulls Set to `true` if the dictionary_column has nulls. * * @param dictionary_column The dictionary device view to iterate. * @return Pair iterator with `{value,valid}` @@ -108,7 +108,7 @@ struct dictionary_access_pair_fn { template auto make_dictionary_pair_iterator(column_device_view const& dictionary_column) { - CUDF_EXPECTS(dictionary_column.type().id() == type_id::DICTIONARY32, + CUDF_EXPECTS(is_dictionary(dictionary_column.type()), "Dictionary iterator is only for dictionary columns"); return thrust::make_transform_iterator( thrust::make_counting_iterator(0), From 96d18d3257479cd5b42eaeddea10efd2f9b50e3a Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 5 Nov 2020 13:47:33 -0500 Subject: [PATCH 4/6] fix quantile dictionary test --- cpp/tests/quantiles/quantile_test.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp index 76a8f6530f3..7e150fd683a 100644 --- a/cpp/tests/quantiles/quantile_test.cpp +++ b/cpp/tests/quantiles/quantile_test.cpp @@ -14,6 +14,7 @@ * limitations under the License. */ +#include #include #include #include @@ -453,19 +454,20 @@ struct QuantileDictionaryTest : public BaseFixture { TEST_F(QuantileDictionaryTest, TestValid) { - fixed_width_column_wrapper col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + fixed_width_column_wrapper col_w{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; + auto col = cudf::dictionary::encode(col_w); fixed_width_column_wrapper indices{0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - auto result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR); + auto result = cudf::quantile(col->view(), {0.5}, cudf::interpolation::LINEAR); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{5.5}); - result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR, indices); + result = cudf::quantile(col->view(), {0.5}, cudf::interpolation::LINEAR, indices); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{5.5}); - result = cudf::quantile(col, {0.1, 0.2}, cudf::interpolation::HIGHER); + result = cudf::quantile(col->view(), {0.1, 0.2}, cudf::interpolation::HIGHER); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{2.0, 3.0}); - result = cudf::quantile(col, {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT); + result = cudf::quantile(col->view(), {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{3.5, 5.5, 7.5}); }; From d9872ad0a039d8006bef7abfa58602d266d66060 Mon Sep 17 00:00:00 2001 From: davidwendt Date: Thu, 5 Nov 2020 20:30:05 -0500 Subject: [PATCH 5/6] fix changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7b78153113a..ed803566b72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,7 +56,6 @@ - PR #6623 Add operator overloading to column and clean up error messages - PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator - PR #6635 Add cudf::test::dictionary_column_wrapper class -- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator - PR #6676 Add dictionary support to `cudf::quantile` - PR #6609 Support fixed-point decimal for HostColumnVector From 7d3c6d3fb10167bb6ee284f701d2c1d4c43e3b5b Mon Sep 17 00:00:00 2001 From: davidwendt Date: Mon, 9 Nov 2020 11:17:26 -0500 Subject: [PATCH 6/6] use dictionary_column_wrapper --- cpp/tests/quantiles/quantile_test.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp index 7e150fd683a..63b55636b59 100644 --- a/cpp/tests/quantiles/quantile_test.cpp +++ b/cpp/tests/quantiles/quantile_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019, NVIDIA CORPORATION. + * Copyright (c) 2019-2020, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ * limitations under the License. */ -#include #include #include #include @@ -454,20 +453,19 @@ struct QuantileDictionaryTest : public BaseFixture { TEST_F(QuantileDictionaryTest, TestValid) { - fixed_width_column_wrapper col_w{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; - auto col = cudf::dictionary::encode(col_w); + dictionary_column_wrapper col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; fixed_width_column_wrapper indices{0, 2, 4, 6, 8, 1, 3, 5, 7, 9}; - auto result = cudf::quantile(col->view(), {0.5}, cudf::interpolation::LINEAR); + auto result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{5.5}); - result = cudf::quantile(col->view(), {0.5}, cudf::interpolation::LINEAR, indices); + result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR, indices); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{5.5}); - result = cudf::quantile(col->view(), {0.1, 0.2}, cudf::interpolation::HIGHER); + result = cudf::quantile(col, {0.1, 0.2}, cudf::interpolation::HIGHER); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{2.0, 3.0}); - result = cudf::quantile(col->view(), {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT); + result = cudf::quantile(col, {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper{3.5, 5.5, 7.5}); };