diff --git a/CHANGELOG.md b/CHANGELOG.md index 066a9a0805a..c06afeaa6e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,6 +53,7 @@ - PR #6614 Add support for conversion to Pandas nullable dtypes and fix related issue in `cudf.to_json` - PR #6622 Update `to_pandas` api docs - PR #6623 Add operator overloading to column and clean up error messages +- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator - PR #6635 Add cudf::test::dictionary_column_wrapper class - PR #6609 Support fixed-point decimal for HostColumnVector diff --git a/cpp/include/cudf/dictionary/detail/iterator.cuh b/cpp/include/cudf/dictionary/detail/iterator.cuh index 070862a667c..88563f2334b 100644 --- a/cpp/include/cudf/dictionary/detail/iterator.cuh +++ b/cpp/include/cudf/dictionary/detail/iterator.cuh @@ -15,6 +15,7 @@ */ #include +#include namespace cudf { namespace dictionary { @@ -55,12 +56,65 @@ struct dictionary_access_fn { template auto make_dictionary_iterator(column_device_view const& dictionary_column) { - CUDF_EXPECTS(dictionary_column.type().id() == type_id::DICTIONARY32, + CUDF_EXPECTS(is_dictionary(dictionary_column.type()), "Dictionary iterator is only for dictionary columns"); return thrust::make_transform_iterator(thrust::make_counting_iterator(0), dictionary_access_fn{dictionary_column}); } +/** + * @brief Accessor functor used by the dictionary pair iterator; returns a key/valid pair for a row. + * + * @tparam KeyType The type of the dictionary's key element. + * @tparam has_nulls Set to `true` if `d_dictionary` has nulls. + * + * @throw cudf::logic_error if `has_nulls==true` and `d_dictionary` is not nullable. 
+ */ +template +struct dictionary_access_pair_fn { + dictionary_access_pair_fn(column_device_view const& d_dictionary) : d_dictionary{d_dictionary} + { + if (has_nulls) { CUDF_EXPECTS(d_dictionary.nullable(), "unexpected non-nullable column"); } + } + + __device__ thrust::pair operator()(size_type idx) const + { + if (has_nulls && d_dictionary.is_null(idx)) return {KeyType{}, false}; + auto keys = d_dictionary.child(1); + return {keys.element(static_cast(d_dictionary.element(idx))), + true}; + }; + + private: + column_device_view const d_dictionary; +}; + +/** + * @brief Create a dictionary iterator that produces `{key, valid}` pairs. + * + * The iterator returns a pair where the `first` value is + * `dictionary_column.keys[dictionary_column.indices[i]]`, or `KeyType{}` for a null row. + * The `second` pair member is a `bool` which is set to + * `dictionary_column.is_valid(i)`. + * + * @throw cudf::logic_error if `dictionary_column` is not a dictionary column. + * + * @tparam KeyType The type of the dictionary's key element. + * @tparam has_nulls Set to `true` if the dictionary_column has nulls. + * + * @param dictionary_column The dictionary device view to iterate. + * @return Pair iterator with `{value,valid}` + */ +template +auto make_dictionary_pair_iterator(column_device_view const& dictionary_column) +{ + CUDF_EXPECTS(is_dictionary(dictionary_column.type()), + "Dictionary iterator is only for dictionary columns"); + return thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + dictionary_access_pair_fn{dictionary_column}); +} + } // namespace detail } // namespace dictionary } // namespace cudf diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp index 78dbbce517a..6903c86b16b 100644 --- a/cpp/include/cudf/utilities/traits.hpp +++ b/cpp/include/cudf/utilities/traits.hpp @@ -136,27 +136,6 @@ struct is_numeric_impl { } }; -/** - * @brief Indicates whether the type `T` is a unsigned numeric type. 
- * - * @tparam T The type to verify - * @return true `T` is unsigned numeric - * @return false `T` is signed numeric - **/ -template -constexpr inline bool is_unsigned() -{ - return std::is_unsigned::value; -} - -struct is_unsigned_impl { - template - bool operator()() - { - return is_unsigned(); - } -}; - /** * @brief Indicates whether `type` is a numeric `data_type`. * @@ -214,6 +193,26 @@ constexpr inline bool is_index_type(data_type type) return cudf::type_dispatcher(type, is_index_type_impl{}); } +/** + * @brief Indicates whether the type `T` is an unsigned numeric type. + * + * @tparam T The type to verify + * @return true `T` is unsigned numeric + * @return false `T` is not unsigned numeric + **/ +template +constexpr inline bool is_unsigned() +{ + return std::is_unsigned::value; +} + +struct is_unsigned_impl { + template + bool operator()() + { + return is_unsigned(); + } +}; /** * @brief Indicates whether `type` is a unsigned numeric `data_type`. * @@ -435,6 +434,38 @@ constexpr inline bool is_chrono(data_type type) return cudf::type_dispatcher(type, is_chrono_impl{}); } +/** + * @brief Indicates whether the type `T` is a dictionary type. + * + * @tparam T The type to verify + * @return true `T` is a dictionary-type + * @return false `T` is not a dictionary-type + **/ +template +constexpr inline bool is_dictionary() +{ + return std::is_same::value; +} + +struct is_dictionary_impl { + template + bool operator()() + { + return is_dictionary(); + } +}; + +/** + * @brief Indicates whether `type` is a dictionary `data_type`. + * + * @param type The `data_type` to verify + * @return true `type` is a dictionary type + * @return false `type` is not a dictionary type + **/ +constexpr inline bool is_dictionary(data_type type) +{ + return cudf::type_dispatcher(type, is_dictionary_impl{}); +} /** * @brief Indicates whether elements of type `T` are fixed-width. *