Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add cudf::dictionary::make_dictionary_pair_iterator #6651

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
- PR #6614 Add support for conversion to Pandas nullable dtypes and fix related issue in `cudf.to_json`
- PR #6622 Update `to_pandas` api docs
- PR #6623 Add operator overloading to column and clean up error messages
- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator
- PR #6635 Add cudf::test::dictionary_column_wrapper class
- PR #6609 Support fixed-point decimal for HostColumnVector

Expand Down
56 changes: 55 additions & 1 deletion cpp/include/cudf/dictionary/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <cudf/column/column_device_view.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>

namespace cudf {
namespace dictionary {
Expand Down Expand Up @@ -55,12 +56,65 @@ struct dictionary_access_fn {
// Builds an iterator over `dictionary_column` by applying dictionary_access_fn<KeyType>
// to successive row indices.
template <typename KeyType>
auto make_dictionary_iterator(column_device_view const& dictionary_column)
{
// Reject anything that is not a dictionary column before building the iterator.
// NOTE(review): this diff hunk shows both the old `type().id()` comparison and its
// `is_dictionary()` replacement on consecutive lines; presumably only the latter
// remains in the merged file — confirm against the real header.
CUDF_EXPECTS(dictionary_column.type().id() == type_id::DICTIONARY32,
CUDF_EXPECTS(is_dictionary(dictionary_column.type()),
"Dictionary iterator is only for dictionary columns");
// Row indices come from a counting iterator starting at 0; the functor is applied to each.
return thrust::make_transform_iterator(thrust::make_counting_iterator<size_type>(0),
dictionary_access_fn<KeyType>{dictionary_column});
}

/**
 * @brief Functor that maps a row index of a dictionary column to a `{key, valid}` pair.
 *
 * For a valid row, `first` is the key selected by that row's index and `second` is `true`.
 * When `has_nulls` is `true` and the row is null, the result is `{KeyType{}, false}` and
 * the keys are not read.
 *
 * @tparam KeyType The type of the dictionary's key element.
 * @tparam has_nulls Set to `true` if `d_dictionary` has nulls. When `false`, no null check
 *                   is made and every row is reported as valid.
 *
 * @throw cudf::logic_error if `has_nulls==true` and `d_dictionary` is not nullable.
 */
template <typename KeyType, bool has_nulls>
struct dictionary_access_pair_fn {
  dictionary_access_pair_fn(column_device_view const& d_dictionary) : d_dictionary{d_dictionary}
  {
    // A null-aware functor is only meaningful for a column that can hold nulls.
    if (has_nulls) { CUDF_EXPECTS(d_dictionary.nullable(), "unexpected non-nullable column"); }
  }

  __device__ thrust::pair<KeyType, bool> operator()(size_type idx) const
  {
    using result_type = thrust::pair<KeyType, bool>;

    // Null rows short-circuit before any key lookup.
    if (has_nulls && d_dictionary.is_null(idx)) { return result_type{KeyType{}, false}; }

    // Child 1 holds the keys; the row's dictionary32 element is the position within them.
    auto const key_column = d_dictionary.child(1);
    auto const key_index  = static_cast<size_type>(d_dictionary.element<dictionary32>(idx));
    return result_type{key_column.element<KeyType>(key_index), true};
  }

 private:
  column_device_view const d_dictionary;
};

/**
 * @brief Create an iterator over a dictionary column that yields `{key, valid}` pairs.
 *
 * Dereferencing the iterator at position `i` gives a pair whose `first` member is
 * `dictionary_column.keys[dictionary_column.indices[i]]` and whose `second` member is a
 * `bool` validity flag. With `has_nulls == true`, a null row yields `{KeyType{}, false}`.
 * With `has_nulls == false`, no null check is made and `second` is always `true`.
 *
 * @throw cudf::logic_error if `dictionary_column` is not a dictionary column.
 * @throw cudf::logic_error if `has_nulls==true` and `dictionary_column` is not nullable.
 *
 * @tparam KeyType The type of the dictionary's key element.
 * @tparam has_nulls Set to `true` if the dictionary_column has nulls.
 *
 * @param dictionary_column The dictionary device view to iterate.
 * @return Pair iterator with `{value,valid}`
 */
template <typename KeyType, bool has_nulls>
auto make_dictionary_pair_iterator(column_device_view const& dictionary_column)
{
  CUDF_EXPECTS(is_dictionary(dictionary_column.type()),
               "Dictionary iterator is only for dictionary columns");

  // Row indices 0, 1, 2, ... are fed through the pair accessor.
  auto const first_row = thrust::make_counting_iterator<size_type>(0);
  auto const pair_fn   = dictionary_access_pair_fn<KeyType, has_nulls>{dictionary_column};
  return thrust::make_transform_iterator(first_row, pair_fn);
}

} // namespace detail
} // namespace dictionary
} // namespace cudf
73 changes: 52 additions & 21 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,27 +136,6 @@ struct is_numeric_impl {
}
};

/**
 * @brief Indicates whether the type `T` is an unsigned numeric type.
 *
 * This is a thin wrapper over `std::is_unsigned`.
 *
 * @tparam T The type to verify
 * @return true `T` is an unsigned arithmetic type
 * @return false otherwise (signed, floating-point, or non-arithmetic `T`)
 **/
template <typename T>
constexpr inline bool is_unsigned()
{
  using unsigned_trait = std::is_unsigned<T>;
  return unsigned_trait::value;
}

/**
 * @brief Functor form of `is_unsigned<T>()`.
 *
 * The type to test is supplied as the template argument of the call operator.
 * Presumably used with `cudf::type_dispatcher`, like the other `*_impl` functors
 * in this header — confirm against the `is_unsigned(data_type)` overload.
 */
struct is_unsigned_impl {
  /**
   * @tparam T The type to verify
   * @return The value of `is_unsigned<T>()`
   */
  template <typename T>
  // constexpr so the result stays usable at compile time, matching is_unsigned<T>().
  constexpr bool operator()()
  {
    return is_unsigned<T>();
  }
};

codereport marked this conversation as resolved.
Show resolved Hide resolved
/**
* @brief Indicates whether `type` is a numeric `data_type`.
*
Expand Down Expand Up @@ -214,6 +193,26 @@ constexpr inline bool is_index_type(data_type type)
return cudf::type_dispatcher(type, is_index_type_impl{});
}

/**
 * @brief Indicates whether the type `T` is an unsigned numeric type.
 *
 * This is a thin wrapper over `std::is_unsigned`.
 *
 * @tparam T The type to verify
 * @return true `T` is an unsigned arithmetic type
 * @return false otherwise (signed, floating-point, or non-arithmetic `T`)
 **/
template <typename T>
constexpr inline bool is_unsigned()
{
  using unsigned_trait = std::is_unsigned<T>;
  return unsigned_trait::value;
}

/**
 * @brief Functor form of `is_unsigned<T>()`.
 *
 * The type to test is supplied as the template argument of the call operator.
 * Presumably used with `cudf::type_dispatcher`, like the other `*_impl` functors
 * in this header — confirm against the `is_unsigned(data_type)` overload.
 */
struct is_unsigned_impl {
  /**
   * @tparam T The type to verify
   * @return The value of `is_unsigned<T>()`
   */
  template <typename T>
  // constexpr so the result stays usable at compile time, matching is_unsigned<T>().
  constexpr bool operator()()
  {
    return is_unsigned<T>();
  }
};
/**
* @brief Indicates whether `type` is a unsigned numeric `data_type`.
*
Expand Down Expand Up @@ -435,6 +434,38 @@ constexpr inline bool is_chrono(data_type type)
return cudf::type_dispatcher(type, is_chrono_impl{});
}

/**
* @brief Indicates whether the type `T` is a dictionary type.
*
* @tparam T The type to verify
* @return true `T` is a dictionary-type
* @return false `T` is not dictionary-type
**/
template <typename T>
constexpr inline bool is_dictionary()
{
return std::is_same<dictionary32, T>::value;
}

/**
 * @brief Functor form of `is_dictionary<T>()` for use with `cudf::type_dispatcher`.
 *
 * The type to test is supplied as the template argument of the call operator.
 */
struct is_dictionary_impl {
  /**
   * @tparam T The type to verify
   * @return The value of `is_dictionary<T>()`
   */
  template <typename T>
  // constexpr because the constexpr is_dictionary(data_type) overload dispatches to this
  // operator; without it that overload can never be evaluated at compile time.
  constexpr bool operator()()
  {
    return is_dictionary<T>();
  }
};

/**
 * @brief Indicates whether `type` is a dictionary `data_type`.
 *
 * The runtime `type` is resolved through `cudf::type_dispatcher`, which invokes
 * `is_dictionary_impl` for the dispatched type.
 *
 * @param type The `data_type` to verify
 * @return true `type` is a dictionary type
 * @return false `type` is not a dictionary type
 **/
constexpr inline bool is_dictionary(data_type type)
{
  return cudf::type_dispatcher(type, is_dictionary_impl());
}
/**
* @brief Indicates whether elements of type `T` are fixed-width.
*
Expand Down