Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[REVIEW] Add dictionary support to cudf::quantile #6676

Merged
merged 15 commits into from
Nov 11, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
- PR #6622 Update `to_pandas` api docs
- PR #6623 Add operator overloading to column and clean up error messages
- PR #6635 Add cudf::test::dictionary_column_wrapper class
- PR #6651 Add cudf::dictionary::make_dictionary_pair_iterator
- PR #6676 Add dictionary support to `cudf::quantile`

## Bug Fixes

Expand Down
54 changes: 54 additions & 0 deletions cpp/include/cudf/dictionary/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include <cudf/column/column_device_view.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>

namespace cudf {
namespace dictionary {
Expand Down Expand Up @@ -61,6 +62,59 @@ auto make_dictionary_iterator(column_device_view const& dictionary_column)
dictionary_access_fn<KeyType>{dictionary_column});
}

/**
 * @brief Functor that maps a row index of a dictionary column to a `{key, valid}` pair.
 *
 * For a row that is not reported as null, the pair holds the key selected by that row's
 * `dictionary32` element and `true`. When `has_nulls` is set and the row is null, the pair
 * holds a value-initialized `KeyType` and `false`.
 *
 * @tparam KeyType The type of the dictionary's key element.
 * @tparam has_nulls Set to true if rows of `d_dictionary` must be checked for null.
 *
 * @throw cudf::logic_error if `has_nulls==true` and `d_dictionary` is not nullable.
 */
template <typename KeyType, bool has_nulls>
struct dictionary_access_pair_fn {
  dictionary_access_pair_fn(column_device_view const& d_dictionary) : d_dictionary{d_dictionary}
  {
    // Null checks were requested, so the column is required to be nullable.
    if (has_nulls) { CUDF_EXPECTS(d_dictionary.nullable(), "unexpected non-nullable column"); }
  }

  __device__ thrust::pair<KeyType, bool> operator()(size_type idx) const
  {
    // The null test is only evaluated when has_nulls is true.
    if (has_nulls && d_dictionary.is_null(idx)) { return {KeyType{}, false}; }

    // Child 1 is read as the keys; the row's dictionary32 element is the position within it.
    auto const keys_view = d_dictionary.child(1);
    auto const key_pos   = static_cast<size_type>(d_dictionary.element<dictionary32>(idx));
    return {keys_view.element<KeyType>(key_pos), true};
  }

 private:
  column_device_view const d_dictionary;
};

/**
 * @brief Create a dictionary iterator that produces `{key, valid}` pairs.
 *
 * Dereferencing position `i` yields a pair whose `first` member is
 * `dictionary_column.keys[dictionary_column.indices[i]]` and whose `second` member is a
 * `bool` set to `dictionary_column.is_valid(i)`. When `has_nulls` is false no null check
 * is made and `second` is always `true`. For a null row `first` is a value-initialized
 * `KeyType`.
 *
 * @throw cudf::logic_error if `dictionary_column` is not a dictionary column.
 * @throw cudf::logic_error if `has_nulls==true` and `dictionary_column` is not nullable.
 *
 * @tparam KeyType The type of the dictionary's key element.
 * @tparam has_nulls Set to true if the dictionary_column has nulls.
 *
 * @param dictionary_column The dictionary device view to iterate.
 * @return Pair iterator with `{value,valid}`
 */
template <typename KeyType, bool has_nulls>
auto make_dictionary_pair_iterator(column_device_view const& dictionary_column)
{
  CUDF_EXPECTS(dictionary_column.type().id() == type_id::DICTIONARY32,
               "Dictionary iterator is only for dictionary columns");
  // A counting iterator supplies the row index; the functor turns it into {key, valid}.
  return thrust::make_transform_iterator(
    thrust::make_counting_iterator<size_type>(0),
    dictionary_access_pair_fn<KeyType, has_nulls>{dictionary_column});
}

} // namespace detail
} // namespace dictionary
} // namespace cudf
73 changes: 52 additions & 21 deletions cpp/include/cudf/utilities/traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,27 +136,6 @@ struct is_numeric_impl {
}
};

/**
 * @brief Indicates whether the type `T` is an unsigned type.
 *
 * The answer is taken directly from `std::is_unsigned`.
 *
 * @tparam T The type to verify
 * @return true `std::is_unsigned<T>::value` is true
 * @return false otherwise
 **/
template <typename T>
constexpr inline bool is_unsigned()
{
  using unsigned_trait = std::is_unsigned<T>;
  return unsigned_trait::value;
}

/**
 * @brief Functor that exposes `is_unsigned<T>()` through a templated call operator.
 *
 * The call operator is `constexpr` and `const` so that a `constexpr` caller can evaluate
 * it in a constant expression. Presumably this is invoked via `cudf::type_dispatcher`,
 * like the other `*_impl` functors in this header (confirm against the dispatch wrapper).
 */
struct is_unsigned_impl {
  /**
   * @tparam T The type to verify
   * @return The value of `is_unsigned<T>()`
   */
  template <typename T>
  constexpr bool operator()() const
  {
    return is_unsigned<T>();
  }
};

/**
* @brief Indicates whether `type` is a numeric `data_type`.
*
Expand Down Expand Up @@ -214,6 +193,26 @@ constexpr inline bool is_index_type(data_type type)
return cudf::type_dispatcher(type, is_index_type_impl{});
}

/**
 * @brief Indicates whether the type `T` is an unsigned type.
 *
 * The answer is taken directly from `std::is_unsigned`.
 *
 * @tparam T The type to verify
 * @return true `std::is_unsigned<T>::value` is true
 * @return false otherwise
 **/
template <typename T>
constexpr inline bool is_unsigned()
{
  using unsigned_trait = std::is_unsigned<T>;
  return unsigned_trait::value;
}

/**
 * @brief Functor that exposes `is_unsigned<T>()` through a templated call operator.
 *
 * The call operator is `constexpr` and `const` so that a `constexpr` caller can evaluate
 * it in a constant expression. Presumably this is invoked via `cudf::type_dispatcher`,
 * like the other `*_impl` functors in this header (confirm against the dispatch wrapper).
 */
struct is_unsigned_impl {
  /**
   * @tparam T The type to verify
   * @return The value of `is_unsigned<T>()`
   */
  template <typename T>
  constexpr bool operator()() const
  {
    return is_unsigned<T>();
  }
};
/**
* @brief Indicates whether `type` is an unsigned numeric `data_type`.
*
Expand Down Expand Up @@ -435,6 +434,38 @@ constexpr inline bool is_chrono(data_type type)
return cudf::type_dispatcher(type, is_chrono_impl{});
}

/**
* @brief Indicates whether the type `T` is a dictionary type.
*
* @tparam T The type to verify
* @return true `T` is a dictionary-type
* @return false `T` is not dictionary-type
**/
template <typename T>
constexpr inline bool is_dictionary()
{
return std::is_same<dictionary32, T>::value;
}

/**
 * @brief Functor that exposes `is_dictionary<T>()` through a templated call operator.
 *
 * The call operator is `constexpr` and `const` so that the `constexpr`
 * `is_dictionary(data_type)` overload, which passes this functor to
 * `cudf::type_dispatcher`, is not blocked from constant evaluation by this functor.
 */
struct is_dictionary_impl {
  /**
   * @tparam T The type to verify
   * @return The value of `is_dictionary<T>()`
   */
  template <typename T>
  constexpr bool operator()() const
  {
    return is_dictionary<T>();
  }
};

/**
 * @brief Indicates whether `type` is a dictionary `data_type`.
 *
 * The runtime `type` is handed to `cudf::type_dispatcher` together with an
 * `is_dictionary_impl` functor, and the functor's result is returned.
 *
 * @param type The `data_type` to verify
 * @return true `type` is a dictionary type
 * @return false `type` is not a dictionary type
 **/
constexpr inline bool is_dictionary(data_type type)
{
  is_dictionary_impl dictionary_check{};
  return cudf::type_dispatcher(type, dictionary_check);
}
/**
* @brief Indicates whether elements of type `T` are fixed-width.
*
Expand Down
36 changes: 26 additions & 10 deletions cpp/src/quantiles/quantile.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <cudf/detail/gather.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/sorting.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/error.hpp>
Expand Down Expand Up @@ -62,19 +64,29 @@ struct quantile_functor {
return output;
}

auto d_input = column_device_view::create(input);
auto d_input = column_device_view::create(input, stream);
auto d_output = mutable_column_device_view::create(output->mutable_view());
rgsl888prabhu marked this conversation as resolved.
Show resolved Hide resolved

rmm::device_vector<double> q_device{q};

auto sorted_data = thrust::make_permutation_iterator(input.data<T>(), ordered_indices);

thrust::transform(q_device.begin(),
q_device.end(),
d_output->template begin<Result>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
return select_quantile_data<Result>(sorted_data, size, q, interp);
});
if (!cudf::is_dictionary(input.type())) {
auto sorted_data = thrust::make_permutation_iterator(input.data<T>(), ordered_indices);
thrust::transform(q_device.begin(),
q_device.end(),
d_output->template begin<Result>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
return select_quantile_data<Result>(sorted_data, size, q, interp);
});
} else {
auto sorted_data = thrust::make_permutation_iterator(
dictionary::detail::make_dictionary_iterator<T>(*d_input), ordered_indices);
thrust::transform(q_device.begin(),
q_device.end(),
d_output->template begin<Result>(),
[sorted_data, interp = interp, size = size] __device__(double q) {
return select_quantile_data<Result>(sorted_data, size, q, interp);
});
}

if (input.nullable()) {
auto sorted_validity = thrust::make_transform_iterator(
Expand Down Expand Up @@ -113,7 +125,11 @@ std::unique_ptr<column> quantile(column_view const& input,
auto functor = quantile_functor<exact, SortMapIterator>{
ordered_indices, size, q, interp, retain_types, mr, stream};

return type_dispatcher(input.type(), functor, input);
auto input_type = cudf::is_dictionary(input.type()) && !input.is_empty()
? dictionary_column_view(input).keys().type()
: input.type();

return type_dispatcher(input_type, functor, input);
}

} // namespace detail
Expand Down
22 changes: 22 additions & 0 deletions cpp/tests/quantiles/quantile_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,28 @@ TYPED_TEST(QuantileUnsupportedTypesTest, TestMultipleElements)
EXPECT_THROW(cudf::quantile(input, {0}), cudf::logic_error);
}

// Fixture for the QuantileDictionaryTest cases; it adds nothing on top of BaseFixture.
struct QuantileDictionaryTest : BaseFixture {};

// Runs cudf::quantile on a dictionary column with no nulls and checks the results for
// LINEAR, HIGHER and MIDPOINT interpolation, with and without an explicit index map.
// The input must be a dictionary column: a fixed-width column would bypass the
// dictionary code path this test exists to cover.
TEST_F(QuantileDictionaryTest, TestValid)
{
  // NOTE(review): assumes dictionary_column_wrapper is declared by the same test header
  // that provides fixed_width_column_wrapper; confirm against this file's includes.
  cudf::test::dictionary_column_wrapper<int32_t> col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
  fixed_width_column_wrapper<int32_t> indices{0, 2, 4, 6, 8, 1, 3, 5, 7, 9};

  auto result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper<double>{5.5});

  // Same quantile, but with the element order supplied through `indices`.
  result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR, indices);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper<double>{5.5});

  result = cudf::quantile(col, {0.1, 0.2}, cudf::interpolation::HIGHER);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper<double>{2.0, 3.0});

  result = cudf::quantile(col, {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT);
  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(),
                                      fixed_width_column_wrapper<double>{3.5, 5.5, 7.5});
}

} // anonymous namespace

CUDF_TEST_PROGRAM_MAIN()