Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use device_uvector, device_span in sort groupby #7523

Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/groupby/sort_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>

namespace cudf {
namespace groupby {
Expand All @@ -40,8 +40,8 @@ namespace sort {
* value column
*/
struct sort_groupby_helper {
using index_vector = rmm::device_vector<size_type>;
using bitmask_vector = rmm::device_vector<bitmask_type>;
using index_vector = rmm::device_uvector<size_type>;
using bitmask_vector = rmm::device_uvector<bitmask_type>;
using column_ptr = std::unique_ptr<column>;
using index_vector_ptr = std::unique_ptr<index_vector>;
using bitmask_vector_ptr = std::unique_ptr<bitmask_vector>;
Expand Down
19 changes: 10 additions & 9 deletions cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -164,18 +164,19 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re
CUDF_FUNC_RANGE();
auto grouped_keys = helper().sorted_keys(rmm::cuda_stream_default, mr);

auto group_offsets = helper().group_offsets(0);
auto const& group_offsets = helper().group_offsets(rmm::cuda_stream_default);
std::vector<size_type> group_offsets_vector(group_offsets.size());
thrust::copy(group_offsets.begin(), group_offsets.end(), group_offsets_vector.begin());
thrust::copy(thrust::device_pointer_cast(group_offsets.begin()),
thrust::device_pointer_cast(group_offsets.end()),
group_offsets_vector.begin());

std::unique_ptr<table> grouped_values{nullptr};
if (values.num_columns()) {
grouped_values = cudf::detail::gather(values,
helper().key_sort_order(),
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
rmm::cuda_stream_default,
mr);
auto grouped_values = cudf::detail::gather(values,
helper().key_sort_order(),
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
rmm::cuda_stream_default,
mr);
return groupby::groups{
std::move(grouped_keys), std::move(group_offsets_vector), std::move(grouped_values)};
} else {
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/sort/group_argmax.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <groupby/sort/group_single_pass_reduction_util.cuh>

#include <cudf/detail/gather.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -27,7 +28,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_argmax(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
column_view const& key_sort_order,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/sort/group_argmin.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <groupby/sort/group_single_pass_reduction_util.cuh>

#include <cudf/detail/gather.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -27,7 +28,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_argmin(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
column_view const& key_sort_order,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/group_collect.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,21 @@
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace groupby {
namespace detail {
std::unique_ptr<column> group_collect(column_view const &values,
rmm::device_vector<size_type> const &group_offsets,
cudf::device_span<size_type const> group_offsets,
size_type num_groups,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
rmm::device_buffer offsets_data(
group_offsets.data().get(), group_offsets.size() * sizeof(cudf::size_type), stream, mr);
group_offsets.data(), group_offsets.size() * sizeof(cudf::size_type), stream, mr);

auto offsets = std::make_unique<cudf::column>(
cudf::data_type(cudf::type_to_id<cudf::size_type>()), num_groups + 1, std::move(offsets_data));
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/group_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand All @@ -29,7 +30,7 @@ namespace cudf {
namespace groupby {
namespace detail {
std::unique_ptr<column> group_count_valid(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type num_groups,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -70,7 +71,7 @@ std::unique_ptr<column> group_count_valid(column_view const& values,
return result;
}

std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& group_offsets,
std::unique_ptr<column> group_count_all(cudf::device_span<size_type const> group_offsets,
size_type num_groups,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/sort/group_max.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_max(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/sort/group_min.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_min(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/group_nth_element.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/detail/gather.cuh>
#include <cudf/detail/iterator.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -31,8 +32,8 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_nth_element(column_view const &values,
column_view const &group_sizes,
rmm::device_vector<size_type> const &group_labels,
rmm::device_vector<size_type> const &group_offsets,
cudf::device_span<size_type const> group_labels,
cudf::device_span<size_type const> group_offsets,
size_type num_groups,
size_type n,
null_policy null_handling,
Expand Down
21 changes: 11 additions & 10 deletions cpp/src/groupby/sort/group_nunique.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand All @@ -34,9 +35,9 @@ struct nunique_functor {
template <typename T>
typename std::enable_if_t<cudf::is_equality_comparable<T, T>(), std::unique_ptr<column>>
operator()(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type const num_groups,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand All @@ -54,8 +55,8 @@ struct nunique_functor {
[v = *values_view,
equal,
null_handling,
group_offsets = group_offsets.data().get(),
group_labels = group_labels.data().get()] __device__(auto i) -> size_type {
group_offsets = group_offsets.data(),
group_labels = group_labels.data()] __device__(auto i) -> size_type {
bool is_input_countable =
(null_handling == null_policy::INCLUDE || v.is_valid_nocheck(i));
bool is_unique = is_input_countable &&
Expand All @@ -76,8 +77,8 @@ struct nunique_functor {
thrust::make_counting_iterator<size_type>(0),
[v = *values_view,
equal,
group_offsets = group_offsets.data().get(),
group_labels = group_labels.data().get()] __device__(auto i) -> size_type {
group_offsets = group_offsets.data(),
group_labels = group_labels.data()] __device__(auto i) -> size_type {
bool is_unique = group_offsets[group_labels[i]] == i || // first element or
(not equal.operator()<T>(i, i - 1)); // new unique value in sorted
return static_cast<size_type>(is_unique);
Expand All @@ -95,9 +96,9 @@ struct nunique_functor {
template <typename T>
typename std::enable_if_t<!cudf::is_equality_comparable<T, T>(), std::unique_ptr<column>>
operator()(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type const num_groups,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand All @@ -107,9 +108,9 @@ struct nunique_functor {
};
} // namespace
std::unique_ptr<column> group_nunique(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type const num_groups,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/groupby/sort/group_quantiles.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <groupby/sort/group_reductions.hpp>
#include <quantiles/quantiles_util.hpp>
Expand Down Expand Up @@ -77,7 +78,7 @@ struct quantiles_functor {
std::enable_if_t<std::is_arithmetic<T>::value, std::unique_ptr<column>> operator()(
column_view const& values,
column_view const& group_sizes,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
size_type const num_groups,
rmm::device_vector<double> const& quantile,
interpolation interpolation,
Expand Down Expand Up @@ -110,7 +111,7 @@ struct quantiles_functor {
values_iter,
*group_size_view,
*result_view,
group_offsets.data().get(),
group_offsets.data(),
quantile.data().get(),
static_cast<size_type>(quantile.size()),
interpolation});
Expand All @@ -123,7 +124,7 @@ struct quantiles_functor {
values_iter,
*group_size_view,
*result_view,
group_offsets.data().get(),
group_offsets.data(),
quantile.data().get(),
static_cast<size_type>(quantile.size()),
interpolation});
Expand All @@ -145,7 +146,7 @@ struct quantiles_functor {
// TODO: add optional check for is_sorted. Use context.flag_sorted
std::unique_ptr<column> group_quantiles(column_view const& values,
column_view const& group_sizes,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
size_type const num_groups,
std::vector<double> const& quantiles,
interpolation interp,
Expand Down
Loading