diff --git a/cpp/include/cudf/detail/groupby.hpp b/cpp/include/cudf/detail/groupby.hpp index ce5fdb92bd1..36a76c7b6de 100644 --- a/cpp/include/cudf/detail/groupby.hpp +++ b/cpp/include/cudf/detail/groupby.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include #include +#include #include @@ -36,12 +37,12 @@ namespace hash { * @return true A hash-based groupby can be used * @return false A hash-based groupby cannot be used */ -bool can_use_hash_groupby(table_view const& keys, std::vector const& requests); +bool can_use_hash_groupby(table_view const& keys, host_span requests); // Hash-based groupby std::pair, std::vector> groupby( table_view const& keys, - std::vector const& requests, + host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 1dfacd53e0d..19f87873873 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -163,7 +164,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> aggregate( - std::vector const& requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -218,7 +219,7 @@ class groupby { * specified in `requests`. */ std::pair, std::vector> scan( - std::vector const& requests, + host_span requests, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); /** @@ -277,18 +278,18 @@ class groupby { * aggregation requests. */ std::pair, std::vector> dispatch_aggregation( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); // Sort-based groupby std::pair, std::vector> sort_aggregate( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); std::pair, std::vector> sort_scan( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr); }; diff --git a/cpp/src/groupby/common/utils.hpp b/cpp/src/groupby/common/utils.hpp index 40bc96c6103..e8d5c60f81a 100644 --- a/cpp/src/groupby/common/utils.hpp +++ b/cpp/src/groupby/common/utils.hpp @@ -18,13 +18,14 @@ #include #include +#include #include namespace cudf { namespace groupby { namespace detail { inline std::vector extract_results( - std::vector const& requests, cudf::detail::result_cache& cache) + host_span requests, cudf::detail::result_cache& cache) { std::vector results(requests.size()); diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index cdd8ceb0a6c..0312d17a37c 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -55,7 +55,7 @@ groupby::groupby(table_view const& keys, // Select hash vs. sort groupby implementation std::pair, std::vector> groupby::dispatch_aggregation( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { @@ -79,7 +79,7 @@ groupby::~groupby() = default; namespace { /// Make an empty table with appropriate types for requested aggs -auto empty_results(std::vector const& requests) +auto empty_results(host_span requests) { std::vector empty_results; @@ -102,7 +102,7 @@ auto empty_results(std::vector const& requests) } /// Verifies the agg requested on the request's values is valid -void verify_valid_requests(std::vector const& requests) +void verify_valid_requests(host_span requests) { CUDF_EXPECTS( std::all_of( @@ -143,7 +143,7 @@ void verify_valid_requests(std::vector const& requests) // Compute aggregation requests std::pair, std::vector> groupby::aggregate( - std::vector const& requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( @@ -161,7 +161,7 @@ std::pair, std::vector> groupby::aggr // Compute scan requests std::pair, std::vector> groupby::scan( - std::vector const& requests, rmm::mr::device_memory_resource* mr) + host_span requests, rmm::mr::device_memory_resource* mr) { CUDF_FUNC_RANGE(); CUDF_EXPECTS( diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu index c54ecee9ccb..38aacbe59a7 100644 --- a/cpp/src/groupby/hash/groupby.cu +++ b/cpp/src/groupby/hash/groupby.cu @@ -110,7 +110,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final data_type result_type; cudf::detail::result_cache* sparse_results; cudf::detail::result_cache* dense_results; - rmm::device_vector const& gather_map; + device_span gather_map; size_type const map_size; Map const& map; bitmask_type const* __restrict__ row_bitmask; @@ -122,7 +122,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final column_view col, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, - rmm::device_vector const& gather_map, + device_span gather_map, size_type map_size, Map const& map, bitmask_type const* row_bitmask, @@ -272,7 +272,7 @@ class hash_compound_agg_finalizer final : public cudf::detail::aggregation_final // flatten aggs to filter in single pass aggs std::tuple, std::vector> -flatten_single_pass_aggs(std::vector const& requests) +flatten_single_pass_aggs(host_span requests) { std::vector columns; std::vector agg_kinds; @@ -311,10 +311,10 @@ flatten_single_pass_aggs(std::vector const& requests) */ template void sparse_to_dense_results(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* sparse_results, cudf::detail::result_cache* dense_results, - rmm::device_vector const& gather_map, + device_span gather_map, size_type map_size, Map const& map, bool keys_have_nulls, @@ -421,7 +421,7 @@ auto create_sparse_results_table(table_view const& flattened_values, */ template void compute_single_pass_aggs(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* sparse_results, Map& map, null_policy include_null_keys, @@ -469,10 +469,10 @@ void compute_single_pass_aggs(table_view const& keys, * `map`. */ template -std::pair, size_type> extract_populated_keys( +std::pair, size_type> extract_populated_keys( Map map, size_type num_keys, rmm::cuda_stream_view stream) { - rmm::device_vector populated_keys(num_keys); + rmm::device_uvector populated_keys(num_keys, stream); auto get_key = [] __device__(auto const& element) { size_type key, value; @@ -520,7 +520,7 @@ std::pair, size_type> extract_populated_keys( */ template std::unique_ptr groupby_null_templated(table_view const& keys, - std::vector const& requests, + host_span requests, cudf::detail::result_cache* cache, null_policy include_null_keys, rmm::cuda_stream_view stream, @@ -539,9 +539,9 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, // Extract the populated indices from the hash map and create a gather map. // Gathering using this map from sparse results will give dense results. - rmm::device_vector gather_map; - size_type map_size; - std::tie(gather_map, map_size) = extract_populated_keys(*map, keys.num_rows(), stream); + auto map_and_size = extract_populated_keys(*map, keys.num_rows(), stream); + rmm::device_uvector gather_map{std::move(map_and_size.first)}; + size_type const map_size = map_and_size.second; // Compact all results from sparse_results and insert into cache sparse_to_dense_results(keys, @@ -576,7 +576,7 @@ std::unique_ptr
groupby_null_templated(table_view const& keys, * @return true A hash-based groupby should be used * @return false A hash-based groupby should not be used */ -bool can_use_hash_groupby(table_view const& keys, std::vector const& requests) +bool can_use_hash_groupby(table_view const& keys, host_span requests) { return std::all_of(requests.begin(), requests.end(), [](aggregation_request const& r) { return std::all_of(r.aggregations.begin(), r.aggregations.end(), [](auto const& a) { @@ -588,7 +588,7 @@ bool can_use_hash_groupby(table_view const& keys, std::vector, std::vector> groupby( table_view const& keys, - std::vector const& requests, + host_span requests, null_policy include_null_keys, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index b171b19413b..86e2837967e 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -385,7 +385,7 @@ void aggregrate_result_functor::operator()(aggregation // Sort-based groupby std::pair, std::vector> groupby::sort_aggregate( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { diff --git a/cpp/src/groupby/sort/group_nth_element.cu b/cpp/src/groupby/sort/group_nth_element.cu index 5c8e8b790d4..e6c10aa1056 100644 --- a/cpp/src/groupby/sort/group_nth_element.cu +++ b/cpp/src/groupby/sort/group_nth_element.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,7 +69,7 @@ std::unique_ptr group_nth_element(column_view const &values, auto bitmask_iterator = thrust::make_transform_iterator(cudf::detail::make_validity_iterator(*values_view), [] __device__(auto b) { return static_cast(b); }); - rmm::device_vector intra_group_index(values.size()); + rmm::device_uvector intra_group_index(values.size(), stream); // intra group index for valids only. thrust::exclusive_scan_by_key(rmm::exec_policy(stream), group_labels.begin(), @@ -77,9 +77,9 @@ std::unique_ptr group_nth_element(column_view const &values, bitmask_iterator, intra_group_index.begin()); // group_size to recalculate n if n<0 - rmm::device_vector group_count = [&] { + rmm::device_uvector group_count = [&] { if (n < 0) { - rmm::device_vector group_count(num_groups); + rmm::device_uvector group_count(num_groups, stream); thrust::reduce_by_key(rmm::exec_policy(stream), group_labels.begin(), group_labels.end(), @@ -88,7 +88,7 @@ std::unique_ptr group_nth_element(column_view const &values, group_count.begin()); return group_count; } else { - return rmm::device_vector(); + return rmm::device_uvector(0, stream); } }(); // gather the valid index == n diff --git a/cpp/src/groupby/sort/group_quantiles.cu b/cpp/src/groupby/sort/group_quantiles.cu index fcadb2e71fb..c9f9e3cad9e 100644 --- a/cpp/src/groupby/sort/group_quantiles.cu +++ b/cpp/src/groupby/sort/group_quantiles.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -80,7 +80,7 @@ struct quantiles_functor { column_view const& group_sizes, cudf::device_span group_offsets, size_type const num_groups, - rmm::device_vector const& quantile, + device_span quantile, interpolation interpolation, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) @@ -112,7 +112,7 @@ struct quantiles_functor { *group_size_view, *result_view, group_offsets.data(), - quantile.data().get(), + quantile.data(), static_cast(quantile.size()), interpolation}); } else { @@ -125,7 +125,7 @@ struct quantiles_functor { *group_size_view, *result_view, group_offsets.data(), - quantile.data().get(), + quantile.data(), static_cast(quantile.size()), interpolation}); } diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp index 63de4ea8684..3d7ccf18242 100644 --- a/cpp/src/groupby/sort/scan.cpp +++ b/cpp/src/groupby/sort/scan.cpp @@ -107,7 +107,7 @@ void scan_result_functor::operator()(aggregation const& // Sort-based groupby std::pair, std::vector> groupby::sort_scan( - std::vector const& requests, + host_span requests, rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) {