Merge branch 'branch-21.12' into debug-orc-assert-list

rapidsai · Oct 5, 2021 · fb4ecf3 · fb4ecf3
2 parents ad7df06 + 6593339
commit fb4ecf3
Show file tree

Hide file tree

Showing 161 changed files with 1,664 additions and 1,303 deletions.
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -331,6 +331,7 @@ add_library(cudf
     src/reductions/nth_element.cu
     src/reductions/product.cu
     src/reductions/reductions.cpp
+    src/reductions/scan/rank_scan.cu
     src/reductions/scan/scan.cpp
     src/reductions/scan/scan_exclusive.cu
     src/reductions/scan/scan_inclusive.cu

diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,7 +19,7 @@
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/copy.hpp>
-#include <cudf/detail/gather.cuh>
+#include <cudf/detail/gather.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/utilities/device_atomics.cuh>
@@ -36,12 +36,15 @@
 #include <rmm/device_buffer.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <cub/cub.cuh>
 
 #include <algorithm>
 
-namespace {
+namespace cudf {
+namespace detail {
+
 // Compute the count of elements that pass the mask within each block
 template <typename Filter, int block_size>
 __global__ void compute_block_counts(cudf::size_type* __restrict__ block_counts,
@@ -293,9 +296,9 @@ struct scatter_gather_functor {
                     filter);
 
     auto output_table = cudf::detail::gather(cudf::table_view{{input}},
-                                             indices.begin(),
-                                             indices.end(),
+                                             indices,
                                              cudf::out_of_bounds_policy::DONT_CHECK,
+                                             cudf::detail::negative_index_policy::NOT_ALLOWED,
                                              stream,
                                              mr);
 
@@ -304,10 +307,6 @@ struct scatter_gather_functor {
   }
 };
 
-}  // namespace
-
-namespace cudf {
-namespace detail {
 /**
  * @brief Filters `input` using a Filter function object
  *

diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh
@@ -152,8 +152,8 @@ __launch_bounds__(block_size) __global__
  * @param filter      Function of type `FilterFn` which determines for index `i` where to get the
  *                    corresponding output value from
  * @param out_type    `cudf::data_type` of the returned column
- * @param mr          Device memory resource used to allocate the returned column's device memory
  * @param stream      CUDA stream used for device memory operations and kernel launches.
+ * @param mr          Device memory resource used to allocate the returned column's device memory
  * @return            A new column that contains the values from either `lhs` or `rhs` as determined
  *                    by `filter[i]`
  */

diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
@@ -640,8 +640,8 @@ void gather_bitmask(table_view const& source,
  * use `DONT_CHECK` when they are certain that the gather_map contains only valid indices for
  * better performance. In case there are out-of-bound indices in the gather map, the behavior
  * is undefined. Defaults to `DONT_CHECK`.
- * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
  * @return cudf::table Result of the gather
  */
 template <typename MapIterator>

diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,10 +16,10 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/table/table_view.hpp>
-
 #include <cudf/copying.hpp>
 #include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -55,10 +55,10 @@ enum class negative_index_policy : bool { ALLOWED, NOT_ALLOWED };
  * indices. If `policy` is set to `DONT_CHECK` and there are out-of-bounds indices in `gather_map`,
  * the behavior is undefined.
  * @param[in] negative_index_policy Interpret each negative index `i` in the
- * gathermap as the positive index `i+num_source_rows`.
- * @param[in] mr Device memory resource used to allocate the returned table's device memory
+ * `gather_map` as the positive index `i+num_source_rows`.
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
- * @return cudf::table Result of the gather
+ * @param[in] mr Device memory resource used to allocate the returned table's device memory
+ * @return Result of the gather
  */
 std::unique_ptr<table> gather(
   table_view const& source_table,
@@ -67,5 +67,21 @@ std::unique_ptr<table> gather(
   negative_index_policy neg_indices,
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::detail::gather(table_view const&,column_view const&,
+ * cudf::out_of_bounds_policy,cudf::detail::negative_index_policy,rmm::cuda_stream_view,
+ * rmm::mr::device_memory_resource*)
+ *
+ * @throws cudf::logic_error if `gather_map` span size exceeds the maximum value of `size_type`.
+ */
+std::unique_ptr<table> gather(
+  table_view const& source_table,
+  device_span<size_type const> const gather_map,
+  out_of_bounds_policy bounds_policy,
+  negative_index_policy neg_indices,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -33,8 +33,8 @@ namespace reduction {
  *
  * @param col input column to compute sum
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return Sum as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> sum(
@@ -52,8 +52,8 @@ std::unique_ptr<scalar> sum(
  *
  * @param col input column to compute minimum.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return Minimum element as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> min(
@@ -71,8 +71,8 @@ std::unique_ptr<scalar> min(
  *
  * @param col input column to compute maximum.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return Maximum element as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> max(
@@ -91,8 +91,8 @@ std::unique_ptr<scalar> max(
  *
  * @param col input column to compute any_of.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return bool scalar if any of elements is true when typecasted to bool
  */
 std::unique_ptr<scalar> any(
@@ -111,8 +111,8 @@ std::unique_ptr<scalar> any(
  *
  * @param col input column to compute all_of.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return bool scalar if all of elements is true when typecasted to bool
  */
 std::unique_ptr<scalar> all(
@@ -131,8 +131,8 @@ std::unique_ptr<scalar> all(
  *
  * @param col input column to compute product.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return Product as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> product(
@@ -151,8 +151,8 @@ std::unique_ptr<scalar> product(
  *
  * @param col input column to compute sum of squares.
  * @param output_dtype data type of return type and typecast elements of input column
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return Sum of squares as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> sum_of_squares(
@@ -171,8 +171,8 @@ std::unique_ptr<scalar> sum_of_squares(
  *
  * @param col input column to compute mean.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @return Mean as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> mean(
@@ -191,8 +191,8 @@ std::unique_ptr<scalar> mean(
  *
  * @param col input column to compute variance.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @return Variance as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> variance(
@@ -212,8 +212,8 @@ std::unique_ptr<scalar> variance(
  *
  * @param col input column to compute standard deviation.
  * @param output_dtype data type of return type and typecast elements of input column.
- * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory.
  * @return Standard deviation as scalar of type `output_dtype`.
  */
 std::unique_ptr<scalar> standard_deviation(
@@ -243,8 +243,8 @@ std::unique_ptr<scalar> standard_deviation(
  * @param col input column to get nth element from.
  * @param n index of element to get
  * @param null_handling Indicates if null values will be counted while indexing.
- * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned scalar's device memory
  * @return nth element as scalar
  */
 std::unique_ptr<scalar> nth_element(

diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp
@@ -75,5 +75,29 @@ std::unique_ptr<column> scan_inclusive(column_view const& input,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr);
 
+/**
+ * @brief Generate row ranks for a column
+ *
+ * @param order_by Input column to generate ranks for
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return rank values
+ */
+std::unique_ptr<column> inclusive_rank_scan(column_view const& order_by,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr);
+
+/**
+ * @brief Generate row dense ranks for a column
+ *
+ * @param order_by Input column to generate ranks for
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return rank values
+ */
+std::unique_ptr<column> inclusive_dense_rank_scan(column_view const& order_by,
+                                                  rmm::cuda_stream_view stream,
+                                                  rmm::mr::device_memory_resource* mr);
+
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 #include <cudf/column/column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
-#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -70,6 +70,20 @@ std::unique_ptr<table> scatter(
   rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @copydoc cudf::detail::scatter(table_view const&,column_view const&,table_view
+ * const&,bool,rmm::cuda_stream_view,rmm::mr::device_memory_resource*)
+ *
+ * @throws cudf::logic_error if `scatter_map` span size exceeds the maximum value of `size_type`.
+ */
+std::unique_ptr<table> scatter(
+  table_view const& source,
+  device_span<size_type const> const scatter_map,
+  table_view const& target,
+  bool check_bounds                   = false,
+  rmm::cuda_stream_view stream        = rmm::cuda_stream_default,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Scatters a row of scalar values into a copy of the target table
  * according to a scatter map.

diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp
@@ -32,8 +32,8 @@ namespace detail {
  * @throw cudf::logic_error if dictionary column keys are not all the same type.
  *
  * @param columns Vector of dictionary columns to concatenate.
- * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(

diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -98,8 +98,8 @@ std::vector<std::unique_ptr<column>> match_dictionaries(
  * Any null rows are left unchanged.
  *
  * @param input Vector of cudf::table_views that include dictionary columns to be matched.
- * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary columns and updated cudf::table_views.
  */
 std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_dictionaries(

diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -37,8 +37,8 @@ namespace detail {
  * ```
  *
  * @param columns List of string columns to concatenate.
- * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with concatenated results.
  */
 std::unique_ptr<column> concatenate(

diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh
@@ -47,8 +47,8 @@ namespace detail {
  * @param rhs_begin Strings of second set of data. Used when filter_fn returns false.
  * @param filter_fn Called to determine which iterator (lhs or rhs) to retrieve an entry for a
  * specific row.
- * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 template <typename StringPairIterLeft, typename StringPairIterRight, typename Filter>

diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh
@@ -93,8 +93,8 @@ namespace detail {
  * @param target_begin The starting index of the target range (inclusive)
  * @param target_end The index of the last element in the target range
  * (exclusive)
- * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return std::unique_ptr<column> The result target column
  */
 template <typename SourceValueIterator, typename SourceValidityIterator>

diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
@@ -37,8 +37,8 @@ namespace detail {
  * @param begin First row index to include the new string.
  * @param end Last row index (exclusive).
  * @param value String to use when filling the range.
- * @param mr Device memory resource used to allocate the returned column's device memory.
  * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
 std::unique_ptr<column> fill(