rapidsai · rapids-bot · Mar 9, 2021 · Mar 4, 2021 · Mar 5, 2021 · Mar 5, 2021
@@ -115,6 +115,7 @@ ConfigureBench(REDUCTION_BENCH
   reduction/anyall_benchmark.cpp
   reduction/dictionary_benchmark.cpp
   reduction/reduce_benchmark.cpp
+  reduction/scan_benchmark.cpp
   reduction/minmax_benchmark.cpp)
 
 ###################################################################################################

@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmark/benchmark.h>
+#include <benchmarks/common/generate_benchmark_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/synchronization/synchronization.hpp>
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/reduction.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/types.hpp>
+
+// Fixture type under which the scan benchmarks in this file are defined and registered.
+class ReductionScan : public cudf::benchmark {};
+
+// Runs an inclusive min scan over one random column of `type` once per benchmark iteration.
+template <typename type>
+static void BM_reduction_scan(benchmark::State& state, bool include_nulls)
+{
+  auto const num_rows = static_cast<cudf::size_type>(state.range(0));
+  auto const source   = create_random_table({cudf::type_to_id<type>()}, 1, row_count{num_rows});
+  // No-null variant: swap in an empty mask buffer together with a null count of zero.
+  if (!include_nulls) { source->get_column(0).set_null_mask(rmm::device_buffer{}, 0); }
+  cudf::column_view const col = source->view().column(0);
+  for (auto _ : state) {
+    cuda_event_timer timer(state, true);
+    auto const out = cudf::scan(col, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE);
+  }
+}
+
+// Defines fixture benchmark ReductionScan/<name> as BM_reduction_scan<type>(state, nulls) and
+// registers it with manual timing at five sizes. No trailing ';' here: each use supplies it.
+#define SCAN_BENCHMARK_DEFINE(name, type, nulls)                          \
+  BENCHMARK_DEFINE_F(ReductionScan, name)                                 \
+  (::benchmark::State & state) { BM_reduction_scan<type>(state, nulls); } \
+  BENCHMARK_REGISTER_F(ReductionScan, name)                               \
+    ->UseManualTime()                                                     \
+    ->Arg(10000)->Arg(100000)     /* 10k, 100k */                         \
+    ->Arg(1000000)->Arg(10000000) /* 1M, 10M */                           \
+    ->Arg(100000000)              /* 100M */
+
+SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false);
+SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false);
+SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false);
+SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false);
+SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true);
+SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true);
+SCAN_BENCHMARK_DEFINE(double_nulls, double, true);
@@ -21,11 +21,10 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/detail/utilities/device_atomics.cuh>
+#include <cudf/detail/utilities/device_operators.cuh>
 #include <cudf/null_mask.hpp>
 #include <cudf/reduction.hpp>
 #include <cudf/utilities/error.hpp>
-#include <cudf/utilities/span.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -34,6 +33,32 @@
 
 namespace cudf {
 namespace detail {
+
+namespace {
+/**
+ * @brief Functor that reads element `i` of a column, with a fixed substitute for null rows.
+ *
+ * @tparam Element Type of the column elements and of the substitute value
+ */
+template <typename Element>
+struct null_replace_accessor {
+  column_device_view const col;      ///< device view of the column being read
+  Element const null_replacement{};  ///< substitute returned for null rows
+  bool const has_nulls;              ///< when false, the validity mask is never consulted
+  null_replace_accessor(column_device_view const& _col, Element null_val, bool has_nulls)
+    : col{_col}, null_replacement{null_val}, has_nulls{has_nulls}
+  {
+    CUDF_EXPECTS(data_type(type_to_id<Element>()) == col.type(), "the data type mismatch");
+    if (has_nulls) CUDF_EXPECTS(_col.nullable(), "column with nulls must have a validity bitmask");
+  }
+  __device__ Element operator()(cudf::size_type i) const
+  {
+    if (has_nulls && col.is_null_nocheck(i)) { return null_replacement; }
+    return col.element<Element>(i);
+  }
+};
+}  // namespace
+
 /**
  * @brief Dispatcher for running Scan operation on input column
  * Dispatches scan operation on `Op` and creates output column
@@ -73,23 +98,14 @@ struct scan_dispatcher {
     mutable_column_view output = output_column->mutable_view();
     auto d_input               = column_device_view::create(input_view, stream);
 
-    if (input_view.has_nulls()) {
-      auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
-      thrust::exclusive_scan(rmm::exec_policy(stream),
-                             input,
-                             input + size,
-                             output.data<T>(),
-                             Op::template identity<T>(),
-                             Op{});
-    } else {
-      auto input = d_input->begin<T>();
-      thrust::exclusive_scan(rmm::exec_policy(stream),
-                             input,
-                             input + size,
-                             output.data<T>(),
-                             Op::template identity<T>(),
-                             Op{});
-    }
+    auto input = make_counting_transform_iterator(
+      0, null_replace_accessor<T>{*d_input, Op::template identity<T>(), input_view.has_nulls()});
+    thrust::exclusive_scan(rmm::exec_policy(stream),
+                           input,
+                           input + size,
+                           output.data<T>(),
+                           Op::template identity<T>(),
+                           Op{});
 
     CHECK_CUDA(stream.value());
     return output_column;
@@ -147,13 +163,9 @@ struct scan_dispatcher {
     auto d_input               = column_device_view::create(input_view, stream);
     mutable_column_view output = output_column->mutable_view();
 
-    if (input_view.has_nulls()) {
-      auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
-      thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
-    } else {
-      auto input = d_input->begin<T>();
-      thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
-    }
+    auto const input = make_counting_transform_iterator(
+      0, null_replace_accessor<T>{*d_input, Op::template identity<T>(), input_view.has_nulls()});
+    thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
 
     CHECK_CUDA(stream.value());
     return output_column;
@@ -171,13 +183,10 @@ struct scan_dispatcher {
 
     auto d_input = column_device_view::create(input_view, stream);
 
-    if (input_view.has_nulls()) {
-      auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
-      thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
-    } else {
-      auto input = d_input->begin<T>();
-      thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
-    }
+    auto input = make_counting_transform_iterator(
+      0, null_replace_accessor<T>{*d_input, Op::template identity<T>(), input_view.has_nulls()});
+    thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
+
     CHECK_CUDA(stream.value());
 
     auto output_column =