Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce compile time/size for scan.cu #7516

Merged
merged 4 commits into from
Mar 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ ConfigureBench(REDUCTION_BENCH
reduction/anyall_benchmark.cpp
reduction/dictionary_benchmark.cpp
reduction/reduce_benchmark.cpp
reduction/scan_benchmark.cpp
reduction/minmax_benchmark.cpp)

###################################################################################################
Expand Down
63 changes: 63 additions & 0 deletions cpp/benchmarks/reduction/scan_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>

class ReductionScan : public cudf::benchmark {
};

template <typename type>
static void BM_reduction_scan(benchmark::State& state, bool include_nulls)
{
cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
auto const dtype = cudf::type_to_id<type>();
auto const table = create_random_table({dtype}, 1, row_count{n_rows});
if (!include_nulls) table->get_column(0).set_null_mask(rmm::device_buffer{}, 0);
cudf::column_view input(table->view().column(0));

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE);
}
}

#define SCAN_BENCHMARK_DEFINE(name, type, nulls) \
BENCHMARK_DEFINE_F(ReductionScan, name) \
(::benchmark::State & state) { BM_reduction_scan<type>(state, nulls); } \
BENCHMARK_REGISTER_F(ReductionScan, name) \
->UseManualTime() \
->Arg(10000) /* 10k */ \
->Arg(100000) /* 100k */ \
->Arg(1000000) /* 1M */ \
->Arg(10000000) /* 10M */ \
->Arg(100000000); /* 100M */

SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false);
SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false);
SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false);
SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false);
SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true);
SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true);
SCAN_BENCHMARK_DEFINE(double_nulls, double, true);
48 changes: 28 additions & 20 deletions cpp/include/cudf/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -71,41 +71,47 @@ inline auto make_counting_transform_iterator(cudf::size_type start, UnaryFunctio
}

/**
* @brief value accessor of column with null bitmask
* A unary functor returns scalar value at `id`.
* `operator() (cudf::size_type id)` computes `element` and valid flag at `id`
* This functor is only allowed for nullable columns.
* @brief Value accessor of column that may have a null bitmask.
*
* the return value for element `i` will return `column[i]`
* if it is valid, or `null_replacement` if it is null.
* This unary functor returns scalar value at `id`.
* The `operator()(cudf::size_type id)` computes the `element` and valid flag at `id`.
*
* @throws cudf::logic_error if the column is not nullable.
* @throws cudf::logic_error if column datatype and Element type mismatch.
* The return value for element `i` will return `column[i]`
* if it is valid, or `null_replacement` if it is null.
*
* @tparam Element The type of elements in the column
*/
template <typename Element>
struct null_replaced_value_accessor {
column_device_view const col; ///< column view of column in device
Element const null_replacement{}; ///< value returned when element is null
bool const has_nulls; ///< true if col has null elements

/**
* @brief constructor
* @param[in] _col column device view of cudf column
* @brief Creates an accessor for a null-replacement iterator.
*
* @throws cudf::logic_error if `col` type does not match Element type.
* @throws cudf::logic_error if `has_nulls` is true but `col` does not have a validity mask.
*
* @param[in] col column device view of cudf column
* @param[in] null_replacement The value to return for null elements
* @param[in] has_nulls Must be set to true if `col` has nulls.
*/
null_replaced_value_accessor(column_device_view const& _col, Element null_val)
: col{_col}, null_replacement{null_val}
null_replaced_value_accessor(column_device_view const& col,
Element null_val,
bool has_nulls = true)
: col{col}, null_replacement{null_val}, has_nulls{has_nulls}
{
CUDF_EXPECTS(data_type(type_to_id<Element>()) == col.type(), "the data type mismatch");
// verify valid is non-null, otherwise, is_valid_nocheck() will crash
CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column.");
CUDF_EXPECTS(type_to_id<Element>() == device_storage_type_id(col.type().id()),
"the data type mismatch");
// verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash
if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask");
}

CUDA_DEVICE_CALLABLE
Element operator()(cudf::size_type i) const
{
return col.is_valid_nocheck(i) ? col.element<Element>(i) : null_replacement;
return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element<Element>(i);
}
};

Expand Down Expand Up @@ -140,23 +146,25 @@ struct validity_accessor {
*
* Dereferencing the returned iterator for element `i` will return `column[i]`
* if it is valid, or `null_replacement` if it is null.
* This iterator is only allowed for nullable columns.
* This iterator is only allowed for both nullable and non-nullable columns.
*
* @throws cudf::logic_error if the column is not nullable.
* @throws cudf::logic_error if column datatype and Element type mismatch.
*
* @tparam Element The type of elements in the column
* @param column The column to iterate
* @param null_replacement The value to return for null elements
* @return auto Iterator that returns valid column elements, or a null
* @param has_nulls Must be set to true if `column` has nulls.
* @return Iterator that returns valid column elements, or a null
* replacement value for null elements.
*/
template <typename Element>
auto make_null_replacement_iterator(column_device_view const& column,
Element const null_replacement = Element{0})
Element const null_replacement = Element{0},
bool has_nulls = true)
{
return make_counting_transform_iterator(
0, null_replaced_value_accessor<Element>{column, null_replacement});
0, null_replaced_value_accessor<Element>{column, null_replacement, has_nulls});
}

/**
Expand Down
50 changes: 17 additions & 33 deletions cpp/src/reductions/scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/detail/utilities/device_operators.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/reduction.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand All @@ -34,6 +33,7 @@

namespace cudf {
namespace detail {

/**
* @brief Dispatcher for running Scan operation on input column
* Dispatches scan operation on `Op` and creates output column
Expand Down Expand Up @@ -73,23 +73,14 @@ struct scan_dispatcher {
mutable_column_view output = output_column->mutable_view();
auto d_input = column_device_view::create(input_view, stream);

if (input_view.has_nulls()) {
auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
thrust::exclusive_scan(rmm::exec_policy(stream),
input,
input + size,
output.data<T>(),
Op::template identity<T>(),
Op{});
} else {
auto input = d_input->begin<T>();
thrust::exclusive_scan(rmm::exec_policy(stream),
input,
input + size,
output.data<T>(),
Op::template identity<T>(),
Op{});
}
auto input =
make_null_replacement_iterator(*d_input, Op::template identity<T>(), input_view.has_nulls());
thrust::exclusive_scan(rmm::exec_policy(stream),
input,
input + size,
output.data<T>(),
Op::template identity<T>(),
Op{});

CHECK_CUDA(stream.value());
return output_column;
Expand Down Expand Up @@ -147,13 +138,9 @@ struct scan_dispatcher {
auto d_input = column_device_view::create(input_view, stream);
mutable_column_view output = output_column->mutable_view();

if (input_view.has_nulls()) {
auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
} else {
auto input = d_input->begin<T>();
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
}
auto const input =
make_null_replacement_iterator(*d_input, Op::template identity<T>(), input_view.has_nulls());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});

CHECK_CUDA(stream.value());
return output_column;
Expand All @@ -171,13 +158,10 @@ struct scan_dispatcher {

auto d_input = column_device_view::create(input_view, stream);

if (input_view.has_nulls()) {
auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
} else {
auto input = d_input->begin<T>();
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
}
auto input =
make_null_replacement_iterator(*d_input, Op::template identity<T>(), input_view.has_nulls());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});

CHECK_CUDA(stream.value());

auto output_column =
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/iterator/value_iterator_test.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -343,7 +343,7 @@ TYPED_TEST(IteratorTest, error_handling)

CUDF_EXPECT_THROW_MESSAGE((cudf::detail::make_null_replacement_iterator(
*d_col_no_null, cudf::test::make_type_param_scalar<T>(0))),
"Unexpected non-nullable column.");
"column with nulls must have a validity bitmask");

CUDF_EXPECT_THROW_MESSAGE((d_col_no_null->pair_begin<T, true>()),
"Unexpected non-nullable column.");
Expand Down
29 changes: 24 additions & 5 deletions cpp/tests/reductions/scan_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -509,8 +509,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanSum)
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const expected = fp_wrapper{{1, 3, 6, 10}, scale};
auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::INCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);

auto const with_nulls = fp_wrapper({1, 2, 3, 0, 4, 0}, {1, 1, 1, 0, 1, 0}, scale);
auto const expected_nulls = fp_wrapper({1, 3, 6, 0, 10, 0}, {1, 1, 1, 0, 1, 0}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::INCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls);
}
}

Expand All @@ -526,8 +531,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointPreScanSum)
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const expected = fp_wrapper{{0, 1, 3, 6}, scale};
auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);

auto const with_nulls = fp_wrapper({0, 1, 2, 3, 0, 4}, {0, 1, 1, 1, 0, 1}, scale);
auto const expected_nulls = fp_wrapper({0, 0, 1, 3, 0, 6}, {0, 1, 1, 1, 0, 1}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls);
}
}

Expand Down Expand Up @@ -556,8 +566,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMin)
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const expected = fp_wrapper{{1, 1, 1, 1}, scale};
auto const result = cudf::scan(column, cudf::make_min_aggregation(), scan_type::INCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);

auto const with_nulls = fp_wrapper({1, 0, 2, 0, 3, 4}, {1, 0, 1, 0, 1, 1}, scale);
auto const expected_nulls = fp_wrapper({1, 0, 1, 0, 1, 1}, {1, 0, 1, 0, 1, 1}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_min_aggregation(), scan_type::INCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls);
}
}

Expand All @@ -572,7 +587,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMax)
auto const scale = scale_type{i};
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const result = cudf::scan(column, cudf::make_max_aggregation(), scan_type::INCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), column);

auto const with_nulls = fp_wrapper({1, 0, 0, 2, 3, 4}, {1, 0, 0, 1, 1, 1}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_max_aggregation(), scan_type::INCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), with_nulls);
}
}