Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor dictionary support for reductions any/all #7242

Merged
merged 10 commits into from
Feb 8, 2021
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ ConfigureBench(TYPE_DISPATCHER_BENCH "${TD_BENCH_SRC}")

set(REDUCTION_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/reduction/anyall_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/reduction/dictionary_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/reduction/reduce_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/reduction/minmax_benchmark.cpp")

Expand Down
86 changes: 86 additions & 0 deletions cpp/benchmarks/reduction/dictionary_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/types.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <memory>
#include <random>

/**
 * @brief Fixture under which the dictionary reduction benchmarks in this file
 * are defined and registered.
 *
 * It derives from `cudf::benchmark` and adds no members of its own.
 */
class ReductionDictionary : public cudf::benchmark {};

/**
 * @brief Benchmark body: reduces a randomly populated dictionary column with `agg`.
 *
 * The column is built once, before the measured loop; each benchmark iteration
 * performs a single `cudf::reduce` call under a `cuda_event_timer`.
 *
 * @tparam T Type argument handed to `cudf::test::dictionary_column_wrapper`
 *           (NOTE(review): presumably the dictionary key type - confirm).
 * @param state Benchmark state; `state.range(0)` supplies the number of rows.
 * @param agg Aggregation to run; its `kind` also selects the output type.
 */
template <typename T>
void BM_reduction_dictionary(benchmark::State& state, std::unique_ptr<cudf::aggregation> const& agg)
{
  auto const num_rows = static_cast<cudf::size_type>(state.range(0));

  // Source values come from a generator constructed with the arguments (0, 100);
  // the row index supplied by the counting iterator is deliberately ignored.
  cudf::test::UniformRandomGenerator<long> generator(0, 100);
  auto value_begin = cudf::test::make_counting_transform_iterator(
    0, [&generator](cudf::size_type) { return generator.generate(); });
  using source_type = typename decltype(value_begin)::value_type;
  cudf::test::dictionary_column_wrapper<T, source_type> dictionary(value_begin,
                                                                   value_begin + num_rows);

  auto input_view = static_cast<cudf::column_view>(dictionary);

  // any/all request BOOL8, mean requests FLOAT64, every other kind requests T's own type id.
  cudf::data_type result_type = [&] {
    switch (agg->kind) {
      case cudf::aggregation::ANY:
      case cudf::aggregation::ALL: return cudf::data_type{cudf::type_id::BOOL8};
      case cudf::aggregation::MEAN: return cudf::data_type{cudf::type_id::FLOAT64};
      default: return cudf::data_type{cudf::type_to_id<T>()};
    }
  }();

  for (auto _ : state) {
    // NOTE(review): the meaning of the `true` flag is defined by cuda_event_timer
    // (benchmarks/synchronization) and is not visible here - confirm.
    cuda_event_timer scoped_timer(state, true);
    auto reduced = cudf::reduce(input_view, agg, result_type);
  }
}

// Token-pastes its three arguments together (a##b##c).
#define concat(a, b, c) a##b##c
// Expands to a call of the aggregation factory named after `op`,
// e.g. get_agg(min) becomes cudf::make_min_aggregation().
#define get_agg(op) concat(cudf::make_, op, _aggregation())

// Defines a ReductionDictionary fixture benchmark called `name` that runs
// BM_reduction_dictionary<type> with the aggregation produced by
// get_agg(aggregation), then registers it with manual timing for five
// row counts ranging from 10k to 100M.
#define RBM_BENCHMARK_DEFINE(name, type, aggregation) \
BENCHMARK_DEFINE_F(ReductionDictionary, name)(::benchmark::State & state) \
{ \
BM_reduction_dictionary<type>(state, get_agg(aggregation)); \
} \
BENCHMARK_REGISTER_F(ReductionDictionary, name) \
->UseManualTime() \
->Arg(10000) /* 10k */ \
->Arg(100000) /* 100k */ \
->Arg(1000000) /* 1M */ \
->Arg(10000000) /* 10M */ \
->Arg(100000000); /* 100M */

// Convenience wrapper taking (TYPE, OP): the benchmark name is generated as
// <type>_<aggregation>, e.g. int32_t_all.
#define REDUCE_BENCHMARK_DEFINE(type, aggregation) \
RBM_BENCHMARK_DEFINE(concat(type, _, aggregation), type, aggregation)

// Benchmarks instantiated: each aggregation below is run for both int32_t and float.
REDUCE_BENCHMARK_DEFINE(int32_t, all);
REDUCE_BENCHMARK_DEFINE(float, all);
REDUCE_BENCHMARK_DEFINE(int32_t, any);
REDUCE_BENCHMARK_DEFINE(float, any);
REDUCE_BENCHMARK_DEFINE(int32_t, min);
REDUCE_BENCHMARK_DEFINE(float, min);
REDUCE_BENCHMARK_DEFINE(int32_t, max);
REDUCE_BENCHMARK_DEFINE(float, max);
REDUCE_BENCHMARK_DEFINE(int32_t, mean);
REDUCE_BENCHMARK_DEFINE(float, mean);
20 changes: 11 additions & 9 deletions cpp/include/cudf/dictionary/detail/iterator.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -66,27 +66,28 @@ auto make_dictionary_iterator(column_device_view const& dictionary_column)
* @brief Accessor functor for returning a dictionary pair iterator.
*
* @tparam KeyType The type of the dictionary's key element.
* @tparam has_nulls Set to `true` if `d_dictionary` has nulls.
*
* @throw cudf::logic_error if `has_nulls==true` and `d_dictionary` is not nullable.
*/
template <typename KeyType, bool has_nulls>
template <typename KeyType>
struct dictionary_access_pair_fn {
dictionary_access_pair_fn(column_device_view const& d_dictionary) : d_dictionary{d_dictionary}
dictionary_access_pair_fn(column_device_view const& d_dictionary, bool has_nulls = true)
: d_dictionary{d_dictionary}, has_nulls{has_nulls}
{
if (has_nulls) { CUDF_EXPECTS(d_dictionary.nullable(), "unexpected non-nullable column"); }
}

__device__ thrust::pair<KeyType, bool> operator()(size_type idx) const
{
if (has_nulls && d_dictionary.is_null(idx)) return {KeyType{}, false};
auto keys = d_dictionary.child(1);
auto keys = d_dictionary.child(dictionary_column_view::keys_column_index);
return {keys.element<KeyType>(static_cast<size_type>(d_dictionary.element<dictionary32>(idx))),
true};
};

private:
column_device_view const d_dictionary;
bool has_nulls;
};

/**
Expand All @@ -100,19 +101,20 @@ struct dictionary_access_pair_fn {
* @throw cudf::logic_error if `dictionary_column` is not a dictionary column.
*
* @tparam KeyType The type of the dictionary's key element.
* @tparam has_nulls Set to `true` if the dictionary_column has nulls.
*
* @param dictionary_column The dictionary device view to iterate.
* @param has_nulls Set to `true` if the `dictionary_column` has nulls.
* @return Pair iterator with `{value,valid}`
*/
template <typename KeyType, bool has_nulls>
auto make_dictionary_pair_iterator(column_device_view const& dictionary_column)
template <typename KeyType>
auto make_dictionary_pair_iterator(column_device_view const& dictionary_column,
bool has_nulls = true)
{
CUDF_EXPECTS(is_dictionary(dictionary_column.type()),
"Dictionary iterator is only for dictionary columns");
return thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0),
dictionary_access_pair_fn<KeyType, has_nulls>{dictionary_column});
dictionary_access_pair_fn<KeyType>{dictionary_column, has_nulls});
}

} // namespace detail
Expand Down
54 changes: 50 additions & 4 deletions cpp/src/reductions/all.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,8 +18,50 @@
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <reductions/simple.cuh>

#include <thrust/logical.h>

namespace cudf {
namespace reduction {
namespace detail {
namespace {

/**
 * @brief Type-dispatched functor computing the all() reduction of a dictionary column.
 *
 * The functor is dispatched on the type of the dictionary's keys. Arithmetic key
 * types are reduced with `thrust::all_of`; any other key type raises an error.
 *
 * According to the original author, this compiles about 10x faster than going
 * through the generic reduce utility in reductions/simple.cuh and can also
 * execute faster for very large columns.
 */
struct all_fn {
  /**
   * @brief Overload selected for non-arithmetic key types; always fails.
   *
   * @throw cudf::logic_error unconditionally (via CUDF_FAIL).
   */
  template <typename T, std::enable_if_t<!std::is_arithmetic<T>::value>* = nullptr>
  std::unique_ptr<scalar> operator()(column_view const&,
                                     rmm::cuda_stream_view,
                                     rmm::mr::device_memory_resource*)
  {
    CUDF_FAIL("Unexpected key type for dictionary in reduction all()");
  }

  /**
   * @brief Overload selected for arithmetic key types.
   *
   * @tparam T Type of the dictionary keys.
   * @param input Dictionary column to reduce.
   * @param stream CUDA stream passed to the device-view creation, the thrust
   *               execution policy and the result scalar.
   * @param mr Memory resource passed to the result scalar.
   * @return A `numeric_scalar<bool>` marked valid, holding the `thrust::all_of` result.
   */
  template <typename T, std::enable_if_t<std::is_arithmetic<T>::value>* = nullptr>
  std::unique_ptr<scalar> operator()(column_view const& input,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
  {
    auto const device_view = cudf::column_device_view::create(input, stream);

    // Maps each {key, valid} pair to a bool. Null rows are substituted by op::min's
    // null-replacing transformer (NOTE(review): presumably with the operator's
    // identity, so that nulls cannot change the outcome - confirm).
    auto to_bool =
      cudf::reduction::op::min{}.template get_null_replacing_element_transformer<bool>();
    auto key_pairs =
      cudf::dictionary::detail::make_dictionary_pair_iterator<T>(*device_view, input.has_nulls());
    auto const first = thrust::make_transform_iterator(key_pairs, to_bool);
    auto const last  = first + input.size();

    return std::make_unique<numeric_scalar<bool>>(
      thrust::all_of(rmm::exec_policy(stream), first, last, thrust::identity<bool>()),
      true,
      stream,
      mr);
  }
};

} // namespace
} // namespace detail

std::unique_ptr<cudf::scalar> all(column_view const& col,
cudf::data_type const output_dtype,
Expand All @@ -28,9 +70,13 @@ std::unique_ptr<cudf::scalar> all(column_view const& col,
{
CUDF_EXPECTS(output_dtype == cudf::data_type(cudf::type_id::BOOL8),
"all() operation can be applied with output type `BOOL8` only");
auto const dispatch_type =
cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type();
return cudf::type_dispatcher(dispatch_type,

if (cudf::is_dictionary(col.type())) {
return cudf::type_dispatcher(
dictionary_column_view(col).keys().type(), detail::all_fn{}, col, stream, mr);
}
// dispatch for non-dictionary types
return cudf::type_dispatcher(col.type(),
simple::bool_result_element_dispatcher<cudf::reduction::op::min>{},
col,
stream,
Expand Down
53 changes: 49 additions & 4 deletions cpp/src/reductions/any.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,8 +18,50 @@
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <reductions/simple.cuh>

#include <thrust/logical.h>

namespace cudf {
namespace reduction {
namespace detail {
namespace {

/**
 * @brief Type-dispatched functor computing the any() reduction of a dictionary column.
 *
 * The functor is dispatched on the type of the dictionary's keys. Arithmetic key
 * types are reduced with `thrust::any_of`; any other key type raises an error.
 *
 * According to the original author, this compiles about 10x faster than going
 * through the generic reduce utility in reductions/simple.cuh and can also
 * execute faster for very large columns.
 */
struct any_fn {
  /**
   * @brief Overload selected for non-arithmetic key types; always fails.
   *
   * @throw cudf::logic_error unconditionally (via CUDF_FAIL).
   */
  template <typename T, std::enable_if_t<!std::is_arithmetic<T>::value>* = nullptr>
  std::unique_ptr<scalar> operator()(column_view const&,
                                     rmm::cuda_stream_view,
                                     rmm::mr::device_memory_resource*)
  {
    CUDF_FAIL("Unexpected key type for dictionary in reduction any()");
  }

  /**
   * @brief Overload selected for arithmetic key types.
   *
   * @tparam T Type of the dictionary keys.
   * @param input Dictionary column to reduce.
   * @param stream CUDA stream passed to the device-view creation, the thrust
   *               execution policy and the result scalar.
   * @param mr Memory resource passed to the result scalar.
   * @return A `numeric_scalar<bool>` marked valid, holding the `thrust::any_of` result.
   */
  template <typename T, std::enable_if_t<std::is_arithmetic<T>::value>* = nullptr>
  std::unique_ptr<scalar> operator()(column_view const& input,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
  {
    auto const device_view = cudf::column_device_view::create(input, stream);

    // Maps each {key, valid} pair to a bool. Null rows are substituted by op::max's
    // null-replacing transformer (NOTE(review): presumably with the operator's
    // identity, so that nulls cannot change the outcome - confirm).
    auto to_bool =
      cudf::reduction::op::max{}.template get_null_replacing_element_transformer<bool>();
    auto key_pairs =
      cudf::dictionary::detail::make_dictionary_pair_iterator<T>(*device_view, input.has_nulls());
    auto const first = thrust::make_transform_iterator(key_pairs, to_bool);
    auto const last  = first + input.size();

    return std::make_unique<numeric_scalar<bool>>(
      thrust::any_of(rmm::exec_policy(stream), first, last, thrust::identity<bool>()),
      true,
      stream,
      mr);
  }
};

} // namespace
} // namespace detail

std::unique_ptr<cudf::scalar> any(column_view const& col,
cudf::data_type const output_dtype,
Expand All @@ -28,9 +70,12 @@ std::unique_ptr<cudf::scalar> any(column_view const& col,
{
CUDF_EXPECTS(output_dtype == cudf::data_type(cudf::type_id::BOOL8),
"any() operation can be applied with output type `bool8` only");
auto const dispatch_type =
cudf::is_dictionary(col.type()) ? dictionary_column_view(col).keys().type() : col.type();
return cudf::type_dispatcher(dispatch_type,
if (cudf::is_dictionary(col.type())) {
return cudf::type_dispatcher(
dictionary_column_view(col).keys().type(), detail::any_fn{}, col, stream, mr);
}
// dispatch for non-dictionary types
return cudf::type_dispatcher(col.type(),
simple::bool_result_element_dispatcher<cudf::reduction::op::max>{},
col,
stream,
Expand Down
20 changes: 6 additions & 14 deletions cpp/src/reductions/compound.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -70,19 +70,11 @@ std::unique_ptr<scalar> compound_reduction(column_view const& col,
it, col.size(), compound_op, valid_count, ddof, stream, mr);
}
} else {
if (col.has_nulls()) {
auto it = thrust::make_transform_iterator(
cudf::dictionary::detail::make_dictionary_pair_iterator<ElementType, true>(*dcol),
compound_op.template get_null_replacing_element_transformer<ResultType>());
result = detail::reduce<Op, decltype(it), ResultType>(
it, col.size(), compound_op, valid_count, ddof, stream, mr);
} else {
auto it = thrust::make_transform_iterator(
cudf::dictionary::detail::make_dictionary_iterator<ElementType>(*dcol),
compound_op.template get_element_transformer<ResultType>());
result = detail::reduce<Op, decltype(it), ResultType>(
it, col.size(), compound_op, valid_count, ddof, stream, mr);
}
auto it = thrust::make_transform_iterator(
cudf::dictionary::detail::make_dictionary_pair_iterator<ElementType>(*dcol, col.has_nulls()),
compound_op.template get_null_replacing_element_transformer<ResultType>());
result = detail::reduce<Op, decltype(it), ResultType>(
it, col.size(), compound_op, valid_count, ddof, stream, mr);
}

// set scalar is valid
Expand Down
22 changes: 7 additions & 15 deletions cpp/src/reductions/simple.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -139,17 +139,11 @@ std::unique_ptr<scalar> dictionary_reduction(column_view const& col,
auto simple_op = Op{};

auto result = [&] {
if (col.has_nulls()) {
auto f = simple_op.template get_null_replacing_element_transformer<ResultType>();
auto p = cudf::dictionary::detail::make_dictionary_pair_iterator<ElementType, true>(*dcol);
auto it = thrust::make_transform_iterator(p, f);
return detail::reduce(it, col.size(), simple_op, stream, mr);
} else {
auto f = simple_op.template get_element_transformer<ResultType>();
auto p = cudf::dictionary::detail::make_dictionary_iterator<ElementType>(*dcol);
auto it = thrust::make_transform_iterator(p, f);
return detail::reduce(it, col.size(), simple_op, stream, mr);
}
auto f = simple_op.template get_null_replacing_element_transformer<ResultType>();
auto p =
cudf::dictionary::detail::make_dictionary_pair_iterator<ElementType>(*dcol, col.has_nulls());
auto it = thrust::make_transform_iterator(p, f);
return detail::reduce(it, col.size(), simple_op, stream, mr);
}();

// set scalar is valid
Expand Down Expand Up @@ -232,9 +226,7 @@ struct bool_result_element_dispatcher {
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return cudf::is_dictionary(col.type())
? dictionary_reduction<ElementType, bool, Op>(col, stream, mr)
: simple_reduction<ElementType, bool, Op>(col, stream, mr);
return simple_reduction<ElementType, bool, Op>(col, stream, mr);
}

template <typename ElementType,
Expand Down