From f9806ff8a4194e27444d7b8e06a1785d651298f0 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 13 Oct 2021 08:46:50 -0400 Subject: [PATCH] Use optional-iterator for copy-if-else kernel (#9324) This PR changes the `cudf::detail::copy_if_else` utility functions to accept an optional-iterator instead of a pair-iterator. This improves the compile time of source files by generating 4x less kernels since the two input data arrays can each have nulls requiring 4 different pair-iterators to be created to call it. The optional-iterator allows the nulls check to occur at runtime instead of compile time. The changes in this PR are for the callers of `detail::copy_if_else` to provide optional-iterators instead of pair-iterators. This PR is dependent on the changes in #9306 The benchmarks for the effected calling functions showed no significant change in runtime performance using the single optional-iterator over 4 unique pair-iterators. Two additional benchmarks are included cover non-null measurement which this PR impacts the most. Also, the `copy_tests.cu` was renamed `copy_tests.cpp` and the test that launched to the internal `cudf::detail::copy_if_else_kernel` was replaced with one with a data-set large enough to require multiple blocks. Related changes for the strings specific `cudf::strings::detail::copy_if_else` are in #9266 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Robert Maynard (https://github.com/robertmaynard) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/cudf/pull/9324 --- cpp/benchmarks/CMakeLists.txt | 7 +- .../copying/copy_if_else_benchmark.cpp | 67 ++++++++++ cpp/benchmarks/replace/clamp_benchmark.cpp | 20 +-- cpp/benchmarks/replace/nans_benchmark.cpp | 63 ++++++++++ cpp/include/cudf/detail/copy_if_else.cuh | 34 ++--- cpp/src/copying/copy.cu | 118 +++++++++--------- cpp/src/copying/segmented_shift.cu | 36 ++---- cpp/src/dictionary/replace.cu | 15 +-- cpp/src/replace/nans.cu | 59 ++------- cpp/tests/CMakeLists.txt | 2 +- .../copying/{copy_tests.cu => copy_tests.cpp} | 112 ++++------------- 11 files changed, 270 insertions(+), 263 deletions(-) create mode 100644 cpp/benchmarks/copying/copy_if_else_benchmark.cpp create mode 100644 cpp/benchmarks/replace/nans_benchmark.cpp rename cpp/tests/copying/{copy_tests.cu => copy_tests.cpp} (83%) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index b3b92003573..aeaee60086d 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -88,6 +88,10 @@ ConfigureBench(CONTIGUOUS_SPLIT_BENCH copying/contiguous_split_benchmark.cu) # - shift benchmark ------------------------------------------------------------------------------- ConfigureBench(SHIFT_BENCH copying/shift_benchmark.cu) +################################################################################################### +# - copy-if-else benchmark ----------------------------------------------------------------------------- +ConfigureBench(COPY_IF_ELSE_BENCH copying/copy_if_else_benchmark.cpp) + ################################################################################################### # - transpose benchmark --------------------------------------------------------------------------- ConfigureBench(TRANSPOSE_BENCH transpose/transpose_benchmark.cu) @@ -141,7 +145,8 @@ ConfigureBench(REDUCTION_BENCH ################################################################################################### # - reduction benchmark --------------------------------------------------------------------------- ConfigureBench(REPLACE_BENCH - replace/clamp_benchmark.cpp) + replace/clamp_benchmark.cpp + replace/nans_benchmark.cpp) ################################################################################################### # - filling benchmark ----------------------------------------------------------------------------- diff --git a/cpp/benchmarks/copying/copy_if_else_benchmark.cpp b/cpp/benchmarks/copying/copy_if_else_benchmark.cpp new file mode 100644 index 00000000000..513e4f4c179 --- /dev/null +++ b/cpp/benchmarks/copying/copy_if_else_benchmark.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include + +class CopyIfElse : public cudf::benchmark { +}; + +template +static void BM_copy_if_else(benchmark::State& state, bool nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto input_type = cudf::type_to_id(); + auto bool_type = cudf::type_id::BOOL8; + auto const input = create_random_table({input_type, input_type, bool_type}, 3, row_count{n_rows}); + + if (!nulls) { + input->get_column(2).set_null_mask(rmm::device_buffer{}, 0); + input->get_column(1).set_null_mask(rmm::device_buffer{}, 0); + input->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + } + + cudf::column_view decision(input->view().column(2)); + cudf::column_view rhs(input->view().column(1)); + cudf::column_view lhs(input->view().column(0)); + + for (auto _ : state) { + cuda_event_timer raii(state, true, rmm::cuda_stream_default); + cudf::copy_if_else(lhs, rhs, decision); + } +} + +#define COPY_BENCHMARK_DEFINE(name, type, b) \ + BENCHMARK_DEFINE_F(CopyIfElse, name) \ + (::benchmark::State & st) { BM_copy_if_else(st, b); } \ + BENCHMARK_REGISTER_F(CopyIfElse, name) \ + ->RangeMultiplier(8) \ + ->Ranges({{1 << 12, 1 << 27}}) \ + ->UseManualTime() \ + ->Unit(benchmark::kMillisecond); + +COPY_BENCHMARK_DEFINE(int16, int16_t, true) +COPY_BENCHMARK_DEFINE(uint32, uint32_t, true) +COPY_BENCHMARK_DEFINE(float64, double, true) +COPY_BENCHMARK_DEFINE(int16_no_nulls, int16_t, false) +COPY_BENCHMARK_DEFINE(uint32_no_nulls, uint32_t, false) +COPY_BENCHMARK_DEFINE(float64_no_nulls, double, false) diff --git a/cpp/benchmarks/replace/clamp_benchmark.cpp b/cpp/benchmarks/replace/clamp_benchmark.cpp index f897b9d82cc..4d9da4aca6d 100644 --- a/cpp/benchmarks/replace/clamp_benchmark.cpp +++ b/cpp/benchmarks/replace/clamp_benchmark.cpp @@ -30,7 +30,7 @@ class ReplaceClamp : public cudf::benchmark { }; template -static void BM_reduction_scan(benchmark::State& state, bool include_nulls) +static void BM_clamp(benchmark::State& state, bool include_nulls) { cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; auto const dtype = cudf::type_to_id(); @@ -58,15 +58,15 @@ static void BM_reduction_scan(benchmark::State& state, bool include_nulls) } } -#define CLAMP_BENCHMARK_DEFINE(name, type, nulls) \ - BENCHMARK_DEFINE_F(ReplaceClamp, name) \ - (::benchmark::State & state) { BM_reduction_scan(state, nulls); } \ - BENCHMARK_REGISTER_F(ReplaceClamp, name) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ +#define CLAMP_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReplaceClamp, name) \ + (::benchmark::State & state) { BM_clamp(state, nulls); } \ + BENCHMARK_REGISTER_F(ReplaceClamp, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ ->Arg(100000000); /* 100M */ CLAMP_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false); diff --git a/cpp/benchmarks/replace/nans_benchmark.cpp b/cpp/benchmarks/replace/nans_benchmark.cpp new file mode 100644 index 00000000000..a337ae5e7ad --- /dev/null +++ b/cpp/benchmarks/replace/nans_benchmark.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class ReplaceNans : public cudf::benchmark { +}; + +template +static void BM_replace_nans(benchmark::State& state, bool include_nulls) +{ + cudf::size_type const n_rows{(cudf::size_type)state.range(0)}; + auto const dtype = cudf::type_to_id(); + auto const table = create_random_table({dtype}, 1, row_count{n_rows}); + if (!include_nulls) { table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); } + cudf::column_view input(table->view().column(0)); + + auto zero = cudf::make_fixed_width_scalar(0); + + for (auto _ : state) { + cuda_event_timer timer(state, true); + auto result = cudf::replace_nans(input, *zero); + } +} + +#define NANS_BENCHMARK_DEFINE(name, type, nulls) \ + BENCHMARK_DEFINE_F(ReplaceNans, name) \ + (::benchmark::State & state) { BM_replace_nans(state, nulls); } \ + BENCHMARK_REGISTER_F(ReplaceNans, name) \ + ->UseManualTime() \ + ->Arg(10000) /* 10k */ \ + ->Arg(100000) /* 100k */ \ + ->Arg(1000000) /* 1M */ \ + ->Arg(10000000) /* 10M */ \ + ->Arg(100000000); /* 100M */ + +NANS_BENCHMARK_DEFINE(float32_nulls, float, true); +NANS_BENCHMARK_DEFINE(float64_nulls, double, true); +NANS_BENCHMARK_DEFINE(float32_no_nulls, float, false); +NANS_BENCHMARK_DEFINE(float64_no_nulls, double, false); diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 23fd3e87783..1eb050d6a8f 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -19,18 +19,11 @@ #include #include #include -#include #include #include -#include -#include -#include -#include #include -#include - namespace cudf { namespace detail { namespace { // anonymous @@ -40,7 +33,7 @@ template + bool has_nulls> __launch_bounds__(block_size) __global__ void copy_if_else_kernel(LeftIter lhs, RightIter rhs, @@ -71,23 +64,14 @@ __launch_bounds__(block_size) __global__ size_type warp_cur = warp_begin + warp_id; size_type index = tid; while (warp_cur <= warp_end) { - bool in_range = (index >= begin && index < end); - - bool valid = true; - if (has_validity) { - valid = in_range && (filter(index) ? thrust::get<1>(lhs[index]) : thrust::get<1>(rhs[index])); - } - - // do the copy if-else - if (in_range) { - out.element(index) = filter(index) ? static_cast(thrust::get<0>(lhs[index])) - : static_cast(thrust::get<0>(rhs[index])); - } + auto const opt_value = + (index < end) ? (filter(index) ? lhs[index] : rhs[index]) : thrust::nullopt; + if (not has_nulls or opt_value) { out.element(index) = static_cast(*opt_value); } // update validity - if (has_validity) { + if (has_nulls) { // the final validity mask for this warp - int warp_mask = __ballot_sync(0xFFFF'FFFF, valid && in_range); + int warp_mask = __ballot_sync(0xFFFF'FFFF, opt_value.has_value()); // only one guy in the warp needs to update the mask and count if (lane_id == 0) { out.set_mask_word(warp_cur, warp_mask); @@ -100,7 +84,7 @@ __launch_bounds__(block_size) __global__ index += block_size * gridDim.x; } - if (has_validity) { + if (has_nulls) { // sum all null counts across all warps size_type block_valid_count = single_lane_block_sum_reduce(warp_valid_count); @@ -168,8 +152,8 @@ std::unique_ptr copy_if_else( rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) { - using Element = - typename thrust::tuple_element<0, typename thrust::iterator_traits::value_type>::type; + // This is the type of the thrust::optional element in the passed iterators + using Element = typename thrust::iterator_traits::value_type::value_type; size_type size = std::distance(lhs_begin, lhs_end); size_type num_els = cudf::util::round_up_safe(size, warp_size); diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu index 2ee3fee4577..688298e8150 100644 --- a/cpp/src/copying/copy.cu +++ b/cpp/src/copying/copy.cu @@ -20,11 +20,13 @@ #include #include #include +#include #include #include #include #include +#include namespace cudf { namespace detail { @@ -73,28 +75,18 @@ struct copy_if_else_functor_impl auto const& lhs = *p_lhs; auto const& rhs = *p_rhs; - if (left_nullable) { - if (right_nullable) { - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); - return detail::copy_if_else( - true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); - } - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); - return detail::copy_if_else( - true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); - } - if (right_nullable) { - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); - return detail::copy_if_else( - true, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); - } - auto lhs_iter = cudf::detail::make_pair_iterator(lhs); - auto rhs_iter = cudf::detail::make_pair_iterator(rhs); - return detail::copy_if_else( - false, lhs_iter, lhs_iter + size, rhs_iter, filter, lhs.type(), stream, mr); + auto lhs_iter = + cudf::detail::make_optional_iterator(lhs, contains_nulls::DYNAMIC{}, left_nullable); + auto rhs_iter = + cudf::detail::make_optional_iterator(rhs, contains_nulls::DYNAMIC{}, right_nullable); + return detail::copy_if_else(left_nullable || right_nullable, + lhs_iter, + lhs_iter + size, + rhs_iter, + filter, + lhs.type(), + stream, + mr); } }; @@ -247,6 +239,38 @@ std::unique_ptr scatter_gather_based_if_else(cudf::scalar const& lhs, return scatter_gather_based_if_else(lhs, rhs_col->view(), size, is_left, stream, mr); } +template <> +struct copy_if_else_functor_impl { + template + std::unique_ptr operator()(Left const& lhs, + Right const& rhs, + size_type size, + bool, + bool, + Filter filter, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); + } +}; + +template <> +struct copy_if_else_functor_impl { + template + std::unique_ptr operator()(Left const& lhs, + Right const& rhs, + size_type size, + bool, + bool, + Filter filter, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) + { + return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); + } +}; + /** * @brief Functor called by the `type_dispatcher` to invoke copy_if_else on combinations * of column_view and scalar @@ -262,12 +286,6 @@ struct copy_if_else_functor { rmm::cuda_stream_view stream, rmm::mr::device_memory_resource* mr) { - if constexpr (std::is_same_v or std::is_same_v) { - (void)left_nullable; - (void)right_nullable; - return scatter_gather_based_if_else(lhs, rhs, size, filter, stream, mr); - } - copy_if_else_functor_impl copier{}; return copier(lhs, rhs, size, left_nullable, right_nullable, filter, stream, mr); } @@ -292,35 +310,21 @@ std::unique_ptr copy_if_else(Left const& lhs, auto bool_mask_device_p = column_device_view::create(boolean_mask); column_device_view bool_mask_device = *bool_mask_device_p; - if (boolean_mask.has_nulls()) { - auto filter = [bool_mask_device] __device__(cudf::size_type i) { - return bool_mask_device.is_valid_nocheck(i) and bool_mask_device.element(i); - }; - return cudf::type_dispatcher(lhs.type(), - copy_if_else_functor{}, - lhs, - rhs, - boolean_mask.size(), - left_nullable, - right_nullable, - filter, - stream, - mr); - } else { - auto filter = [bool_mask_device] __device__(cudf::size_type i) { - return bool_mask_device.element(i); - }; - return cudf::type_dispatcher(lhs.type(), - copy_if_else_functor{}, - lhs, - rhs, - boolean_mask.size(), - left_nullable, - right_nullable, - filter, - stream, - mr); - } + auto const has_nulls = boolean_mask.has_nulls(); + auto filter = [bool_mask_device, has_nulls] __device__(cudf::size_type i) { + return (!has_nulls || bool_mask_device.is_valid_nocheck(i)) and + bool_mask_device.element(i); + }; + return cudf::type_dispatcher(lhs.type(), + copy_if_else_functor{}, + lhs, + rhs, + boolean_mask.size(), + left_nullable, + right_nullable, + filter, + stream, + mr); } }; // namespace diff --git a/cpp/src/copying/segmented_shift.cu b/cpp/src/copying/segmented_shift.cu index f7e2c341ac1..ee6f40017db 100644 --- a/cpp/src/copying/segmented_shift.cu +++ b/cpp/src/copying/segmented_shift.cu @@ -75,30 +75,20 @@ struct segmented_shift_functor() rmm::mr::device_memory_resource* mr) { auto values_device_view = column_device_view::create(segmented_values, stream); - auto fill_pair_iterator = make_pair_iterator(fill_value); bool nullable = not fill_value.is_valid() or segmented_values.nullable(); - - if (segmented_values.has_nulls()) { - auto input_pair_iterator = make_pair_iterator(*values_device_view) - offset; - return copy_if_else(nullable, - input_pair_iterator, - input_pair_iterator + segmented_values.size(), - fill_pair_iterator, - segmented_shift_filter{segment_offsets, offset}, - segmented_values.type(), - stream, - mr); - } else { - auto input_pair_iterator = make_pair_iterator(*values_device_view) - offset; - return copy_if_else(nullable, - input_pair_iterator, - input_pair_iterator + segmented_values.size(), - fill_pair_iterator, - segmented_shift_filter{segment_offsets, offset}, - segmented_values.type(), - stream, - mr); - } + auto input_iterator = + cudf::detail::make_optional_iterator( + *values_device_view, contains_nulls::DYNAMIC{}, segmented_values.has_nulls()) - + offset; + auto fill_iterator = cudf::detail::make_optional_iterator(fill_value, contains_nulls::YES{}); + return copy_if_else(nullable, + input_iterator, + input_iterator + segmented_values.size(), + fill_iterator, + segmented_shift_filter{segment_offsets, offset}, + segmented_values.type(), + stream, + mr); } }; diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu index 11c81ee434b..a8313c62545 100644 --- a/cpp/src/dictionary/replace.cu +++ b/cpp/src/dictionary/replace.cu @@ -58,17 +58,14 @@ std::unique_ptr replace_indices(column_view const& input, auto const d_input = *input_view; auto predicate = [d_input] __device__(auto i) { return d_input.is_valid(i); }; - using Element = typename thrust:: - tuple_element<0, typename thrust::iterator_traits::value_type>::type; - - auto input_pair_iterator = cudf::detail::indexalator_factory::make_input_pair_iterator(input); + auto input_iterator = cudf::detail::indexalator_factory::make_input_optional_iterator(input); return cudf::detail::copy_if_else(true, - input_pair_iterator, - input_pair_iterator + input.size(), + input_iterator, + input_iterator + input.size(), replacement_iter, predicate, - data_type{type_to_id()}, + data_type{type_to_id()}, stream, mr); } @@ -100,7 +97,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, auto new_indices = replace_indices(input_indices, - cudf::detail::indexalator_factory::make_input_pair_iterator(repl_indices), + cudf::detail::indexalator_factory::make_input_optional_iterator(repl_indices), stream, mr); @@ -133,7 +130,7 @@ std::unique_ptr replace_nulls(dictionary_column_view const& input, auto const input_indices = input_view.get_indices_annotated(); auto new_indices = replace_indices(input_indices, - cudf::detail::indexalator_factory::make_input_pair_iterator(*scalar_index), + cudf::detail::indexalator_factory::make_input_optional_iterator(*scalar_index), stream, mr); new_indices->set_null_mask(rmm::device_buffer{0, stream, mr}, 0); diff --git a/cpp/src/replace/nans.cu b/cpp/src/replace/nans.cu index 44ec582f30a..3811c43a210 100644 --- a/cpp/src/replace/nans.cu +++ b/cpp/src/replace/nans.cu @@ -55,53 +55,18 @@ struct replace_nans_functor { return dinput.is_null(i) or !std::isnan(dinput.element(i)); }; - if (input.has_nulls()) { - auto input_pair_iterator = make_pair_iterator(*input_device_view); - if (replacement_nullable) { - auto replacement_pair_iterator = make_pair_iterator(replacement); - return copy_if_else(true, - input_pair_iterator, - input_pair_iterator + size, - replacement_pair_iterator, - predicate, - input.type(), - stream, - mr); - } else { - auto replacement_pair_iterator = make_pair_iterator(replacement); - return copy_if_else(true, - input_pair_iterator, - input_pair_iterator + size, - replacement_pair_iterator, - predicate, - input.type(), - stream, - mr); - } - } else { - auto input_pair_iterator = make_pair_iterator(*input_device_view); - if (replacement_nullable) { - auto replacement_pair_iterator = make_pair_iterator(replacement); - return copy_if_else(true, - input_pair_iterator, - input_pair_iterator + size, - replacement_pair_iterator, - predicate, - input.type(), - stream, - mr); - } else { - auto replacement_pair_iterator = make_pair_iterator(replacement); - return copy_if_else(false, - input_pair_iterator, - input_pair_iterator + size, - replacement_pair_iterator, - predicate, - input.type(), - stream, - mr); - } - } + auto input_iterator = + make_optional_iterator(*input_device_view, contains_nulls::DYNAMIC{}, input.has_nulls()); + auto replacement_iterator = + make_optional_iterator(replacement, contains_nulls::DYNAMIC{}, replacement_nullable); + return copy_if_else(input.has_nulls() or replacement_nullable, + input_iterator, + input_iterator + size, + replacement_iterator, + predicate, + input.type(), + stream, + mr); } template diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5d104dec8dd..cd2085c50ab 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -219,7 +219,7 @@ ConfigureTest(COPYING_TEST copying/concatenate_tests.cu copying/copy_if_else_nested_tests.cpp copying/copy_range_tests.cpp - copying/copy_tests.cu + copying/copy_tests.cpp copying/detail_gather_tests.cu copying/gather_list_tests.cpp copying/gather_str_tests.cpp diff --git a/cpp/tests/copying/copy_tests.cu b/cpp/tests/copying/copy_tests.cpp similarity index 83% rename from cpp/tests/copying/copy_tests.cu rename to cpp/tests/copying/copy_tests.cpp index 03869c37adf..8d4e9295783 100644 --- a/cpp/tests/copying/copy_tests.cu +++ b/cpp/tests/copying/copy_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,14 +21,9 @@ #include #include -#include #include -#include #include #include -#include - -#include template struct CopyTest : public cudf::test::BaseFixture { @@ -66,90 +61,6 @@ TYPED_TEST(CopyTest, CopyIfElseTestManyNulls) CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); } -struct copy_if_else_tiny_grid_functor { - template ())> - std::unique_ptr operator()(cudf::column_view const& lhs, - cudf::column_view const& rhs, - Filter filter, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - // output - std::unique_ptr out = - cudf::allocate_like(lhs, lhs.size(), cudf::mask_allocation_policy::RETAIN, mr); - - // device views - auto lhs_view = cudf::column_device_view::create(lhs); - auto rhs_view = cudf::column_device_view::create(rhs); - auto lhs_iter = cudf::detail::make_pair_iterator(*lhs_view); - auto rhs_iter = cudf::detail::make_pair_iterator(*rhs_view); - auto out_dv = cudf::mutable_column_device_view::create(*out); - - // call the kernel with an artificially small grid - cudf::detail::copy_if_else_kernel<32, T, decltype(lhs_iter), decltype(rhs_iter), Filter, false> - <<<1, 32, 0, stream.value()>>>(lhs_iter, rhs_iter, filter, *out_dv, nullptr); - - return out; - } - - template ())> - std::unique_ptr operator()(cudf::column_view const& lhs, - cudf::column_view const& rhs, - Filter filter, - rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) - { - CUDF_FAIL("Unexpected test execution"); - } -}; - -std::unique_ptr tiny_grid_launch(cudf::column_view const& lhs, - cudf::column_view const& rhs, - cudf::column_view const& boolean_mask) -{ - auto bool_mask_device_p = cudf::column_device_view::create(boolean_mask); - cudf::column_device_view bool_mask_device = *bool_mask_device_p; - auto filter = [bool_mask_device] __device__(cudf::size_type i) { - return bool_mask_device.element(i); - }; - return cudf::type_dispatcher(lhs.type(), - copy_if_else_tiny_grid_functor{}, - lhs, - rhs, - filter, - rmm::cuda_stream_default, - rmm::mr::get_current_device_resource()); -} - -TYPED_TEST(CopyTest, CopyIfElseTestTinyGrid) -{ - using T = TypeParam; - - // make sure we span at least 2 warps - int num_els = 64; - - bool mask[] = {1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; - cudf::test::fixed_width_column_wrapper mask_w(mask, mask + num_els); - - wrapper lhs_w({5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}); - - wrapper rhs_w({6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}); - - wrapper expected_w({5, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, - 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}); - - auto out = tiny_grid_launch(lhs_w, rhs_w, mask_w); - - CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); -} - TYPED_TEST(CopyTest, CopyIfElseTestLong) { using T = TypeParam; @@ -190,6 +101,27 @@ TYPED_TEST(CopyTest, CopyIfElseTestLong) CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); } +TYPED_TEST(CopyTest, CopyIfElseTestMultipleBlocks) +{ + using T = TypeParam; + + int num = 1000; // larger than a single block + std::vector h_lhs(num, 5); + std::vector h_rhs(num, 6); + std::vector h_mask(num, false); + std::vector h_validity(num, true); + h_validity[0] = 0; + + cudf::test::fixed_width_column_wrapper lhs_w( + h_lhs.begin(), h_lhs.end(), h_validity.begin()); + cudf::test::fixed_width_column_wrapper rhs_w( + h_rhs.begin(), h_rhs.end(), h_validity.begin()); + cudf::test::fixed_width_column_wrapper mask_w(h_mask.begin(), h_mask.end()); + + auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), rhs_w); +} + TYPED_TEST(CopyTest, CopyIfElseTestEmptyInputs) { using T = TypeParam;