Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add gbenchmarks for string substrings functions #7603

Merged
merged 2 commits into from
Mar 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,5 @@ ConfigureBench(STRINGS_BENCH
string/find_benchmark.cpp
string/replace_benchmark.cpp
string/split_benchmark.cpp
string/substring_benchmark.cpp
string/url_decode_benchmark.cpp)
2 changes: 2 additions & 0 deletions cpp/benchmarks/string/string_bench_args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include <benchmark/benchmark.h>

#include <cudf/types.hpp>

/**
* @brief Generate row count and row length argument ranges for a string benchmark.
*
Expand Down
93 changes: 93 additions & 0 deletions cpp/benchmarks/string/substring_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "string_bench_args.hpp"

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/substring.hpp>
#include <cudf_test/base_fixture.hpp>
#include <cudf_test/column_wrapper.hpp>

#include <limits>

#include <thrust/iterator/constant_iterator.h>

/**
 * @brief Benchmark fixture for the strings substring benchmarks in this file.
 *
 * Declares no members of its own; it only derives from cudf::benchmark and supplies
 * the fixture name used with BENCHMARK_DEFINE_F / BENCHMARK_REGISTER_F.
 */
class StringSubstring : public cudf::benchmark {
};

/**
 * @brief Selects which cudf::strings::slice_strings call BM_substring times.
 *
 * - position:        integer start and stop values applied to every row
 * - multi_position:  start and stop values supplied as per-row columns
 * - delimiter:       a single string delimiter together with a count
 * - multi_delimiter: a per-row strings column of delimiters together with a count
 *
 * The enumerator names are also pasted by STRINGS_BENCHMARK_DEFINE to form the
 * benchmark names, so renaming one renames the corresponding benchmark.
 */
enum substring_type { position, multi_position, delimiter, multi_delimiter };

/**
 * @brief Times one cudf::strings::slice_strings variant on a randomly generated strings column.
 *
 * All inputs (the random strings column, the per-row start/stop columns and the per-row
 * delimiter column) are built once before the measured loop, regardless of which variant
 * is selected. Each loop iteration is wrapped in a cuda_event_timer and performs exactly
 * one slice_strings call whose result is discarded.
 *
 * @param state Benchmark state. range(0) is used as the number of rows and range(1) as the
 *              upper bound given to the STRING distribution of the random-data profile.
 * @param rt    The slice_strings variant to run; values not named in substring_type run nothing.
 */
static void BM_substring(benchmark::State& state, substring_type rt)
{
  auto const n_rows         = static_cast<cudf::size_type>(state.range(0));
  auto const max_str_length = static_cast<cudf::size_type>(state.range(1));

  // Random input: a single STRING column drawn with a NORMAL distribution over [0, max_str_length].
  data_profile profile;
  profile.set_distribution_params(
    cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
  auto const random_table =
    create_random_table({cudf::type_id::STRING}, 1, row_count{n_rows}, profile);
  cudf::strings_column_view input(random_table->view().column(0));

  // Per-row arguments for the column-based variants: every row uses start 1 and
  // stop max_str_length / 2, i.e. the same values the scalar variant passes.
  auto const half_length = max_str_length / 2;
  auto first_start       = thrust::constant_iterator<cudf::size_type>(1);
  auto first_stop        = thrust::constant_iterator<cudf::size_type>(half_length);
  cudf::test::fixed_width_column_wrapper<int32_t> starts(first_start, first_start + n_rows);
  cudf::test::fixed_width_column_wrapper<int32_t> stops(first_stop, first_stop + n_rows);

  // Per-row delimiter column: a single space for every row.
  auto first_delim = thrust::constant_iterator<std::string>(" ");
  cudf::test::strings_column_wrapper delimiters(first_delim, first_delim + n_rows);

  for (auto _ : state) {
    cuda_event_timer timer(state, true, 0);
    if (rt == position) {
      cudf::strings::slice_strings(input, 1, half_length);
    } else if (rt == multi_position) {
      cudf::strings::slice_strings(input, starts, stops);
    } else if (rt == delimiter) {
      cudf::strings::slice_strings(input, std::string{" "}, 1);
    } else if (rt == multi_delimiter) {
      cudf::strings::slice_strings(input, cudf::strings_column_view(delimiters), 1);
    }
  }

  // Report throughput in terms of the input column's character bytes per iteration.
  state.SetBytesProcessed(state.iterations() * input.chars_size());
}

/**
 * @brief Supplies the (row count, row length) argument ranges for the substring benchmarks.
 *
 * Forwards fixed bounds to generate_string_bench_args: rows from 2^12 to 2^24 with a
 * multiplier of 8, and row lengths from 2^5 to 2^13 with a multiplier of 4.
 *
 * @param b The benchmark being configured; passed through unchanged.
 */
static void generate_bench_args(benchmark::internal::Benchmark* b)
{
  // Row-count range.
  constexpr int rows_lo   = 1 << 12;
  constexpr int rows_hi   = 1 << 24;
  constexpr int rows_step = 8;

  // Row-length range.
  constexpr int rowlen_lo   = 1 << 5;
  constexpr int rowlen_hi   = 1 << 13;
  constexpr int rowlen_step = 4;

  generate_string_bench_args(b, rows_lo, rows_hi, rows_step, rowlen_lo, rowlen_hi, rowlen_step);
}

/**
 * Defines and registers one StringSubstring fixture benchmark.
 *
 * `name` must be an enumerator of substring_type: it is used both as the benchmark
 * name and, via `substring_type::name`, as the variant passed to BM_substring.
 * The registered benchmark takes its arguments from generate_bench_args, uses
 * manual timing, and reports in milliseconds.
 */
#define STRINGS_BENCHMARK_DEFINE(name) \
BENCHMARK_DEFINE_F(StringSubstring, name) \
(::benchmark::State & st) { BM_substring(st, substring_type::name); } \
BENCHMARK_REGISTER_F(StringSubstring, name) \
->Apply(generate_bench_args) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

// One registered benchmark per substring_type enumerator.
STRINGS_BENCHMARK_DEFINE(position)
STRINGS_BENCHMARK_DEFINE(multi_position)
STRINGS_BENCHMARK_DEFINE(delimiter)
STRINGS_BENCHMARK_DEFINE(multi_delimiter)
136 changes: 61 additions & 75 deletions cpp/src/strings/substring.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -43,17 +43,25 @@ namespace {
* using the provided start, stop, and step parameters.
*/
struct substring_fn {
const column_device_view d_column;
numeric_scalar_device_view<size_type> d_start, d_stop, d_step;
const int32_t* d_offsets{};
column_device_view const d_column;
numeric_scalar_device_view<size_type> const d_start;
numeric_scalar_device_view<size_type> const d_stop;
numeric_scalar_device_view<size_type> const d_step;
int32_t* d_offsets{};
char* d_chars{};

__device__ cudf::size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) return 0; // null string
string_view d_str = d_column.template element<string_view>(idx);
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_column.template element<string_view>(idx);
auto const length = d_str.length();
if (length == 0) return 0; // empty string
if (length == 0) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
size_type const step = d_step.is_valid() ? d_step.value() : 1;
auto const begin = [&] { // always inclusive
// when invalid, default depends on step
Expand Down Expand Up @@ -88,7 +96,7 @@ struct substring_fn {
if (d_buffer) d_buffer += from_char_utf8(*itr, d_buffer);
itr += step;
}
return bytes;
if (!d_chars) d_offsets[idx] = bytes;
}
};

Expand All @@ -103,42 +111,26 @@ std::unique_ptr<column> slice_strings(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
{
size_type strings_count = strings.size();
if (strings_count == 0) return make_empty_strings_column(stream, mr);
if (strings.is_empty()) return make_empty_strings_column(stream, mr);

if (step.is_valid()) CUDF_EXPECTS(step.value(stream) != 0, "Step parameter must not be 0");

auto strings_column = column_device_view::create(strings.parent(), stream);
auto d_column = *strings_column;
auto d_start = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(start));
auto d_stop = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(stop));
auto d_step = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(step));

// copy the null mask
rmm::device_buffer null_mask = cudf::detail::copy_bitmask(strings.parent(), stream, mr);

// build offsets column
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<int32_t>(0), substring_fn{d_column, d_start, d_stop, d_step});
auto offsets_column = make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_new_offsets = offsets_column->view().data<int32_t>();

// build chars column
auto bytes = cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column = strings::detail::create_chars_child_column(
strings_count, strings.null_count(), bytes, stream, mr);
auto d_chars = chars_column->mutable_view().data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
strings_count,
substring_fn{d_column, d_start, d_stop, d_step, d_new_offsets, d_chars});
auto const d_column = column_device_view::create(strings.parent(), stream);
auto const d_start = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(start));
auto const d_stop = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(stop));
auto const d_step = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(step));

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
auto children = make_strings_children(substring_fn{*d_column, d_start, d_stop, d_step},
strings.size(),
strings.null_count(),
stream,
mr);

return make_strings_column(strings.size(),
std::move(children.first),
std::move(children.second),
strings.null_count(),
std::move(null_mask),
cudf::detail::copy_bitmask(strings.parent(), stream, mr),
stream,
mr);
}
Expand Down Expand Up @@ -166,25 +158,33 @@ namespace {
* This both calculates the output size and executes the substring.
*/
struct substring_from_fn {
const column_device_view d_column;
const cudf::detail::input_indexalator starts;
const cudf::detail::input_indexalator stops;
const int32_t* d_offsets{};
column_device_view const d_column;
cudf::detail::input_indexalator const starts;
cudf::detail::input_indexalator const stops;
int32_t* d_offsets{};
char* d_chars{};

__device__ size_type operator()(size_type idx)
__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) return 0; // null string
string_view d_str = d_column.template element<string_view>(idx);
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const d_str = d_column.template element<string_view>(idx);
auto const length = d_str.length();
auto const start = starts[idx];
if (start >= length) return 0; // empty string
if (start >= length) {
if (!d_chars) d_offsets[idx] = 0;
return;
}
auto const stop = stops[idx];
auto const end = (((stop < 0) || (stop > length)) ? length : stop);

string_view d_substr = d_str.substr(start, end - start);
if (d_chars) memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes());
return d_substr.size_bytes();
auto const d_substr = d_str.substr(start, end - start);
if (d_chars)
memcpy(d_chars + d_offsets[idx], d_substr.data(), d_substr.size_bytes());
else
d_offsets[idx] = d_substr.size_bytes();
}
};

Expand Down Expand Up @@ -212,32 +212,18 @@ std::unique_ptr<column> compute_substrings_from_fn(column_device_view const& d_c
auto strings_count = d_column.size();

// Copy the null mask
rmm::device_buffer null_mask{0, stream, mr};
if (d_column.nullable())
null_mask = rmm::device_buffer(
d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr);

// Build offsets column
auto offsets_transformer_itr = thrust::make_transform_iterator(
thrust::make_counting_iterator<size_type>(0), substring_from_fn{d_column, starts, stops});
auto offsets_column = cudf::strings::detail::make_offsets_child_column(
offsets_transformer_itr, offsets_transformer_itr + strings_count, stream, mr);
auto d_new_offsets = offsets_column->view().data<int32_t>();

// Build chars column
auto bytes = cudf::detail::get_value<int32_t>(offsets_column->view(), strings_count, stream);
auto chars_column =
cudf::strings::detail::create_chars_child_column(strings_count, null_count, bytes, stream, mr);
auto chars_view = chars_column->mutable_view();
auto d_chars = chars_view.template data<char>();
thrust::for_each_n(rmm::exec_policy(stream),
thrust::make_counting_iterator<cudf::size_type>(0),
strings_count,
substring_from_fn{d_column, starts, stops, d_new_offsets, d_chars});
rmm::device_buffer null_mask =
!d_column.nullable()
? rmm::device_buffer{0, stream, mr}
: rmm::device_buffer(
d_column.null_mask(), cudf::bitmask_allocation_size_bytes(strings_count), stream, mr);

auto children = make_strings_children(
substring_from_fn{d_column, starts, stops}, strings_count, null_count, stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
std::move(chars_column),
std::move(children.first),
std::move(children.second),
null_count,
std::move(null_mask),
stream,
Expand Down