diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 25d012b1b33..403b24e58de 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -211,7 +211,7 @@ ConfigureBench(STRINGS_BENCH string/contains_benchmark.cpp string/convert_datetime_benchmark.cpp string/convert_durations_benchmark.cpp - string/convert_floats_benchmark.cpp + string/convert_numerics_benchmark.cpp string/copy_benchmark.cpp string/extract_benchmark.cpp string/factory_benchmark.cu diff --git a/cpp/benchmarks/string/convert_floats_benchmark.cpp b/cpp/benchmarks/string/convert_floats_benchmark.cpp deleted file mode 100644 index 03d3d4a9439..00000000000 --- a/cpp/benchmarks/string/convert_floats_benchmark.cpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include -#include - -#include -#include - -#include -#include - -namespace { -template -std::unique_ptr get_floats_column(int64_t array_size) -{ - std::unique_ptr tbl; - if (sizeof(FloatType) == sizeof(float)) { - tbl = create_random_table( - {cudf::type_id::FLOAT32}, 1, row_count{static_cast(array_size)}); - } else { - tbl = create_random_table( - {cudf::type_id::FLOAT64}, 1, row_count{static_cast(array_size)}); - } - return std::move(tbl->release().front()); -} - -std::unique_ptr get_floats_string_column(int64_t array_size) -{ - const auto floats = get_floats_column(array_size); - return cudf::strings::from_floats(floats->view()); -} -} // anonymous namespace - -class StringToFloatNumber : public cudf::benchmark { -}; - -template -void convert_to_float_number(benchmark::State& state) -{ - const auto array_size = state.range(0); - const auto strings_col = get_floats_string_column(array_size); - const auto strings_view = cudf::strings_column_view(strings_col->view()); - - for (auto _ : state) { - cuda_event_timer raii(state, true); - volatile auto results = cudf::strings::to_floats(strings_view, cudf::data_type{float_type}); - } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - (strings_view.chars_size() + array_size * cudf::size_of(cudf::data_type{float_type}))); -} - -class StringFromFloatNumber : public cudf::benchmark { -}; - -template -void convert_from_float_number(benchmark::State& state) -{ - const auto array_size = state.range(0); - const auto floats = get_floats_column(array_size); - const auto floats_view = floats->view(); - std::unique_ptr results = nullptr; - - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - results = cudf::strings::from_floats(floats_view); - } - - // bytes_processed = bytes_input + bytes_output - state.SetBytesProcessed( - state.iterations() * - 
(cudf::strings_column_view(results->view()).chars_size() + array_size * sizeof(FloatType))); -} - -#define CV_TO_FLOATS_BENCHMARK_DEFINE(name, float_type_id) \ - BENCHMARK_DEFINE_F(StringToFloatNumber, name)(::benchmark::State & state) \ - { \ - convert_to_float_number(state); \ - } \ - BENCHMARK_REGISTER_F(StringToFloatNumber, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -#define CV_FROM_FLOATS_BENCHMARK_DEFINE(name, float_type) \ - BENCHMARK_DEFINE_F(StringFromFloatNumber, name)(::benchmark::State & state) \ - { \ - convert_from_float_number(state); \ - } \ - BENCHMARK_REGISTER_F(StringFromFloatNumber, name) \ - ->RangeMultiplier(4) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMicrosecond); - -CV_TO_FLOATS_BENCHMARK_DEFINE(string_to_float32, cudf::type_id::FLOAT32); -CV_TO_FLOATS_BENCHMARK_DEFINE(string_to_float64, cudf::type_id::FLOAT64); - -CV_FROM_FLOATS_BENCHMARK_DEFINE(string_from_float32, float); -CV_FROM_FLOATS_BENCHMARK_DEFINE(string_from_float64, double); diff --git a/cpp/benchmarks/string/convert_numerics_benchmark.cpp b/cpp/benchmarks/string/convert_numerics_benchmark.cpp new file mode 100644 index 00000000000..86f4d413974 --- /dev/null +++ b/cpp/benchmarks/string/convert_numerics_benchmark.cpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+// NOTE(review): angle-bracketed spans (include targets, template lists) look missing here - confirm.
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+namespace {
+// Generates a random table via create_random_table() sized by `rows` and returns its first column.
+template
+std::unique_ptr get_numerics_column(cudf::size_type rows)
+{
+  std::unique_ptr result =
+    create_random_table({cudf::type_to_id()}, 1, row_count{rows});
+  return std::move(result->release().front());  // hand the first released column to the caller
+}
+// Builds a random numeric column of `rows` rows and returns it converted to a strings column.
+template
+std::unique_ptr get_strings_column(cudf::size_type rows)
+{
+  auto const numerics_col = get_numerics_column(rows);
+  if constexpr (std::is_floating_point_v) {  // converter is chosen at compile time
+    return cudf::strings::from_floats(numerics_col->view());
+  } else {
+    return cudf::strings::from_integers(numerics_col->view());
+  }
+}
+}  // anonymous namespace
+// Fixture for the strings -> numeric benchmarks; adds no members to cudf::benchmark.
+class StringsToNumeric : public cudf::benchmark {
+};
+// Times to_floats() / to_integers() on a generated strings column of state.range(0) rows.
+template
+void convert_to_number(benchmark::State& state)
+{
+  auto const rows = static_cast(state.range(0));
+
+  auto const strings_col  = get_strings_column(rows);
+  auto const strings_view = cudf::strings_column_view(strings_col->view());
+  auto const col_type     = cudf::type_to_id();
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
+    if constexpr (std::is_floating_point_v) {
+      cudf::strings::to_floats(strings_view, cudf::data_type{col_type});  // result discarded
+    } else {
+      cudf::strings::to_integers(strings_view, cudf::data_type{col_type});  // result discarded
+    }
+  }
+
+  // bytes_processed = bytes_input (string chars) + bytes_output (rows * sizeof(NumericType))
+  state.SetBytesProcessed(state.iterations() *
+                          (strings_view.chars_size() + rows * sizeof(NumericType)));
+}
+// Fixture for the numeric -> strings benchmarks; adds no members to cudf::benchmark.
+class StringsFromNumeric : public cudf::benchmark {
+};
+// Times from_floats() / from_integers() on a generated numeric column of state.range(0) rows.
+template
+void convert_from_number(benchmark::State& state)
+{
+  auto const rows = static_cast(state.range(0));
+
+  auto const numerics_col  = get_numerics_column(rows);
+  auto const numerics_view = numerics_col->view();
+
+  std::unique_ptr results = nullptr;  // set inside the loop; read afterwards for the byte count
+
+  for (auto _ : state) {
+    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
+    if constexpr (std::is_floating_point_v)
+      results = cudf::strings::from_floats(numerics_view);
+    else
+      results = cudf::strings::from_integers(numerics_view);
+  }
+
+  // bytes_processed = bytes_input (rows * sizeof(NumericType)) + bytes_output (result chars)
+  state.SetBytesProcessed(
+    state.iterations() *
+    (cudf::strings_column_view(results->view()).chars_size() + rows * sizeof(NumericType)));
+}
+// Defines and registers benchmark `name` (runs convert_to_number): 1 << 10 .. 1 << 17 rows, x4 steps.
+#define CONVERT_TO_NUMERICS_BD(name, type)                               \
+  BENCHMARK_DEFINE_F(StringsToNumeric, name)(::benchmark::State & state) \
+  {                                                                      \
+    convert_to_number(state);                                            \
+  }                                                                      \
+  BENCHMARK_REGISTER_F(StringsToNumeric, name)                           \
+    ->RangeMultiplier(4)                                                 \
+    ->Range(1 << 10, 1 << 17)                                            \
+    ->UseManualTime()                                                    \
+    ->Unit(benchmark::kMicrosecond);
+// Defines and registers benchmark `name` (runs convert_from_number): 1 << 10 .. 1 << 17 rows, x4 steps.
+#define CONVERT_FROM_NUMERICS_BD(name, type)                               \
+  BENCHMARK_DEFINE_F(StringsFromNumeric, name)(::benchmark::State & state) \
+  {                                                                        \
+    convert_from_number(state);                                            \
+  }                                                                        \
+  BENCHMARK_REGISTER_F(StringsFromNumeric, name)                           \
+    ->RangeMultiplier(4)                                                   \
+    ->Range(1 << 10, 1 << 17)                                              \
+    ->UseManualTime()                                                      \
+    ->Unit(benchmark::kMicrosecond);
+// Strings -> numeric benchmark instances, one per target type.
+CONVERT_TO_NUMERICS_BD(strings_to_float32, float);
+CONVERT_TO_NUMERICS_BD(strings_to_float64, double);
+CONVERT_TO_NUMERICS_BD(strings_to_int32, int32_t);
+CONVERT_TO_NUMERICS_BD(strings_to_int64, int64_t);
+CONVERT_TO_NUMERICS_BD(strings_to_uint8, uint8_t);
+CONVERT_TO_NUMERICS_BD(strings_to_uint16, uint16_t);
+// Numeric -> strings benchmark instances, one per source type.
+CONVERT_FROM_NUMERICS_BD(strings_from_float32, float);
+CONVERT_FROM_NUMERICS_BD(strings_from_float64, double);
+CONVERT_FROM_NUMERICS_BD(strings_from_int32, int32_t);
+CONVERT_FROM_NUMERICS_BD(strings_from_int64, int64_t);
+CONVERT_FROM_NUMERICS_BD(strings_from_uint8, uint8_t);
+CONVERT_FROM_NUMERICS_BD(strings_from_uint16, uint16_t);