From d970c5ae7716ba90d53100e8c97705416bf520ed Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 23 Nov 2022 09:44:14 -0500 Subject: [PATCH] Add cudf::strings::udf::replace function (#12210) Add udf function for replacing all occurrences of a target string within a source string with a replacement string. This is to support #12207 Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/12210 --- .../cpp/include/cudf/strings/udf/replace.cuh | 64 +++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/replace.cuh diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/replace.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/replace.cuh new file mode 100644 index 00000000000..c1f0cdc94c5 --- /dev/null +++ b/python/strings_udf/cpp/include/cudf/strings/udf/replace.cuh @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cudf { +namespace strings { +namespace udf { + +/** + * @brief Returns new string replacing all occurrences of target with replacement + * + * If target is empty then replacement is inserted between every character. 
+ * + * @param source Source string to search + * @param target String to match within source + * @param replacement String to replace the target within the source + * @return Resulting string + */ +__device__ inline udf_string replace(string_view source, + string_view target, + string_view replacement) +{ + udf_string result; + auto const tgt_length = target.length(); + auto const src_length = source.length(); + size_type last_position = 0; + size_type position = 0; + while (position != string_view::npos) { + position = source.find(target, last_position); + if (position != string_view::npos) { + result.append(source.substr(last_position, position - last_position)); + result.append(replacement); + last_position = position + tgt_length; + if ((tgt_length == 0) && (++last_position <= src_length)) { + result.append(source.substr(position, 1)); + } + } + } + if (last_position < src_length) { + result.append(source.substr(last_position, src_length - last_position)); + } + + return result; +} + +} // namespace udf +} // namespace strings +} // namespace cudf