Skip to content
/ cudf Public
forked from rapidsai/cudf

Commit

Permalink
Add cudf::strings::udf::replace function (rapidsai#12210)
Browse files Browse the repository at this point in the history
Add udf function for replacing all occurrences of a target string within a source string with a replacement string.
This is to support rapidsai#12207

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - https://github.com/brandon-b-miller

URL: rapidsai#12210
  • Loading branch information
davidwendt authored Nov 23, 2022
1 parent 104309d commit d970c5a
Showing 1 changed file with 64 additions and 0 deletions.
64 changes: 64 additions & 0 deletions python/strings_udf/cpp/include/cudf/strings/udf/replace.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/strings/string_view.cuh>
#include <cudf/strings/udf/udf_string.cuh>

namespace cudf {
namespace strings {
namespace udf {

/**
 * @brief Builds a new string in which each match of `target` found in `source`
 * is substituted with `replacement`
 *
 * The source is scanned left to right. Text between matches is copied to the
 * output unchanged, and whatever follows the final match is copied as the tail.
 *
 * If target is empty then replacement is inserted between every character.
 *
 * @param source Source string to search
 * @param target String to match within source
 * @param replacement String to replace the target within the source
 * @return Resulting string
 */
__device__ inline udf_string replace(string_view source,
                                     string_view target,
                                     string_view replacement)
{
  auto const target_len   = target.length();
  auto const source_len   = source.length();
  bool const empty_target = (target_len == 0);

  udf_string output;
  size_type cursor = 0;  // position in source where the next search starts

  for (;;) {
    size_type const found = source.find(target, cursor);
    if (found == string_view::npos) { break; }  // no further matches

    // copy the unmatched text preceding this match, then the substitute
    output.append(source.substr(cursor, found - cursor));
    output.append(replacement);

    // resume searching just past the matched target
    cursor = found + target_len;

    if (empty_target) {
      // a zero-length match does not advance the cursor; step over one
      // position so the loop makes progress, copying that source character
      // through unless the step moved beyond the end of the source
      ++cursor;
      if (cursor <= source_len) { output.append(source.substr(found, 1)); }
    }
  }

  // copy whatever remains after the last match
  if (cursor < source_len) { output.append(source.substr(cursor, source_len - cursor)); }

  return output;
}

} // namespace udf
} // namespace strings
} // namespace cudf

0 comments on commit d970c5a

Please sign in to comment.