Skip to content

Commit

Permalink
GH-38263 [C++]: Prefer to call string_view::data() instead of begin()…
Browse files Browse the repository at this point in the history
… where a char pointer is expected (#38265)

### Rationale for this change

The MSVC compiler doesn't seem to allow user code to assume that `std::string_view::const_iterator` is `const char*`. Using only `re2::StringPiece`, and calling `.data()` instead of `.begin()`, should make things more uniform across different compilers and STL implementations.

### What changes are included in this PR?

 - Use `re2::StringPiece` instead of `std::string_view` to interact with `re2`
 - Use `data()` instead of `begin()` where a `char*` is expected

### Are these changes tested?

Yes, by existing tests.
* Closes: #38263

Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
  • Loading branch information
felipecrv authored Oct 16, 2023
1 parent fb26178 commit 93ca3b2
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions cpp/src/arrow/compute/kernels/scalar_string_ascii.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2063,12 +2063,12 @@ struct RegexSubstringReplacer {
regex_find_("(" + options_.pattern + ")", MakeRE2Options<Type>()),
regex_replacement_(options_.pattern, MakeRE2Options<Type>()) {}

Status ReplaceString(std::string_view s, TypedBufferBuilder<uint8_t>* builder) const {
Status ReplaceString(re2::StringPiece s, TypedBufferBuilder<uint8_t>* builder) const {
re2::StringPiece replacement(options_.replacement);

// If s is empty, then it's essentially global
if (options_.max_replacements == -1 || s.empty()) {
std::string s_copy(s);
std::string s_copy{s.data(), s.length()};
RE2::GlobalReplace(&s_copy, regex_replacement_, replacement);
return builder->Append(reinterpret_cast<const uint8_t*>(s_copy.data()),
s_copy.length());
Expand All @@ -2079,18 +2079,18 @@ struct RegexSubstringReplacer {
// We might do this faster similar to RE2::GlobalReplace using Match and Rewrite
const char* i = s.data();
const char* end = s.data() + s.length();
re2::StringPiece piece(s.data(), s.length());
re2::StringPiece mutable_s{s};

int64_t max_replacements = options_.max_replacements;
while ((i < end) && (max_replacements != 0)) {
std::string found;
if (!RE2::FindAndConsume(&piece, regex_find_, &found)) {
if (!RE2::FindAndConsume(&mutable_s, regex_find_, &found)) {
RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
static_cast<int64_t>(end - i)));
i = end;
} else {
// wind back to the beginning of the match
const char* pos = piece.begin() - found.length();
const char* pos = mutable_s.data() - found.length();
// the string before the pattern
RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(i),
static_cast<int64_t>(pos - i)));
Expand All @@ -2101,7 +2101,7 @@ struct RegexSubstringReplacer {
RETURN_NOT_OK(builder->Append(reinterpret_cast<const uint8_t*>(found.data()),
static_cast<int64_t>(found.length())));
// skip pattern
i = piece.begin();
i = mutable_s.data();
max_replacements--;
}
}
Expand Down

0 comments on commit 93ca3b2

Please sign in to comment.