From a70908dc3cada0b3a7bc1fd06f7fa6982b4b1160 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Fri, 12 Aug 2022 14:05:03 +0900
Subject: [PATCH] ARROW-17370: [C++] Add limit to SplitString() (#13833)

Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
 cpp/src/arrow/util/string.cc      |  9 +++++++--
 cpp/src/arrow/util/string.h       |  3 ++-
 cpp/src/arrow/util/string_test.cc | 26 ++++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/util/string.cc b/cpp/src/arrow/util/string.cc
index 3a1586005528f..00ab8e64c4757 100644
--- a/cpp/src/arrow/util/string.cc
+++ b/cpp/src/arrow/util/string.cc
@@ -92,11 +92,16 @@ Status ParseHexValue(const char* data, uint8_t* out) {
 
 namespace internal {
 
-std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
+std::vector<util::string_view> SplitString(util::string_view v, char delimiter,
+                                           int64_t limit) {
   std::vector<util::string_view> parts;
   size_t start = 0, end;
   while (true) {
-    end = v.find(delimiter, start);
+    if (limit > 0 && static_cast<size_t>(limit - 1) <= parts.size()) {
+      end = std::string::npos;
+    } else {
+      end = v.find(delimiter, start);
+    }
     parts.push_back(v.substr(start, end - start));
     if (end == std::string::npos) {
       break;
diff --git a/cpp/src/arrow/util/string.h b/cpp/src/arrow/util/string.h
index d2c8ac38eeca6..b2baa0ebedaaf 100644
--- a/cpp/src/arrow/util/string.h
+++ b/cpp/src/arrow/util/string.h
@@ -45,7 +45,8 @@ namespace internal {
 
 /// \brief Split a string with a delimiter
 ARROW_EXPORT
-std::vector<util::string_view> SplitString(util::string_view v, char delim);
+std::vector<util::string_view> SplitString(util::string_view v, char delim,
+                                           int64_t limit = 0);
 
 /// \brief Join strings with a delimiter
 ARROW_EXPORT
diff --git a/cpp/src/arrow/util/string_test.cc b/cpp/src/arrow/util/string_test.cc
index 057d885fcdb75..2aa6fccbd9a0f 100644
--- a/cpp/src/arrow/util/string_test.cc
+++ b/cpp/src/arrow/util/string_test.cc
@@ -140,5 +140,31 @@ TEST(SplitString, OnlyDemiliter) {
   EXPECT_EQ(parts[1], "");
 }
 
+TEST(SplitString, Limit) {
+  std::string input = "a:b:c";
+  auto parts = SplitString(input, ':', 2);
+  ASSERT_EQ(parts.size(), 2);
+  EXPECT_EQ(parts[0], "a");
+  EXPECT_EQ(parts[1], "b:c");
+}
+
+TEST(SplitString, LimitOver) {
+  std::string input = "a:b:c";
+  auto parts = SplitString(input, ':', 4);
+  ASSERT_EQ(parts.size(), 3);
+  EXPECT_EQ(parts[0], "a");
+  EXPECT_EQ(parts[1], "b");
+  EXPECT_EQ(parts[2], "c");
+}
+
+TEST(SplitString, LimitZero) {
+  std::string input = "a:b:c";
+  auto parts = SplitString(input, ':', 0);
+  ASSERT_EQ(parts.size(), 3);
+  EXPECT_EQ(parts[0], "a");
+  EXPECT_EQ(parts[1], "b");
+  EXPECT_EQ(parts[2], "c");
+}
+
 }  // namespace internal
 }  // namespace arrow