Skip to content

Commit

Permalink
apacheGH-40968 add set_dot_nl(true) for Like option
Browse files Browse the repository at this point in the history
  • Loading branch information
xxlaykxx committed Apr 3, 2024
1 parent 41a989c commit b1d9a7a
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 26 deletions.
22 changes: 6 additions & 16 deletions cpp/src/gandiva/regex_functions_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,14 @@ Result<std::shared_ptr<LikeHolder>> LikeHolder::Make(const FunctionNode& node) {
"'like' function requires a string literal as the second parameter"));

RE2::Options regex_op;
regex_op.set_dot_nl(true); // set dotall mode for the regex.
if (node.descriptor()->name() == "ilike") {
regex_op.set_case_sensitive(false); // set case-insensitive for ilike function.

return Make(std::get<std::string>(literal->holder()), regex_op);
}
if (node.children().size() == 2) {
return Make(std::get<std::string>(literal->holder()));
return Make(std::get<std::string>(literal->holder()), regex_op);
} else {
auto escape_char = dynamic_cast<LiteralNode*>(node.children().at(2).get());
ARROW_RETURN_IF(
Expand All @@ -118,24 +119,13 @@ Result<std::shared_ptr<LikeHolder>> LikeHolder::Make(const FunctionNode& node) {
Status::Invalid(
"'like' function requires a string literal as the third parameter"));
return Make(std::get<std::string>(literal->holder()),
std::get<std::string>(escape_char->holder()));
std::get<std::string>(escape_char->holder()), regex_op);
}
}

Result<std::shared_ptr<LikeHolder>> LikeHolder::Make(const std::string& sql_pattern) {
std::string pcre_pattern;
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));

auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
ARROW_RETURN_IF(!lholder->regex_.ok(),
Status::Invalid("Building RE2 pattern '", pcre_pattern,
"' failed with: ", lholder->regex_.error()));

return lholder;
}

Result<std::shared_ptr<LikeHolder>> LikeHolder::Make(const std::string& sql_pattern,
const std::string& escape_char) {
const std::string& escape_char,
RE2::Options regex_op) {
ARROW_RETURN_IF(escape_char.length() > 1,
Status::Invalid("The length of escape char ", escape_char,
" in 'like' function is greater than 1"));
Expand All @@ -147,7 +137,7 @@ Result<std::shared_ptr<LikeHolder>> LikeHolder::Make(const std::string& sql_patt
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
}

auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
ARROW_RETURN_IF(!lholder->regex_.ok(),
Status::Invalid("Building RE2 pattern '", pcre_pattern,
"' failed with: ", lholder->regex_.error()));
Expand Down
5 changes: 2 additions & 3 deletions cpp/src/gandiva/regex_functions_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {

static Result<std::shared_ptr<LikeHolder>> Make(const FunctionNode& node);

static Result<std::shared_ptr<LikeHolder>> Make(const std::string& sql_pattern);

static Result<std::shared_ptr<LikeHolder>> Make(const std::string& sql_pattern,
const std::string& escape_char);
const std::string& escape_char,
RE2::Options regex_op);

static Result<std::shared_ptr<LikeHolder>> Make(const std::string& sql_pattern,
RE2::Options regex_op);
Expand Down
32 changes: 25 additions & 7 deletions cpp/src/gandiva/regex_functions_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class TestLikeHolder : public ::testing::Test {
};

TEST_F(TestLikeHolder, TestMatchAny) {
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab%", regex_op));

auto& like = *like_holder;
Expand All @@ -58,6 +59,7 @@ TEST_F(TestLikeHolder, TestMatchAny) {
}

TEST_F(TestLikeHolder, TestMatchOne) {
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab_", regex_op));

auto& like = *like_holder;
Expand All @@ -70,13 +72,22 @@ TEST_F(TestLikeHolder, TestMatchOne) {
}

// Regex metacharacters that appear in a SQL LIKE pattern are expected to be
// matched as literal characters rather than interpreted as regex syntax.
TEST_F(TestLikeHolder, TestPcreSpecial) {
  regex_op.set_dot_nl(true);  // dot-all mode: '.' in the regex may match '\n'
  EXPECT_OK_AND_ASSIGN(auto const holder, LikeHolder::Make(".*ab_", regex_op));

  auto& matches = *holder;
  // The leading ".*" must match itself literally, so an arbitrary prefix
  // such as "xx" is rejected.
  EXPECT_TRUE(matches(".*abc"));
  EXPECT_FALSE(matches("xxabc"));
}

// A '%' wildcard must be able to span embedded newlines when dot-all mode is
// enabled on the regex options.
TEST_F(TestLikeHolder, TestPcreSpecialWithNewLine) {
  regex_op.set_dot_nl(true);  // dot-all mode: '.' in the regex may match '\n'
  EXPECT_OK_AND_ASSIGN(auto const holder, LikeHolder::Make("%Space1.%", regex_op));

  auto& matches = *holder;
  // Multi-line input: the text after "Space1." contains '\n' characters.
  const std::string multiline_input =
      "[name: \"Space1.protect\"\nargs: \"count\"\ncolumn_name: \"pass_count\"]";
  EXPECT_TRUE(matches(multiline_input));
}

TEST_F(TestLikeHolder, TestRegexEscape) {
std::string res;
ARROW_EXPECT_OK(RegexUtil::SqlLikePatternToPcre("#%hello#_abc_def##", '#', res));
Expand All @@ -85,20 +96,22 @@ TEST_F(TestLikeHolder, TestRegexEscape) {
}

// A '.' in a SQL LIKE pattern is a literal dot, not a single-character
// wildcard, so "abc." must not match "abcd".
TEST_F(TestLikeHolder, TestDot) {
  regex_op.set_dot_nl(true);  // dot-all mode: '.' in the regex may match '\n'
  EXPECT_OK_AND_ASSIGN(auto const holder, LikeHolder::Make("abc.", regex_op));

  auto& matches = *holder;
  EXPECT_FALSE(matches("abcd"));
}

TEST_F(TestLikeHolder, TestMatchSubString) {
EXPECT_OK_AND_ASSIGN(auto like_holder, LikeHolder::Make("%abc%", "\\"));
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto like_holder, LikeHolder::Make("%abc%", "\\", regex_op));

auto& like = *like_holder;
EXPECT_TRUE(like("abc"));
EXPECT_FALSE(like("xxabdc"));

EXPECT_OK_AND_ASSIGN(like_holder, LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\"));
EXPECT_OK_AND_ASSIGN(like_holder, LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", regex_op));

auto& like_reserved_char = *like_holder;
EXPECT_TRUE(like_reserved_char("XXab-.^$*+?()[]{}|—/c%d"));
Expand Down Expand Up @@ -173,7 +186,8 @@ TEST_F(TestLikeHolder, TestOptimise) {
}

TEST_F(TestLikeHolder, TestMatchOneEscape) {
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\_", "\\"));
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\_", "\\", regex_op));

auto& like = *like_holder;

Expand All @@ -187,7 +201,8 @@ TEST_F(TestLikeHolder, TestMatchOneEscape) {
}

TEST_F(TestLikeHolder, TestMatchManyEscape) {
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\%", "\\"));
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\%", "\\", regex_op));

auto& like = *like_holder;

Expand All @@ -201,7 +216,8 @@ TEST_F(TestLikeHolder, TestMatchManyEscape) {
}

TEST_F(TestLikeHolder, TestMatchEscape) {
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\\\", "\\"));
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\\\", "\\", regex_op));

auto& like = *like_holder;

Expand All @@ -211,7 +227,8 @@ TEST_F(TestLikeHolder, TestMatchEscape) {
}

TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\_", ""));
regex_op.set_dot_nl(true);
EXPECT_OK_AND_ASSIGN(auto const like_holder, LikeHolder::Make("ab\\_", "", regex_op));

auto& like = *like_holder;

Expand All @@ -223,7 +240,8 @@ TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
}

TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
ASSERT_RAISES(Invalid, LikeHolder::Make("ab\\_", "\\\\").status());
regex_op.set_dot_nl(true);
ASSERT_RAISES(Invalid, LikeHolder::Make("ab\\_", "\\\\", regex_op).status());
}

class TestILikeHolder : public ::testing::Test {
Expand Down

0 comments on commit b1d9a7a

Please sign in to comment.