From 7fdd503391ad5ed41b09efebee107a0ff4883e60 Mon Sep 17 00:00:00 2001 From: Will Speak Date: Tue, 30 Aug 2022 08:56:17 +0100 Subject: [PATCH] Initial MATCH_WHOLE_STRING Implementation This new search option aims to ensure that the pattern matches the whole input string. To be used to implement an `is_match` check. --- doc/API | 2 +- src/oniguruma.h | 3 ++- src/regexec.c | 9 ++++++++- src/regint.h | 1 + test/test_options.c | 5 ++++- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/doc/API b/doc/API index 30e9a264..a6030757 100644 --- a/doc/API +++ b/doc/API @@ -407,7 +407,7 @@ Oniguruma API Version 6.9.7 2021/03/03 ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z) ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G) ONIG_OPTION_CALLBACK_EACH_MATCH Call back for all successful matches. - + ONIG_OPTION_MATCH_WHOLE_STRING Try to match the whole of (str), rather than returning after the first match is found. 
# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, diff --git a/src/oniguruma.h b/src/oniguruma.h index 096ba7dc..418bacdf 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -401,8 +401,9 @@ typedef unsigned int OnigOptionType; #define ONIG_OPTION_NOT_END_STRING (ONIG_OPTION_NOT_BEGIN_STRING << 1) #define ONIG_OPTION_NOT_BEGIN_POSITION (ONIG_OPTION_NOT_END_STRING << 1) #define ONIG_OPTION_CALLBACK_EACH_MATCH (ONIG_OPTION_NOT_BEGIN_POSITION << 1) +#define ONIG_OPTION_MATCH_WHOLE_STRING (ONIG_OPTION_CALLBACK_EACH_MATCH << 1) -#define ONIG_OPTION_MAXBIT ONIG_OPTION_CALLBACK_EACH_MATCH +#define ONIG_OPTION_MAXBIT ONIG_OPTION_MATCH_WHOLE_STRING #define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt)) #define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt)) diff --git a/src/regexec.c b/src/regexec.c index 23750be1..c8dc24c8 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -3155,6 +3155,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } + // FIXME: This doesn't solve the issue. Properly implement backtracking + // to find longest match in whole string. Do we need to introduce + // a phony acnhor at the end of the string? + if (OPTON_MATCH_WHOLE_STRING(options)) { + best_len = ONIG_MISMATCH; + goto fail; + } + /* default behavior: return first-matching result. 
*/ goto match_at_end; @@ -5459,7 +5467,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } - /* anchor optimize: resume search range */ if (reg->anchor != 0 && str < end) { UChar *min_semi_end, *max_semi_end; diff --git a/src/regint.h b/src/regint.h index 7bc900d1..19878fc3 100644 --- a/src/regint.h +++ b/src/regint.h @@ -408,6 +408,7 @@ typedef unsigned int MemStatusType; #define OPTON_NOT_BEGIN_STRING(option) ((option) & ONIG_OPTION_NOT_BEGIN_STRING) #define OPTON_NOT_END_STRING(option) ((option) & ONIG_OPTION_NOT_END_STRING) #define OPTON_NOT_BEGIN_POSITION(option) ((option) & ONIG_OPTION_NOT_BEGIN_POSITION) +#define OPTON_MATCH_WHOLE_STRING(option) ((option) & ONIG_OPTION_MATCH_WHOLE_STRING) #define INFINITE_REPEAT -1 diff --git a/test/test_options.c b/test/test_options.c index 7010f0fe..8415e641 100644 --- a/test/test_options.c +++ b/test/test_options.c @@ -197,6 +197,10 @@ extern int main(int argc, char* argv[]) n(ONIG_OPTION_NOT_END_STRING, "ab\\Z", "ab"); n(ONIG_OPTION_NOT_END_STRING, "ab\\Z", "ab\n"); + x2(ONIG_OPTION_NONE, "a|abc", "abc", 0, 1); + x2(ONIG_OPTION_NONE, "(a|abc)\\Z", "abc", 0, 3); + x2(ONIG_OPTION_MATCH_WHOLE_STRING, "a|abc", "abc", 0, 3); + x2(ONIG_OPTION_WORD_IS_ASCII, "\\w", "@g", 1, 2); n(ONIG_OPTION_WORD_IS_ASCII, "\\w", "あ"); x2(ONIG_OPTION_NONE, "\\d", "1", 0, 3); @@ -219,6 +223,5 @@ extern int main(int argc, char* argv[]) onig_region_free(region, 1); onig_end(); - return ((nfail == 0 && nerror == 0) ? 0 : -1); }