Skip to content

Commit

Permalink
Initial MATCH_WHOLE_STRING Implementation
Browse files Browse the repository at this point in the history
This new search option aims to ensure that the pattern matches the whole
input string. It is intended to be used to implement an `is_match` check.
  • Loading branch information
iwillspeak committed Aug 30, 2022
1 parent b808489 commit 7fdd503
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 4 deletions.
2 changes: 1 addition & 1 deletion doc/API
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ Oniguruma API Version 6.9.7 2021/03/03
ONIG_OPTION_NOT_END_STRING Do not regard the (end) as a string endpoint (* fail \z, \Z)
ONIG_OPTION_NOT_BEGIN_POSITION Do not regard the (start) as start position of search (* fail \G)
ONIG_OPTION_CALLBACK_EACH_MATCH Call back for all successful matches.

ONIG_OPTION_MATCH_WHOLE_STRING Try to match the whole of (str), rather than returning after the first match is found.

# int onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
const UChar* at, OnigRegion* region,
Expand Down
3 changes: 2 additions & 1 deletion src/oniguruma.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,9 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_NOT_END_STRING (ONIG_OPTION_NOT_BEGIN_STRING << 1)
#define ONIG_OPTION_NOT_BEGIN_POSITION (ONIG_OPTION_NOT_END_STRING << 1)
#define ONIG_OPTION_CALLBACK_EACH_MATCH (ONIG_OPTION_NOT_BEGIN_POSITION << 1)
#define ONIG_OPTION_MATCH_WHOLE_STRING (ONIG_OPTION_CALLBACK_EACH_MATCH << 1)

#define ONIG_OPTION_MAXBIT ONIG_OPTION_CALLBACK_EACH_MATCH
#define ONIG_OPTION_MAXBIT ONIG_OPTION_MATCH_WHOLE_STRING

#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
Expand Down
9 changes: 8 additions & 1 deletion src/regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -3155,6 +3155,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto fail;
}

// FIXME: This doesn't solve the issue. Properly implement backtracking
// to find the longest match in the whole string. Do we need to introduce
// a phony anchor at the end of the string?
if (OPTON_MATCH_WHOLE_STRING(options)) {
best_len = ONIG_MISMATCH;
goto fail;
}

/* default behavior: return first-matching result. */
goto match_at_end;

Expand Down Expand Up @@ -5459,7 +5467,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
else goto finish; /* error */ \
}


/* anchor optimize: resume search range */
if (reg->anchor != 0 && str < end) {
UChar *min_semi_end, *max_semi_end;
Expand Down
1 change: 1 addition & 0 deletions src/regint.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ typedef unsigned int MemStatusType;
#define OPTON_NOT_BEGIN_STRING(option) ((option) & ONIG_OPTION_NOT_BEGIN_STRING)
#define OPTON_NOT_END_STRING(option) ((option) & ONIG_OPTION_NOT_END_STRING)
#define OPTON_NOT_BEGIN_POSITION(option) ((option) & ONIG_OPTION_NOT_BEGIN_POSITION)
#define OPTON_MATCH_WHOLE_STRING(option) ((option) & ONIG_OPTION_MATCH_WHOLE_STRING)


#define INFINITE_REPEAT -1
Expand Down
5 changes: 4 additions & 1 deletion test/test_options.c
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ extern int main(int argc, char* argv[])
n(ONIG_OPTION_NOT_END_STRING, "ab\\Z", "ab");
n(ONIG_OPTION_NOT_END_STRING, "ab\\Z", "ab\n");

x2(ONIG_OPTION_NONE, "a|abc", "abc", 0, 1);
x2(ONIG_OPTION_NONE, "(a|abc)\\Z", "abc", 0, 3);
x2(ONIG_OPTION_MATCH_WHOLE_STRING, "a|abc", "abc", 0, 3);

x2(ONIG_OPTION_WORD_IS_ASCII, "\\w", "@g", 1, 2);
n(ONIG_OPTION_WORD_IS_ASCII, "\\w", "あ");
x2(ONIG_OPTION_NONE, "\\d", "1", 0, 3);
Expand All @@ -219,6 +223,5 @@ extern int main(int argc, char* argv[])

onig_region_free(region, 1);
onig_end();

return ((nfail == 0 && nerror == 0) ? 0 : -1);
}

0 comments on commit 7fdd503

Please sign in to comment.