Skip to content

Commit

Permalink
Fix incorrect compiling when [Aa] etc. are quantified
Browse files (browse the repository at this point in the history)
  • Loading branch information
PhilipHazel committed Jan 26, 2022
1 parent 419e3c6 commit fdd9479
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 8 deletions.
6 changes: 6 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ Clarke PR#72.
21. A user discovered that the library names in CMakeLists.txt for MSVC
debugger (PDB) files were incorrect - perhaps this had never been tried for
PCRE2?

22. An item such as [Aa] is optimized into a caseless single character match.
When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a
pattern, the character recorded for the optimizing "must be present for a
match" check was not being flagged as caseless, causing some matches that
should have succeeded to fail.


Version 10.39 29-October-2021
-----------------------------
Expand Down
21 changes: 13 additions & 8 deletions src/pcre2_compile.c
Original file line number Diff line number Diff line change
Expand Up @@ -2115,17 +2115,17 @@ if (c == CHAR_LEFT_CURLY_BRACKET)
{
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
c = *ptr++;
while (c == '_' || c == '-' || isspace(c))
while (c == '_' || c == '-' || isspace(c))
{
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
c = *ptr++;
}
}
if (c == CHAR_NUL) goto ERROR_RETURN;
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
name[i] = tolower(c);
if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
}

if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
name[i] = 0;
}
Expand Down Expand Up @@ -2159,16 +2159,16 @@ another property can be diagnosed. */
if (vptr != NULL)
{
int offset = 0;
PCRE2_UCHAR sname[8];
PCRE2_UCHAR sname[8];

*vptr = 0; /* Terminate property name */
if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
PRIV(strcmp_c8)(name, STRING_bc) == 0)
{
offset = 4;
sname[0] = CHAR_b;
sname[0] = CHAR_b;
sname[1] = CHAR_i; /* There is no strcpy_c8 function */
sname[2] = CHAR_d;
sname[2] = CHAR_d;
sname[3] = CHAR_i;
}

Expand Down Expand Up @@ -7023,14 +7023,19 @@ for (;; pptr++)
#endif /* MAYBE_UTF_MULTI */

/* Handle the case of a single code unit - either with no UTF support, or
with UTF disabled, or for a single-code-unit UTF character. */
with UTF disabled, or for a single-code-unit UTF character. In the latter
case, for a repeated positive match, get the caseless flag for the
required code unit from the previous character, because a class like [Aa]
sets a caseless A but by now the req_caseopt flag has been reset. */

{
mcbuffer[0] = code[-1];
mclength = 1;
if (op_previous <= OP_CHARI && repeat_min > 1)
{
reqcu = mcbuffer[0];
reqcuflags = req_caseopt | cb->req_varyopt;
reqcuflags = cb->req_varyopt;
if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS;
}
}
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
Expand Down
9 changes: 9 additions & 0 deletions testdata/testinput2
Original file line number Diff line number Diff line change
Expand Up @@ -5923,4 +5923,13 @@ a)"xI

# ---------

/[Aa]{2}/BI
aabcd

/A{2}/iBI
aabcd

/[Aa]{2,3}/BI
aabcd

# End of testinput2
44 changes: 44 additions & 0 deletions testdata/testoutput2
Original file line number Diff line number Diff line change
Expand Up @@ -17702,6 +17702,50 @@ Failed: error -51: NULL argument passed with non-zero length

# ---------

/[Aa]{2}/BI
------------------------------------------------------------------
Bra
/i A{2}
Ket
End
------------------------------------------------------------------
Capture group count = 0
First code unit = 'A' (caseless)
Last code unit = 'A' (caseless)
Subject length lower bound = 2
aabcd
0: aa

/A{2}/iBI
------------------------------------------------------------------
Bra
/i A{2}
Ket
End
------------------------------------------------------------------
Capture group count = 0
Options: caseless
First code unit = 'A' (caseless)
Last code unit = 'A' (caseless)
Subject length lower bound = 2
aabcd
0: aa

/[Aa]{2,3}/BI
------------------------------------------------------------------
Bra
/i A{2}
/i A?+
Ket
End
------------------------------------------------------------------
Capture group count = 0
First code unit = 'A' (caseless)
Last code unit = 'A' (caseless)
Subject length lower bound = 2
aabcd
0: aa

# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
Expand Down

0 comments on commit fdd9479

Please sign in to comment.