diff --git a/ChangeLog b/ChangeLog index 32a38fecd..95fee8d47 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,7 +1,23 @@ Change Log for PCRE2 -------------------- -Version 10.00 05-January-2015 +Version 10.10 13-January-2015 +----------------------------- + +1. When a pattern is compiled, it remembers the highest back reference so that +when matching, if the ovector is too small, extra memory can be obtained to +use instead. A conditional subpattern whose condition is a check on a capture +having happened, such as, for example in the pattern /^(?:(a)|b)(?(1)A|B)/, is +another kind of back reference, but it was not setting the highest +backreference number. This mattered only if pcre2_match() was called with an +ovector that was too small to hold the capture, and there was no other kind of +back reference (a situation which is probably quite rare). The effect of the +bug was that the condition was always treated as FALSE when the capture could +not be consulted, leading to incorrect behaviour by pcre2_match(). This bug +has been fixed. + + +Version 10.00 05-January-2015 ----------------------------- Version 10.00 is the first release of PCRE2, a revised API for the PCRE diff --git a/configure.ac b/configure.ac index e4e7c4d70..b9ba4e167 100644 --- a/configure.ac +++ b/configure.ac @@ -9,9 +9,9 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [00]) -m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2014-01-05]) +m4_define(pcre2_minor, [10]) +m4_define(pcre2_prerelease, [-RC1]) +m4_define(pcre2_date, [2014-01-13]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. 
diff --git a/doc/pcre2api.3 b/doc/pcre2api.3 index 183f7fa54..a34dae31e 100644 --- a/doc/pcre2api.3 +++ b/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "02 January 2015" "PCRE2 10.00" +.TH PCRE2API 3 "13 January 2015" "PCRE2 10.10" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1474,8 +1474,12 @@ options returned for PCRE2_INFO_ALLOPTIONS. PCRE2_INFO_BACKREFMAX .sp Return the number of the highest back reference in the pattern. The third -argument should point to an \fBuint32_t\fP variable. Zero is returned if there -are no back references. +argument should point to an \fBuint32_t\fP variable. Named subpatterns acquire +numbers as well as names, and these count towards the highest back reference. +Back references such as \e4 or \eg{12} match the captured characters of the +given group, but in addition, the check that a capturing group is set in a +conditional subpattern such as (?(3)a|b) is also a back reference. Zero is +returned if there are no back references. .sp PCRE2_INFO_BSR .sp @@ -2849,6 +2853,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 02 January 2015 +Last updated: 13 January 2015 Copyright (c) 1997-2015 University of Cambridge. 
.fi diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 149abe97e..97c2084dd 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -5336,6 +5336,7 @@ for (;; ptr++) goto FAILED; } PUT2(code, 2+LINK_SIZE, recno); + if (recno > cb->top_backref) cb->top_backref = recno; break; } @@ -5355,15 +5356,18 @@ for (;; ptr++) if (i < cb->names_found) { - int offset = i++; - int count = 1; - recno = GET2(slot, 0); /* Number from first found */ - for (; i < cb->names_found; i++) + int offset = i; /* Offset of first name found */ + int count = 0; + + for (;;) { + recno = GET2(slot, 0); /* Number for last found */ + if (recno > cb->top_backref) cb->top_backref = recno; + count++; + if (++i >= cb->names_found) break; slot += cb->name_entry_size; if (PRIV(strncmp)(name, slot+IMM2_SIZE, namelen) != 0 || (slot+IMM2_SIZE)[namelen] != 0) break; - count++; } if (count > 1) diff --git a/testdata/testinput2 b/testdata/testinput2 index 6d0d25908..afd04cc1d 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4116,4 +4116,22 @@ a random value. 
/Ix /(*NO_DOTSTAR_ANCHOR)(?s).*\d/info +'^(?:(a)|b)(?(1)A|B)' + aA123\=ovector=1 + aA123\=ovector=2 + +'^(?:(?<AA>a)|b)(?(<AA>)A|B)' + aA123\=ovector=1 + aA123\=ovector=2 + +'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames + aA123\=ovector=1 + aA123\=ovector=2 + aA123\=ovector=3 + +'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames + aa123\=ovector=1 + aa123\=ovector=2 + aa123\=ovector=3 + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 4d90a9601..5588fdb26 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -1535,28 +1535,33 @@ Subject length lower bound = 3 /a(?(1)b)(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Subject length lower bound = 2 /a(?(1)bag|big)(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Last code unit = 'g' Subject length lower bound = 5 /a(?(1)bag|big)*(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Subject length lower bound = 2 /a(?(1)bag|big)+(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Last code unit = 'g' Subject length lower bound = 5 /a(?(1)b..|b..)(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Last code unit = 'b' Subject length lower bound = 5 @@ -3345,21 +3350,25 @@ Subject length lower bound = 1 /(?(1)ab|ac)(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Subject length lower bound = 3 /(?(1)abz|acz)(.)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'a' Last code unit = 'z' Subject length lower bound = 4 /(?(1)abz)(.)/I Capturing subpattern count = 1 +Max back reference = 1 Subject length lower bound = 1 /(?(1)abz)(1)23/I Capturing subpattern count = 1 +Max back reference = 1 Last code unit = '3' Subject length lower bound = 3 @@ -5923,6 +5932,7 @@ Subject length lower bound = 6 /^(?P<A>a)?(?(A)a|b)/I Capturing subpattern count = 1 +Max back reference = 1 Named capturing subpatterns: A 1 
Compile options: <none> @@ -5940,6 +5950,7 @@ No match /(?:(?(ZZ)a|b)(?P<ZZ>X))+/I Capturing subpattern count = 1 +Max back reference = 1 Named capturing subpatterns: ZZ 1 Last code unit = 'X' @@ -5956,6 +5967,7 @@ Failed: error 115 at offset 9: reference to non-existent subpattern /(?:(?(ZZ)a|b)(?(ZZ)a|b)(?P<ZZ>X))+/I Capturing subpattern count = 1 +Max back reference = 1 Named capturing subpatterns: ZZ 1 Last code unit = 'X' @@ -5966,6 +5978,7 @@ Subject length lower bound = 3 /(?:(?(ZZ)a|\(b\))\\(?P<ZZ>X))+/I Capturing subpattern count = 1 +Max back reference = 1 Named capturing subpatterns: ZZ 1 Last code unit = 'X' @@ -9713,6 +9726,7 @@ Subject length lower bound = 9 (?(1)|.) # check that there was an empty component /Iix Capturing subpattern count = 1 +Max back reference = 1 Compile options: caseless extended Overall options: anchored caseless extended Last code unit = ':' @@ -9740,6 +9754,7 @@ Failed: error 165 at offset 15: different names for subpatterns of the same numb b(?<quote> (?<apostrophe>')|(?<realquote>")) ) (?('quote')[a-z]+|[0-9]+)/Ix,dupnames Capturing subpattern count = 6 +Max back reference = 4 Named capturing subpatterns: apostrophe 2 apostrophe 5 @@ -9802,6 +9817,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 4 +Max back reference = 4 Named capturing subpatterns: D 4 D 1 @@ -9849,6 +9865,7 @@ No match End ------------------------------------------------------------------ Capturing subpattern count = 4 +Max back reference = 4 Named capturing subpatterns: A 1 A 4 @@ -9964,6 +9981,7 @@ Subject length lower bound = 1 /()i(?(1)a)/I Capturing subpattern count = 1 +Max back reference = 1 First code unit = 'i' Subject length lower bound = 1 ia @@ -13540,6 +13558,7 @@ No match /(?:(?<VERSION>abc)|xyz)(?(VERSION)yes|no)/I Capturing subpattern count = 1 +Max back reference = 1 Named capturing subpatterns: VERSION 1 Starting code units: a x @@ -13832,4 +13851,46 @@ Compile options: <none> Overall options: dotall no_dotstar_anchor Subject length lower 
bound = 1 +'^(?:(a)|b)(?(1)A|B)' + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 + 0: aA + 1: a + +'^(?:(?<AA>a)|b)(?(<AA>)A|B)' + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 + 0: aA + 1: a + +'^(?<AA>)(?:(?<AA>a)|b)(?(<AA>)A|B)'dupnames + aA123\=ovector=1 +Matched, but too many substrings + 0: aA + aA123\=ovector=2 +Matched, but too many substrings + 0: aA + 1: + aA123\=ovector=3 + 0: aA + 1: + 2: a + +'^(?:(?<AA>X)|)(?:(?<AA>a)|b)\k{AA}'dupnames + aa123\=ovector=1 +Matched, but too many substrings + 0: aa + aa123\=ovector=2 +Matched, but too many substrings + 0: aa + 1: + aa123\=ovector=3 + 0: aa + 1: + 2: a + # End of testinput2