diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod index 5024d98b667f..c463ff0315bd 100644 --- a/pod/perldebguts.pod +++ b/pod/perldebguts.pod @@ -746,7 +746,6 @@ will be lost. # Regex Subroutines GOSUB num/ofs 2L recurse to paren arg1 at (signed) ofs arg2 - GOSTART no recurse to start of pattern # Special conditionals NGROUPP no-sv 1 Whether the group matched. diff --git a/regcomp.c b/regcomp.c index f8f4b91df368..b789ca30c71b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -168,7 +168,7 @@ struct RExC_state_t { I32 seen_zerolen; regnode **open_parens; /* pointers to open parens */ regnode **close_parens; /* pointers to close parens */ - regnode *end_op; /* END node in program */ + regnode *end_op; /* END node in program */ I32 utf8; /* whether the pattern is utf8 or not */ I32 orig_utf8; /* whether the pattern was originally in utf8 */ /* XXX use this for future optimisation of case @@ -179,7 +179,7 @@ struct RExC_state_t { HV *paren_names; /* Paren names */ regnode **recurse; /* Recurse regops */ - I32 recurse_count; /* Number of recurse regops */ + I32 recurse_count; /* Number of recurse regops we have generated */ U8 *study_chunk_recursed; /* bitmap of which subs we have moved through */ U32 study_chunk_recursed_bytes; /* bytes in bitmap */ @@ -929,9 +929,6 @@ static const scan_data_t zero_scan_data = if (RExC_seen & REG_UNFOLDED_MULTI_SEEN) \ PerlIO_printf(Perl_debug_log,"REG_UNFOLDED_MULTI_SEEN "); \ \ - if (RExC_seen & REG_GOSTART_SEEN) \ - PerlIO_printf(Perl_debug_log,"REG_GOSTART_SEEN "); \ - \ if (RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) \ PerlIO_printf(Perl_debug_log,"REG_UNBOUNDED_QUANTIFIER_SEEN "); \ \ @@ -4656,29 +4653,24 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } else /* single branch is optimized. 
*/ scan = NEXTOPER(scan); continue; - } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) { + } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB) { I32 paren = 0; regnode *start = NULL; regnode *end = NULL; U32 my_recursed_depth= recursed_depth; - - if (OP(scan) != SUSPEND) { /* GOSUB/GOSTART */ + if (OP(scan) != SUSPEND) { /* GOSUB */ /* Do setup, note this code has side effects beyond * the rest of this block. Specifically setting * RExC_recurse[] must happen at least once during * study_chunk(). */ - if (OP(scan) == GOSUB) { - paren = ARG(scan); - RExC_recurse[ARG2L(scan)] = scan; - start = RExC_open_parens[paren-1]; - end = RExC_close_parens[paren-1]; - } else { - start = RExC_rxi->program + 1; - end = RExC_end_op; - } + paren = ARG(scan); + RExC_recurse[ARG2L(scan)] = scan; + start = RExC_open_parens[paren]; + end = RExC_close_parens[paren]; + /* NOTE we MUST always execute the above code, even - * if we do nothing with a GOSUB/GOSTART */ + * if we do nothing with a GOSUB */ if ( ( flags & SCF_IN_DEFINE ) || @@ -5074,8 +5066,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, if (OP(nxt) != CLOSE) goto nogo; if (RExC_open_parens) { - RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/ - RExC_close_parens[ARG(nxt1)-1]=nxt+2; /*close->while*/ + RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/ + RExC_close_parens[ARG(nxt1)]=nxt+2; /*close->while*/ } /* Now we know that nxt2 is the only contents: */ oscan->flags = (U8)ARG(nxt); @@ -5121,8 +5113,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, oscan->flags = (U8)ARG(nxt); if (RExC_open_parens) { - RExC_open_parens[ARG(nxt1)-1]=oscan; /*open->CURLYM*/ - RExC_close_parens[ARG(nxt1)-1]=nxt2+1; /*close->NOTHING*/ + RExC_open_parens[ARG(nxt1)]=oscan; /*open->CURLYM*/ + RExC_close_parens[ARG(nxt1)]=nxt2+1; /*close->NOTHING*/ } OP(nxt1) = OPTIMIZED; /* was OPEN. */ OP(nxt) = OPTIMIZED; /* was CLOSE. 
*/ @@ -7162,24 +7154,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, r->intflags = 0; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ - /* setup various meta data about recursion, this all requires - * RExC_npar to be correctly set, and a bit later on we clear it */ - if (RExC_seen & REG_RECURSE_SEEN) { - Newxz(RExC_open_parens, RExC_npar,regnode *); - SAVEFREEPV(RExC_open_parens); - Newxz(RExC_close_parens,RExC_npar,regnode *); - SAVEFREEPV(RExC_close_parens); - } - if (RExC_seen & (REG_RECURSE_SEEN | REG_GOSTART_SEEN)) { - /* Note, RExC_npar is 1 + the number of parens in a pattern. - * So its 1 if there are no parens. */ - RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) + - ((RExC_npar & 0x07) != 0); - Newx(RExC_study_chunk_recursed, - RExC_study_chunk_recursed_bytes * RExC_npar, U8); - SAVEFREEPV(RExC_study_chunk_recursed); - } - /* Useful during FAIL. */ #ifdef RE_TRACK_PATTERN_OFFSETS Newxz(ri->u.offsets, 2*RExC_size+1, U32); /* MJD 20001228 */ @@ -7199,17 +7173,51 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RExC_parse = exp; RExC_end = exp + plen; RExC_naughty = 0; - RExC_npar = 1; RExC_emit_start = ri->program; RExC_emit = ri->program; RExC_emit_bound = ri->program + RExC_size + 1; pRExC_state->code_index = 0; *((char*) RExC_emit++) = (char) REG_MAGIC; + /* setup various meta data about recursion, this all requires + * RExC_npar to be correctly set, and a bit later on we clear it */ + if (RExC_seen & REG_RECURSE_SEEN) { + DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, + "%*s%*s Setting up open/close parens\n", + 22, "| |", (int)(0 * 2 + 1), "")); + + /* setup RExC_open_parens, which holds the address of each + * OPEN tag, and to make things simpler for the 0 index + * the start of the program - this is used later for offsets */ + Newxz(RExC_open_parens, RExC_npar,regnode *); + SAVEFREEPV(RExC_open_parens); + RExC_open_parens[0] = RExC_emit; + + /* setup RExC_close_parens, which holds the 
address of each + * CLOSE tag, and to make things simpler for the 0 index + * the end of the program - this is used later for offsets */ + Newxz(RExC_close_parens, RExC_npar,regnode *); + SAVEFREEPV(RExC_close_parens); + /* we don't know where end op starts yet, so we don't + * need to set RExC_close_parens[0] like we do RExC_open_parens[0] above */ + + /* Note, RExC_npar is 1 + the number of parens in a pattern. + * So it's 1 if there are no parens. */ + RExC_study_chunk_recursed_bytes= (RExC_npar >> 3) + + ((RExC_npar & 0x07) != 0); + Newx(RExC_study_chunk_recursed, + RExC_study_chunk_recursed_bytes * RExC_npar, U8); + SAVEFREEPV(RExC_study_chunk_recursed); + } + RExC_npar = 1; if (reg(pRExC_state, 0, &flags,1) == NULL) { ReREFCNT_dec(rx); Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags); } + DEBUG_OPTIMISE_r( + PerlIO_printf(Perl_debug_log, "Starting post parse optimization\n"); + ); + /* XXXX To minimize changes to RE engine we always allocate 3-units-long substrs field. 
*/ Newx(r->substrs, 1, struct reg_substr_data); @@ -7609,6 +7617,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, if (r->minlen < minlen) r->minlen = minlen; + if (RExC_seen & REG_RECURSE_SEEN ) + r->intflags |= PREGf_RECURSE_SEEN; if (RExC_seen & REG_GPOS_SEEN) r->intflags |= PREGf_GPOS_SEEN; if (RExC_seen & REG_LOOKBEHIND_SEEN) @@ -7682,14 +7692,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, = (void*)SvREFCNT_inc(RExC_paren_name_list); } else #endif - ri->name_list_idx = 0; + ri->name_list_idx = 0; - if (RExC_recurse_count) { - for ( ; RExC_recurse_count ; RExC_recurse_count-- ) { - const regnode *scan = RExC_recurse[RExC_recurse_count-1]; - ARG2L_SET( scan, RExC_open_parens[ARG(scan)-1] - scan ); - } + while ( RExC_recurse_count > 0 ) { + const regnode *scan = RExC_recurse[ --RExC_recurse_count ]; + ARG2L_SET( scan, RExC_open_parens[ARG(scan)] - scan ); } + Newxz(r->offs, RExC_npar, regexp_paren_pair); /* assume we don't need to swap parens around before we match */ DEBUG_TEST_r({ @@ -10605,13 +10614,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) break; case '0' : /* (?0) */ case 'R' : /* (?R) */ - if (*RExC_parse != ')') + if (RExC_parse == RExC_end || *RExC_parse != ')') FAIL("Sequence (?R) not terminated"); - ret = reg_node(pRExC_state, GOSTART); - RExC_seen |= REG_GOSTART_SEEN; + num = 0; + RExC_seen |= REG_RECURSE_SEEN; *flagp |= POSTPONED; - nextchar(pRExC_state); - return ret; + goto gen_recurse_regop; /*notreached*/ /* named and numeric backreferences */ case '&': /* (?&NAME) */ @@ -10687,6 +10695,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) } else if ( paren == '+' ) { num = RExC_npar + num - 1; } + /* We keep track how many GOSUB items we have produced. 
+ To start off the ARG2L() of the GOSUB holds its "id", + which is used later in conjunction with RExC_recurse + to calculate the offset we need to jump for the GOSUB, + which it will store in the final representation. + We have to defer the actual calculation until much later + as the regop may move. + */ ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count); if (!SIZE_ONLY) { @@ -10701,10 +10717,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) (UV)ARG(ret), (IV)ARG2L(ret))); } RExC_seen |= REG_RECURSE_SEEN; + Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */ Set_Node_Offset(ret, parse_start); /* MJD */ *flagp |= POSTPONED; + assert(*RExC_parse == ')'); nextchar(pRExC_state); return ret; @@ -10848,7 +10866,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) else if (RExC_parse[0] == 'R') { RExC_parse++; /* parno == 0 => /(?(R)YES|NO)/ "in any form of recursion OR eval" - * parno == 1 => /(?(R0)YES|NO)/ "in GOSTART (?0) / (?R)" + * parno == 1 => /(?(R0)YES|NO)/ "in GOSUB (?0) / (?R)" * parno == 2 => /(?(R1)YES|NO)/ "in GOSUB (?1) (parno-1)" */ parno = 0; @@ -10881,7 +10899,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) * will return something, and when SIZE_ONLY is * true, reg_scan_name() just parses the string, * and doesnt return anything. (in theory) */ - assert(SIZE_ONLY ? !sv_dat : sv_dat); + assert(SIZE_ONLY ? 
!sv_dat : !!sv_dat); if (sv_dat) parno = 1 + *((I32 *)SvPVX(sv_dat)); @@ -11004,14 +11022,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (!SIZE_ONLY ){ if (!RExC_nestroot) RExC_nestroot = parno; - if (RExC_seen & REG_RECURSE_SEEN - && !RExC_open_parens[parno-1]) + if (RExC_open_parens && !RExC_open_parens[parno]) { DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, "%*s%*s Setting open paren #%"IVdf" to %d\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ret))); - RExC_open_parens[parno-1]= ret; + RExC_open_parens[parno]= ret; } } Set_Node_Length(ret, 1); /* MJD */ @@ -11100,11 +11117,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) break; case 1: case 2: ender = reganode(pRExC_state, CLOSE, parno); - if (!SIZE_ONLY && RExC_seen & REG_RECURSE_SEEN) { + if ( RExC_close_parens ) { DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, "%*s%*s Setting close paren #%"IVdf" to %d\n", 22, "| |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender))); - RExC_close_parens[parno-1]= ender; + RExC_close_parens[parno]= ender; if (RExC_nestroot == parno) RExC_nestroot = 0; } @@ -11125,6 +11142,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) if (!SIZE_ONLY) { assert(!RExC_end_op); /* there can only be one! 
*/ RExC_end_op = ender; + if (RExC_close_parens) { + DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log, + "%*s%*s Setting close paren #0 (END) to %d\n", + 22, "| |", (int)(depth * 2 + 1), "", REG_NODE_NUM(ender))); + + RExC_close_parens[0]= ender; + } } break; } @@ -18182,7 +18206,7 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) if (RExC_open_parens) { int paren; /*DEBUG_PARSE_FMT("inst"," - %"IVdf, (IV)RExC_npar);*/ - for ( paren=0 ; paren < RExC_npar ; paren++ ) { + for ( paren=0 ; paren < RExC_npar ; paren++ ) { /* valid slots: 0 .. RExC_npar-1 */ if ( RExC_open_parens[paren] >= opnd ) { /*DEBUG_PARSE_FMT("open"," - %d",size);*/ RExC_open_parens[paren] += size; @@ -18197,6 +18221,8 @@ S_reginsert(pTHX_ RExC_state_t *pRExC_state, U8 op, regnode *opnd, U32 depth) } } } + if (RExC_end_op) + RExC_end_op += size; while (src > opnd) { StructCopy(--src, --dst, regnode); @@ -18748,7 +18774,8 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_ } /* Paren and offset */ - Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); + Perl_sv_catpvf(aTHX_ sv, "%d[%+d:%d]", (int)ARG(o),(int)ARG2L(o), + (int)((o + (int)ARG2L(o)) - progi->program) ); if (name_list) { SV **name= av_fetch(name_list, ARG(o), 0 ); if (name) diff --git a/regcomp.h b/regcomp.h index c08888e8f81a..f16197f891b2 100644 --- a/regcomp.h +++ b/regcomp.h @@ -141,6 +141,7 @@ #define PREGf_ANCH_MBOL 0x00000400 #define PREGf_ANCH_SBOL 0x00000800 #define PREGf_ANCH_GPOS 0x00001000 +#define PREGf_RECURSE_SEEN 0x00002000 #define PREGf_ANCH \ ( PREGf_ANCH_SBOL | PREGf_ANCH_GPOS | PREGf_ANCH_MBOL ) @@ -716,7 +717,7 @@ struct regnode_ssc { #define REG_CUTGROUP_SEEN 0x00000100 #define REG_RUN_ON_COMMENT_SEEN 0x00000200 #define REG_UNFOLDED_MULTI_SEEN 0x00000400 -#define REG_GOSTART_SEEN 0x00000800 +/* spare */ #define REG_UNBOUNDED_QUANTIFIER_SEEN 0x00001000 diff --git a/regcomp.sym b/regcomp.sym index 8f9861ab6be6..ac6795527031 100644 --- a/regcomp.sym +++ b/regcomp.sym @@ -190,7 
+190,6 @@ AHOCORASICKC TRIE,trie charclass ; Same as AHOCORASICK, but with embedded c #*Regex Subroutines GOSUB GOSUB, num/ofs 2L ; recurse to paren arg1 at (signed) ofs arg2 -GOSTART GOSTART, no ; recurse to start of pattern #*Special conditionals NGROUPP NGROUPP, no-sv 1 ; Whether the group matched. diff --git a/regexec.c b/regexec.c index 66dc245ddc68..5e188fdc77e4 100644 --- a/regexec.c +++ b/regexec.c @@ -6495,7 +6495,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) regexp_internal *rei; regnode *startpoint; - case GOSTART: /* (?R) */ case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */ if (cur_eval && cur_eval->locinput==locinput) { if ( EVAL_CLOSE_PAREN_IS( cur_eval, (U32)ARG(scan) ) ) @@ -6510,13 +6509,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) re_sv = rex_sv; re = rex; rei = rexi; - if ( OP(scan) == GOSUB ) { - startpoint = scan + ARG2L(scan); - ST.close_paren = 1 + ARG(scan); - } else { - startpoint = rei->program + 1; - ST.close_paren = 1; - } + startpoint = scan + ARG2L(scan); + EVAL_CLOSE_PAREN_SET( st, ARG(scan) ); /* ST.close_paren = 1 + ARG(scan) */ /* Save all the positions seen so far. */ ST.cp = regcppush(rex, 0, maxopenparen); @@ -6775,7 +6769,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) reginfo->strend, "Matching embedded"); ); startpoint = rei->program + 1; - ST.close_paren = 0; /* only used for GOSUB */ + EVAL_CLOSE_PAREN_CLEAR(st); /* ST.close_paren = 0; + * close_paren only for GOSUB */ /* Save all the seen positions so far. 
*/ ST.cp = regcppush(rex, 0, maxopenparen); REGCP_SET(ST.lastcp); diff --git a/regexp.h b/regexp.h index ff44df27e7df..02258fe176d7 100644 --- a/regexp.h +++ b/regexp.h @@ -833,13 +833,19 @@ typedef struct regmatch_state { } u; } regmatch_state; -#define EVAL_CLOSE_PAREN_IS(cur_eval,expr) \ +#define EVAL_CLOSE_PAREN_IS(st,expr) \ (\ - ( ( cur_eval ) ) && \ - ( ( cur_eval )->u.eval.close_paren ) && \ - ( ( ( cur_eval )->u.eval.close_paren - 1 ) == ( expr ) ) \ + ( ( st ) ) && \ + ( ( st )->u.eval.close_paren ) && \ + ( ( ( st )->u.eval.close_paren - 1 ) == ( expr ) ) \ ) +#define EVAL_CLOSE_PAREN_SET(st,expr) \ + ((st)->u.eval.close_paren = (expr) + 1) + +#define EVAL_CLOSE_PAREN_CLEAR(st) \ + ((st)->u.eval.close_paren = 0) + /* how many regmatch_state structs to allocate as a single slab. * We do it in 4K blocks for efficiency. The "3" is 2 for the next/prev * pointers, plus 1 for any mythical malloc overhead. */ diff --git a/regnodes.h b/regnodes.h index f27abe0c7c4d..f820c5684e69 100644 --- a/regnodes.h +++ b/regnodes.h @@ -6,8 +6,8 @@ /* Regops and State definitions */ -#define REGNODE_MAX 93 -#define REGMATCH_STATE_MAX 133 +#define REGNODE_MAX 92 +#define REGMATCH_STATE_MAX 132 #define END 0 /* 0000 End of program. */ #define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */ @@ -88,23 +88,22 @@ #define AHOCORASICK 74 /* 0x4a Aho Corasick stclass. flags==type */ #define AHOCORASICKC 75 /* 0x4b Same as AHOCORASICK, but with embedded charclass data */ #define GOSUB 76 /* 0x4c recurse to paren arg1 at (signed) ofs arg2 */ -#define GOSTART 77 /* 0x4d recurse to start of pattern */ -#define NGROUPP 78 /* 0x4e Whether the group matched. */ -#define INSUBP 79 /* 0x4f Whether we are in a specific recurse. */ -#define DEFINEP 80 /* 0x50 Never execute directly. 
*/ -#define ENDLIKE 81 /* 0x51 Used only for the type field of verbs */ -#define OPFAIL 82 /* 0x52 Same as (?!), but with verb arg */ -#define ACCEPT 83 /* 0x53 Accepts the current matched string, with verbar */ -#define VERB 84 /* 0x54 Used only for the type field of verbs */ -#define PRUNE 85 /* 0x55 Pattern fails at this startpoint if no-backtracking through this */ -#define MARKPOINT 86 /* 0x56 Push the current location for rollback by cut. */ -#define SKIP 87 /* 0x57 On failure skip forward (to the mark) before retrying */ -#define COMMIT 88 /* 0x58 Pattern fails outright if backtracking through this */ -#define CUTGROUP 89 /* 0x59 On failure go to the next alternation in the group */ -#define KEEPS 90 /* 0x5a $& begins here. */ -#define LNBREAK 91 /* 0x5b generic newline pattern */ -#define OPTIMIZED 92 /* 0x5c Placeholder for dump. */ -#define PSEUDO 93 /* 0x5d Pseudo opcode for internal use. */ +#define NGROUPP 77 /* 0x4d Whether the group matched. */ +#define INSUBP 78 /* 0x4e Whether we are in a specific recurse. */ +#define DEFINEP 79 /* 0x4f Never execute directly. */ +#define ENDLIKE 80 /* 0x50 Used only for the type field of verbs */ +#define OPFAIL 81 /* 0x51 Same as (?!), but with verb arg */ +#define ACCEPT 82 /* 0x52 Accepts the current matched string, with verbar */ +#define VERB 83 /* 0x53 Used only for the type field of verbs */ +#define PRUNE 84 /* 0x54 Pattern fails at this startpoint if no-backtracking through this */ +#define MARKPOINT 85 /* 0x55 Push the current location for rollback by cut. */ +#define SKIP 86 /* 0x56 On failure skip forward (to the mark) before retrying */ +#define COMMIT 87 /* 0x57 Pattern fails outright if backtracking through this */ +#define CUTGROUP 88 /* 0x58 On failure go to the next alternation in the group */ +#define KEEPS 89 /* 0x59 $& begins here. */ +#define LNBREAK 90 /* 0x5a generic newline pattern */ +#define OPTIMIZED 91 /* 0x5b Placeholder for dump. 
*/ +#define PSEUDO 92 /* 0x5c Pseudo opcode for internal use. */ /* ------------ States ------------- */ #define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */ #define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */ @@ -230,7 +229,6 @@ EXTCONST U8 PL_regkind[] = { TRIE, /* AHOCORASICK */ TRIE, /* AHOCORASICKC */ GOSUB, /* GOSUB */ - GOSTART, /* GOSTART */ NGROUPP, /* NGROUPP */ INSUBP, /* INSUBP */ DEFINEP, /* DEFINEP */ @@ -373,7 +371,6 @@ static const U8 regarglen[] = { EXTRA_SIZE(struct regnode_1), /* AHOCORASICK */ EXTRA_SIZE(struct regnode_charclass), /* AHOCORASICKC */ EXTRA_SIZE(struct regnode_2L), /* GOSUB */ - 0, /* GOSTART */ EXTRA_SIZE(struct regnode_1), /* NGROUPP */ EXTRA_SIZE(struct regnode_1), /* INSUBP */ EXTRA_SIZE(struct regnode_1), /* DEFINEP */ @@ -472,7 +469,6 @@ static const char reg_off_by_arg[] = { 0, /* AHOCORASICK */ 0, /* AHOCORASICKC */ 0, /* GOSUB */ - 0, /* GOSTART */ 0, /* NGROUPP */ 0, /* INSUBP */ 0, /* DEFINEP */ @@ -577,23 +573,22 @@ EXTCONST char * const PL_reg_name[] = { "AHOCORASICK", /* 0x4a */ "AHOCORASICKC", /* 0x4b */ "GOSUB", /* 0x4c */ - "GOSTART", /* 0x4d */ - "NGROUPP", /* 0x4e */ - "INSUBP", /* 0x4f */ - "DEFINEP", /* 0x50 */ - "ENDLIKE", /* 0x51 */ - "OPFAIL", /* 0x52 */ - "ACCEPT", /* 0x53 */ - "VERB", /* 0x54 */ - "PRUNE", /* 0x55 */ - "MARKPOINT", /* 0x56 */ - "SKIP", /* 0x57 */ - "COMMIT", /* 0x58 */ - "CUTGROUP", /* 0x59 */ - "KEEPS", /* 0x5a */ - "LNBREAK", /* 0x5b */ - "OPTIMIZED", /* 0x5c */ - "PSEUDO", /* 0x5d */ + "NGROUPP", /* 0x4d */ + "INSUBP", /* 0x4e */ + "DEFINEP", /* 0x4f */ + "ENDLIKE", /* 0x50 */ + "OPFAIL", /* 0x51 */ + "ACCEPT", /* 0x52 */ + "VERB", /* 0x53 */ + "PRUNE", /* 0x54 */ + "MARKPOINT", /* 0x55 */ + "SKIP", /* 0x56 */ + "COMMIT", /* 0x57 */ + "CUTGROUP", /* 0x58 */ + "KEEPS", /* 0x59 */ + "LNBREAK", /* 0x5a */ + "OPTIMIZED", /* 0x5b */ + "PSEUDO", /* 0x5c */ /* ------------ States ------------- */ "TRIE_next", /* REGNODE_MAX +0x01 */ "TRIE_next_fail", /* REGNODE_MAX +0x02 */ @@ 
-702,11 +697,12 @@ EXTCONST char * const PL_reg_intflags_name[] = { "ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */ "ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */ "ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */ + "RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */ }; #endif /* DOINIT */ #ifdef DEBUGGING -# define REG_INTFLAGS_NAME_SIZE 12 +# define REG_INTFLAGS_NAME_SIZE 13 #endif /* The following have no fixed length. U8 so we can do strchr() on it. */