From 17bb675e024824008969a9ab48e350e7a2b40320 Mon Sep 17 00:00:00 2001 From: sacek Date: Sat, 9 May 2015 19:11:53 +0200 Subject: [PATCH] Left recursion support added. --- lpcap.h | 5 + lpcode.c | 103 ++++++++++++---- lpcode.h | 2 + lptree.c | 41 ++++++- lptree.h | 1 + lptypes.h | 12 ++ lpvm.c | 354 +++++++++++++++++++++++++++++++++++++++++++++++++++-- re.lua | 6 +- test.lua | 30 ++--- testlr.lua | 206 +++++++++++++++++++++++++++++++ 10 files changed, 703 insertions(+), 57 deletions(-) create mode 100644 testlr.lua diff --git a/lpcap.h b/lpcap.h index d762fdc..355a38b 100644 --- a/lpcap.h +++ b/lpcap.h @@ -23,6 +23,11 @@ typedef struct Capture { byte siz; /* size of full capture + 1 (0 = not a full capture) */ } Capture; +typedef struct CaptureStack { + int captop; + int dyncaptop; + int capsize; +} CaptureStack; typedef struct CapState { Capture *cap; /* current capture */ diff --git a/lpcode.c b/lpcode.c index 93c0d2a..66f3493 100644 --- a/lpcode.c +++ b/lpcode.c @@ -394,6 +394,28 @@ static int needfollow (TTree *tree) { } } +/* +** Check whether a pattern tree has left recursion +*/ +int hasleftrecursion (TTree *tree) { + tailcall: + switch (tree->tag) { + case TCall: + return tree->cap; + default: { + switch (numsiblings[tree->tag]) { + case 1: /* return hasleftrecursion(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case 2: + if (hasleftrecursion(sib1(tree))) return 1; + /* else return hasleftrecursion(sib2(tree)); */ + tree = sib2(tree); goto tailcall; + default: assert(numsiblings[tree->tag] == 0); return 0; + } + } + } +} + /* }====================================================== */ @@ -475,8 +497,8 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) { /* ** Add an instruction followed by space for an offset (to be set later) */ -static int addoffsetinst (CompileState *compst, Opcode op) { - int i = addinstruction(compst, op, 0); /* instruction */ +static int addoffsetinst (CompileState *compst, Opcode op, int val) { + int i = 
addinstruction(compst, op, val); /* instruction */ addinstruction(compst, (Opcode)0, 0); /* open space for offset */ assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2); return i; @@ -590,15 +612,15 @@ static int codetestset (CompileState *compst, Charset *cs, int e) { int c = 0; Opcode op = charsettype(cs->cs, &c); switch (op) { - case IFail: return addoffsetinst(compst, IJmp); /* always jump */ - case IAny: return addoffsetinst(compst, ITestAny); + case IFail: return addoffsetinst(compst, IJmp, 0); /* always jump */ + case IAny: return addoffsetinst(compst, ITestAny, 0); case IChar: { - int i = addoffsetinst(compst, ITestChar); + int i = addoffsetinst(compst, ITestChar, 0); getinstr(compst, i).i.aux = c; return i; } case ISet: { - int i = addoffsetinst(compst, ITestSet); + int i = addoffsetinst(compst, ITestSet, 0); addcharset(compst, cs->cs); return i; } @@ -650,6 +672,18 @@ static void codebehind (CompileState *compst, TTree *tree) { static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, const Charset *fl) { int emptyp2 = (p2->tag == TTrue); + if (hasleftrecursion(p1) || hasleftrecursion(p2)) + { + int pcommit; + int pchoice = addoffsetinst(compst, IChoice, 0); + codegen(compst, p1, emptyp2, NOINST, fullset); + pcommit = addoffsetinst(compst, ICommit, 0); + jumptohere(compst, pchoice); + codegen(compst, p2, opt, NOINST, fl); + jumptohere(compst, pcommit); + } + else + { Charset cs1, cs2; int e1 = getfirst(p1, fullset, &cs1); if (headfail(p1) || @@ -659,14 +693,14 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, int jmp = NOINST; codegen(compst, p1, 0, test, fl); if (!emptyp2) - jmp = addoffsetinst(compst, IJmp); + jmp = addoffsetinst(compst, IJmp, 0); jumptohere(compst, test); codegen(compst, p2, opt, NOINST, fl); jumptohere(compst, jmp); } else if (opt && emptyp2) { /* p1? 
== IPartialCommit; p1 */ - jumptohere(compst, addoffsetinst(compst, IPartialCommit)); + jumptohere(compst, addoffsetinst(compst, IPartialCommit, 0)); codegen(compst, p1, 1, NOINST, fullset); } else { @@ -674,14 +708,15 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt, test(fail(p1)) -> L1; choice L1; ; commit L2; L1: ; L2: */ int pcommit; int test = codetestset(compst, &cs1, e1); - int pchoice = addoffsetinst(compst, IChoice); + int pchoice = addoffsetinst(compst, IChoice, 0); codegen(compst, p1, emptyp2, test, fullset); - pcommit = addoffsetinst(compst, ICommit); + pcommit = addoffsetinst(compst, ICommit, 0); jumptohere(compst, pchoice); jumptohere(compst, test); codegen(compst, p2, opt, NOINST, fl); jumptohere(compst, pcommit); } + } } @@ -699,9 +734,9 @@ static void codeand (CompileState *compst, TTree *tree, int tt) { } else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */ int pcommit; - int pchoice = addoffsetinst(compst, IChoice); + int pchoice = addoffsetinst(compst, IChoice, 0); codegen(compst, tree, 0, tt, fullset); - pcommit = addoffsetinst(compst, IBackCommit); + pcommit = addoffsetinst(compst, IBackCommit, 0); jumptohere(compst, pchoice); addinstruction(compst, IFail, 0); jumptohere(compst, pcommit); @@ -754,13 +789,28 @@ static void coderep (CompileState *compst, TTree *tree, int opt, addcharset(compst, st.cs); } else { + if (hasleftrecursion(tree)) { + int commit, l2; + int pchoice = NOINST; + if (opt) + jumptohere(compst, addoffsetinst(compst, IPartialCommit, 0)); + else + pchoice = addoffsetinst(compst, IChoice, 0); + l2 = gethere(compst); + codegen(compst, tree, 0, NOINST, fullset); + commit = addoffsetinst(compst, IPartialCommit, 0); + jumptothere(compst, commit, l2); + jumptohere(compst, pchoice); + } + else + { int e1 = getfirst(tree, fullset, &st); if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) { /* L1: test (fail(p1)) -> L2;
<p>
; jmp L1; L2: */ int jmp; int test = codetestset(compst, &st, 0); codegen(compst, tree, opt, test, fullset); - jmp = addoffsetinst(compst, IJmp); + jmp = addoffsetinst(compst, IJmp, 0); jumptohere(compst, test); jumptothere(compst, jmp, test); } @@ -771,16 +821,17 @@ static void coderep (CompileState *compst, TTree *tree, int opt, int test = codetestset(compst, &st, e1); int pchoice = NOINST; if (opt) - jumptohere(compst, addoffsetinst(compst, IPartialCommit)); + jumptohere(compst, addoffsetinst(compst, IPartialCommit, 0)); else - pchoice = addoffsetinst(compst, IChoice); + pchoice = addoffsetinst(compst, IChoice, 0); l2 = gethere(compst); codegen(compst, tree, 0, NOINST, fullset); - commit = addoffsetinst(compst, IPartialCommit); + commit = addoffsetinst(compst, IPartialCommit, 0); jumptothere(compst, commit, l2); jumptohere(compst, pchoice); jumptohere(compst, test); } + } } } @@ -793,6 +844,15 @@ static void coderep (CompileState *compst, TTree *tree, int opt, ** use the default code (a choice plus a failtwice). */ static void codenot (CompileState *compst, TTree *tree) { + + if (hasleftrecursion(tree)) { + int pchoice = addoffsetinst(compst, IChoice, 0); + codegen(compst, tree, 0, NOINST, fullset); + addinstruction(compst, IFailTwice, 0); + jumptohere(compst, pchoice); + } + else + { Charset st; int e = getfirst(tree, fullset, &st); int test = codetestset(compst, &st, e); @@ -800,12 +860,13 @@ static void codenot (CompileState *compst, TTree *tree) { addinstruction(compst, IFail, 0); else { /* test(fail(p))-> L1; choice L1;
<p>
; failtwice; L1: */ - int pchoice = addoffsetinst(compst, IChoice); + int pchoice = addoffsetinst(compst, IChoice, 0); codegen(compst, tree, 0, NOINST, fullset); addinstruction(compst, IFailTwice, 0); jumptohere(compst, pchoice); } jumptohere(compst, test); + } } @@ -822,7 +883,7 @@ static void correctcalls (CompileState *compst, int *positions, int n = code[i].i.key; /* rule number */ int rule = positions[n]; /* rule position */ assert(rule == from || code[rule - 1].i.code == IRet); - if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ + if (code[i].i.aux == 0 && code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */ code[i].i.code = IJmp; /* tail call */ else code[i].i.code = ICall; @@ -841,8 +902,8 @@ static void codegrammar (CompileState *compst, TTree *grammar) { int positions[MAXRULES]; int rulenumber = 0; TTree *rule; - int firstcall = addoffsetinst(compst, ICall); /* call initial rule */ - int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */ + int firstcall = addoffsetinst(compst, ICall, sib1(grammar)->lr); /* call initial rule */ + int jumptoend = addoffsetinst(compst, IJmp, 0); /* jump to the end */ int start = gethere(compst); /* here starts the initial rule */ jumptohere(compst, firstcall); for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { @@ -857,7 +918,7 @@ static void codegrammar (CompileState *compst, TTree *grammar) { static void codecall (CompileState *compst, TTree *call) { - int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */ + int c = addoffsetinst(compst, IOpenCall, call->cap); /* to be corrected later */ getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */ assert(sib2(call)->tag == TRule); } diff --git a/lpcode.h b/lpcode.h index 72d2bb9..3a1067b 100644 --- a/lpcode.h +++ b/lpcode.h @@ -15,6 +15,7 @@ int tocharset (TTree *tree, Charset *cs); int checkaux (TTree *tree, int pred); int fixedlenx (TTree *tree, int count, int len); int hascaptures (TTree *tree); 
+int hasleftrecursion (TTree *tree); int lp_gc (lua_State *L); Instruction *compile (lua_State *L, Pattern *p); void realloccode (lua_State *L, Pattern *p, int nsize); @@ -23,6 +24,7 @@ int sizei (const Instruction *i); #define PEnullable 0 #define PEnofail 1 +#define PEleftrecursion 2 #define nofail(t) checkaux(t, PEnofail) #define nullable(t) checkaux(t, PEnullable) diff --git a/lptree.c b/lptree.c index 3f25345..dc49567 100644 --- a/lptree.c +++ b/lptree.c @@ -552,12 +552,17 @@ static int lp_choice (lua_State *L) { TTree *t = newcharset(L); loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]); } + else + { + if (hasleftrecursion(t1) || hasleftrecursion(t2)) + newroot2sib(L, TChoice); else if (nofail(t1) || t2->tag == TFalse) lua_pushvalue(L, 1); /* true / x => true, x / false => x */ else if (t1->tag == TFalse) lua_pushvalue(L, 2); /* false / x => x */ else newroot2sib(L, TChoice); + } return 1; } @@ -690,7 +695,9 @@ static int lp_behind (lua_State *L) { ** Create a non-terminal */ static int lp_V (lua_State *L) { + int val = luaL_optint(L, 2, 0); TTree *tree = newleaf(L, TOpenCall); + tree->cap = val; luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected"); tree->key = addtonewktable(L, 0, 1); return 1; @@ -935,6 +942,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) { nd->tag = TRule; nd->key = 0; nd->cap = i; /* rule number */ + nd->lr = 0; nd->u.ps = rulesize + 1; /* point to next rule */ memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */ mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */ @@ -972,8 +980,9 @@ static int verifyerror (lua_State *L, int *passed, int npassed) { for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */ for (j = i - 1; j >= 0; j--) { if (passed[i] == passed[j]) { - lua_rawgeti(L, -1, passed[i]); /* get rule's key */ - return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); +// lua_rawgeti(L, -1, passed[i]); /* get rule's key */ +// 
return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1)); + return PEleftrecursion; } } } @@ -1006,12 +1015,15 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, /* return verifyrule(L, sib2(tree), passed, npassed); */ tree = sib2(tree); goto tailcall; case TSeq: /* only check 2nd child if first is nullable */ - if (!verifyrule(L, sib1(tree), passed, npassed, 0)) - return nullable; + switch (verifyrule(L, sib1(tree), passed, npassed, 0)) { + case 0: return nullable; + case PEleftrecursion: return PEleftrecursion; /* else return verifyrule(L, sib2(tree), passed, npassed); */ + } tree = sib2(tree); goto tailcall; case TChoice: /* must check both children */ nullable = verifyrule(L, sib1(tree), passed, npassed, nullable); + if (nullable == PEleftrecursion) return PEleftrecursion; /* return verifyrule(L, sib2(tree), passed, npassed, nullable); */ tree = sib2(tree); goto tailcall; case TRule: @@ -1028,6 +1040,24 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed, } } +/* +** +** +** +*/ +static void findleftrecursivecalls (TTree *tree) { + tailcall: + if (tree->tag == TCall && sib2(tree)->lr && !tree->cap) + tree->cap = 1; //Call may be left recursive + switch (numsiblings[tree->tag]) { + case 1: /* findleftrecursivecalls(sib1(tree)); */ + tree = sib1(tree); goto tailcall; + case 2: + findleftrecursivecalls(sib1(tree)); + tree = sib2(tree); goto tailcall; /* findleftrecursivecalls(sib2(tree)); */ + default: assert(numsiblings[tree->tag] == 0); break; + } +} static void verifygrammar (lua_State *L, TTree *grammar) { int passed[MAXRULES]; @@ -1035,12 +1065,13 @@ static void verifygrammar (lua_State *L, TTree *grammar) { /* check left-recursive rules */ for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { if (rule->key == 0) continue; /* unused rule */ - verifyrule(L, sib1(rule), passed, 0, 0); + rule->lr = verifyrule(L, sib1(rule), passed, 0, 0) == PEleftrecursion ? 
1 : 0; } assert(rule->tag == TTrue); /* check infinite loops inside rules */ for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) { if (rule->key == 0) continue; /* unused rule */ + findleftrecursivecalls(sib1(rule)); if (checkloops(sib1(rule))) { lua_rawgeti(L, -1, rule->key); /* get rule's key */ luaL_error(L, "empty loop in rule '%s'", val2str(L, -1)); diff --git a/lptree.h b/lptree.h index b69528a..96567ab 100644 --- a/lptree.h +++ b/lptree.h @@ -42,6 +42,7 @@ extern const byte numsiblings[]; typedef struct TTree { byte tag; byte cap; /* kind of capture (if it is a capture) */ + byte lr; unsigned short key; /* key in ktable for Lua data (0 if no key) */ union { int ps; /* occasional second sibling */ diff --git a/lptypes.h b/lptypes.h index b9b7933..33fe08d 100644 --- a/lptypes.h +++ b/lptypes.h @@ -65,6 +65,8 @@ /* initial size for capture's list */ #define INITCAPSIZE 32 +/* initial size for capture stack's list */ +#define INITCAPSTACKSIZE 32 /* index, on Lua stack, for subject */ #define SUBJIDX 2 @@ -81,7 +83,17 @@ /* index, on Lua stack, for backtracking stack */ #define stackidx(ptop) ((ptop) + 4) +/* index, on Lua stack, for lambda */ +#define lambdaidx(ptop) ((ptop) + 5) +/* index, on Lua stack, for captures array */ +#define caplistsidx(ptop) ((ptop) + 6) + +/* index, on Lua stack, for captures stack */ +#define capliststackidx(ptop) ((ptop) + 7) + +/* index, on Lua stack, for dyn captures stack */ +#define dyncaplistidx(ptop) ((ptop) + 8) typedef unsigned char byte; diff --git a/lpvm.c b/lpvm.c index cd893ed..efb0ea9 100644 --- a/lpvm.c +++ b/lpvm.c @@ -21,6 +21,7 @@ #define INITBACK 100 #endif +#define LRFAIL -1 #define getoffset(p) (((p) + 1)->offset) @@ -38,6 +39,8 @@ typedef struct Stack { const char *s; /* saved position (or NULL for calls) */ const Instruction *p; /* next instruction */ int caplevel; + const char *X; /* LR */ + const Instruction *pA; } Stack; @@ -47,16 +50,42 @@ typedef struct Stack { /* ** Double the size of the 
array of captures */ -static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop) { +static Capture *doublecap (lua_State *L, Capture *cap, int captop, int ptop, int capstackptr) { Capture *newc; if (captop >= INT_MAX/((int)sizeof(Capture) * 2)) luaL_error(L, "too many captures"); newc = (Capture *)lua_newuserdata(L, captop * 2 * sizeof(Capture)); memcpy(newc, cap, captop * sizeof(Capture)); lua_replace(L, caplistidx(ptop)); + lua_pushvalue(L, caplistidx(ptop)); // update capture base in Capture Stack + lua_rawseti(L, caplistsidx(ptop), capstackptr); return newc; } +/* +** Double the size of the Stack of captures +*/ +static CaptureStack *doublecapstack (lua_State *L, int capstacktop, int ptop) { + CaptureStack *newcs; + CaptureStack *capstack = ((CaptureStack *)lua_touserdata(L, capliststackidx(ptop))); + if (capstacktop >= INT_MAX/((int)sizeof(CaptureStack) * 2)) + luaL_error(L, "too many captures lists"); + newcs = (CaptureStack *)lua_newuserdata(L, capstacktop * 2 * sizeof(CaptureStack)); + memcpy(newcs, capstack, capstacktop * sizeof(CaptureStack)); + lua_replace(L, capliststackidx(ptop)); + return newcs; +} + +/* +** new capture +*/ +static Capture *newcap (lua_State *L, int size, int ptop, int capstackptr) { + Capture *newc = (Capture *)lua_newuserdata(L, size * sizeof(Capture)); + lua_replace(L, caplistidx(ptop)); + lua_pushvalue(L, caplistidx(ptop)); // update capture base in Capture Stack + lua_rawseti(L, caplistsidx(ptop), capstackptr); + return newc; +} /* ** Double the size of the stack @@ -153,8 +182,24 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, int captop = 0; /* point to first empty slot in captures */ int ndyncap = 0; /* number of dynamic captures (in Lua stack) */ const Instruction *p = op; /* current instruction */ + int maxpointer = e - o; + CaptureStack capstackbase[INITCAPSTACKSIZE]; + CaptureStack *capstack = capstackbase; + int capstacksize = INITCAPSTACKSIZE; + int capstacktop = 0; + 
stack->X = NULL; stack->p = &giveup; stack->s = s; stack->caplevel = 0; stack++; lua_pushlightuserdata(L, stackbase); + lua_newtable(L); // Lambda (L for left recursion) Lua stack index lambdaidx + lua_newtable(L); // Captures Lists (Captures for left recursion) Lua stack index caplistsidx + lua_pushlightuserdata(L, capstackbase); //capliststackidx(ptop) + lua_newtable(L); // Dynamic capture list dyncaplistidx(ptop) + capstacktop++; + lua_pushvalue(L, caplistidx(ptop)); // set Capture list base to first slot of Captures List array + lua_rawseti(L,caplistsidx(ptop),capstacktop); + capstack->captop = captop; + capstack->dyncaptop = ndyncap; + capstack->capsize = capsize; for (;;) { #if defined(DEBUG) printf("s: |%s| stck:%d, dyncaps:%d, caps:%d ", @@ -162,7 +207,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, printinst(op, p); printcaplist(capture, capture + captop); #endif - assert(stackidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); + assert(dyncaplistidx(ptop) + ndyncap == lua_gettop(L) && ndyncap <= captop); switch ((Opcode)p->i.code) { case IEnd: { assert(stack == getstackbase(L, ptop) + 1); @@ -175,8 +220,105 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, return NULL; } case IRet: { - assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); - p = (--stack)->p; + if (!(stack - 1)->X) { // not LR return + assert(stack > getstackbase(L, ptop) && (stack - 1)->s == NULL); + p = (--stack)->p; + } + else + { + const char* X = (stack - 1)->X; + if (X == (char*)LRFAIL || s > X) { // rule lvar.1 inc.1 + int i; + (stack - 1)->X = s; + p = (stack - 1)->pA; + s = (stack - 1)->s; + (stack - 1)->caplevel = captop; + lua_pushinteger(L, (p - op) * maxpointer + (s - o)); + lua_gettable(L, lambdaidx(ptop)); + lua_pushinteger(L,(stack - 1)->X == (char*)LRFAIL ? 
LRFAIL : (stack - 1)->X - o); + lua_setfield(L,-2,"X"); + lua_pushvalue(L, caplistidx(ptop)); + lua_setfield(L,-2,"commitcap"); + lua_pushinteger(L, captop); + lua_setfield(L,-2,"commitcaptop"); + lua_newtable(L); + for (i = 1; i <= ndyncap; i++) + { + lua_pushvalue(L,i - ndyncap - 1 - 2); + lua_rawseti(L,-2,i); + } + lua_pushinteger(L, ndyncap); + lua_setfield(L,-3,"commitdyncapcount"); + lua_setfield(L,-2,"commitdyncap"); + lua_pop(L,1); + if (ndyncap > 0) + lua_pop(L, ndyncap); + ndyncap = 0; + captop = 0; + capsize = INITCAPSIZE; + capture = newcap (L, capsize, ptop, capstacktop); + capstack->captop = captop; + capstack->capsize = capsize; + capstack->dyncaptop = ndyncap; + } + else { // rule inc.3 + Capture * commitcapture; + int i, commitcaptop, commitdyncapcount; + stack--; + p = stack->p; + s = stack->X; + lua_pushnil(L); + lua_rawseti(L,caplistsidx(ptop),capstacktop); + lua_pushnil(L); + lua_rawseti(L,dyncaplistidx(ptop),capstacktop); + capstacktop--; + capstack--; + lua_rawgeti(L,caplistsidx(ptop), capstacktop); + capture = ((Capture *)lua_touserdata(L, -1)); + lua_replace(L, caplistidx(ptop)); + captop = capstack->captop; + capsize = capstack->capsize; + lua_pop(L,ndyncap); + ndyncap = capstack->dyncaptop; + lua_rawgeti(L,dyncaplistidx(ptop), capstacktop); + for (i = 1; i <= ndyncap; i++) + { + lua_rawgeti(L, -1, i); + lua_insert(L,-2); + } + lua_pop(L,1); + lua_pushinteger(L, (stack->pA - op) * maxpointer + (stack->s - o)); + lua_gettable(L, lambdaidx(ptop)); + lua_getfield(L,-1,"commitcap"); + commitcapture = ((Capture *)lua_touserdata(L, -1)); + lua_getfield(L,-2,"commitcaptop"); + commitcaptop = lua_tointeger(L, -1); + lua_getfield(L,-3,"commitdyncapcount"); + commitdyncapcount = lua_tointeger(L, -1); + lua_getfield(L,-4,"commitdyncap"); + for (i = 1; i <= commitdyncapcount; i++) + { + lua_rawgeti(L, -1, i); + lua_insert(L,-6); + } + lua_pop(L,5); + for (i = 0; i < commitcaptop; i++) + if (commitcapture[i].kind == Cruntime) + commitcapture[i].idx += 
ndyncap; + ndyncap += commitdyncapcount; + if (commitcaptop > 0) { + if (captop + commitcaptop >= capsize) { + capture = doublecap(L, capture, captop + commitcaptop, ptop, capstacktop); + capsize = 2 * (captop + commitcaptop); + } + memcpy(capture + captop, commitcapture, commitcaptop * sizeof(Capture)); + captop += commitcaptop; + } + lua_pushinteger(L, (stack->pA - op) * maxpointer + (stack->s - o)); + lua_pushnil(L); + lua_settable(L, lambdaidx(ptop)); + } + } continue; } case IAny: { @@ -237,17 +379,117 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, stack->p = p + getoffset(p); stack->s = s; stack->caplevel = captop; + stack->X = NULL; stack++; p += 2; continue; } case ICall: { + int k = p->i.aux; if (stack == stacklimit) stack = doublestack(L, &stacklimit, ptop); - stack->s = NULL; - stack->p = p + 2; /* save return address */ - stack++; - p += getoffset(p); + if (k == 0) { // not LR call + stack->s = NULL; + stack->X = NULL; + stack->p = p + 2; /* save return address */ + stack++; + p += getoffset(p); + } + else + { + const Instruction *pA = p + getoffset(p); + int index = (pA - op) * maxpointer + (s - o); + lua_pushinteger(L, index); + lua_gettable(L, lambdaidx(ptop)); + if (!lua_istable(L,-1)) { // rule lvar.1 lvar.2 + int i; + lua_pushinteger(L, index); + lua_newtable(L); + lua_pushinteger(L, LRFAIL); + lua_setfield(L,-2,"X"); + lua_pushinteger(L, k); + lua_setfield(L,-2,"k"); + lua_settable(L, lambdaidx(ptop)); + lua_pop(L, 1); + capstack->captop = captop; + capstack->dyncaptop = ndyncap; + lua_newtable(L); + for (i = 1; i <= ndyncap; i++) + { + lua_pushvalue(L,i - ndyncap - 2); + lua_rawseti(L,-2,i); + } + lua_rawseti(L,dyncaplistidx(ptop),capstacktop); + if (capstacktop + 1 >= capstacksize) { + capstack = doublecapstack(L, capstacktop, ptop) + capstacktop - 1; + capstacksize = 2 * capstacktop; + } + capstacktop++; + capstack++; + if (ndyncap > 0) + lua_pop(L, ndyncap); + ndyncap = 0; + captop = 0; + capsize = 
INITCAPSIZE; + capture = newcap (L, capsize, ptop, capstacktop); + capstack->captop = captop; + capstack->dyncaptop = ndyncap; + capstack->capsize = capsize; + stack->p = p + 2; + stack->pA = pA; + stack->s = s; + stack->X = (char*)LRFAIL; + stack->caplevel = captop; + stack++; + p += getoffset(p); + } + else + { + int i, X_X, X_k, commitdyncapcount; + Capture * commitcapture; + int commitcaptop; + lua_getfield(L, -1, "X"); + X_X = lua_tointeger(L,-1); + lua_getfield(L, -2, "k"); + X_k = lua_tointeger(L,-1); + lua_pop(L, 2); + if (X_X == LRFAIL || k < X_k) // rule lvar.3 lvar.5 + { + lua_pop(L, 1); + goto fail; + } + else // rule lvar.4 + { + lua_getfield(L,-1,"commitcap"); + commitcapture = ((Capture *)lua_touserdata(L, -1)); + lua_getfield(L,-2,"commitcaptop"); + commitcaptop = lua_tointeger(L, -1); + lua_getfield(L,-3,"commitdyncapcount"); + commitdyncapcount = lua_tointeger(L, -1); + lua_getfield(L,-4,"commitdyncap"); + for (i = 1; i <= commitdyncapcount; i++) + { + lua_rawgeti(L, -1, i); + lua_insert(L,-6); + } + lua_pop(L,5); + for (i = 0; i < commitcaptop; i++) + if (commitcapture[i].kind == Cruntime) + commitcapture[i].idx += ndyncap; + ndyncap += commitdyncapcount; + if (commitcaptop > 0) { + if (captop + commitcaptop >= capsize) { + capture = doublecap(L, capture, captop + commitcaptop, ptop, capstacktop); + capsize = 2 * (captop + commitcaptop); + } + memcpy(capture + captop, commitcapture, commitcaptop * sizeof(Capture)); + captop += commitcaptop; + } + p += 2; + s = o + X_X; + } + } + } continue; } case ICommit: { @@ -276,14 +518,100 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, /* go through */ case IFail: fail: { /* pattern failed: try to backtrack */ + const char* X; do { /* remove pending calls */ assert(stack > getstackbase(L, ptop)); s = (--stack)->s; - } while (s == NULL); + X = stack->X; + if (X == (char*)LRFAIL) { // rule lvar.2 rest + int i; + lua_pushnil(L); + lua_rawseti(L,caplistsidx(ptop),capstacktop); + 
lua_pushnil(L); + lua_rawseti(L,dyncaplistidx(ptop),capstacktop); + capstacktop--; + capstack--; + lua_rawgeti(L,caplistsidx(ptop), capstacktop); + capture = ((Capture *)lua_touserdata(L, -1)); + lua_replace(L, caplistidx(ptop)); + captop = capstack->captop; + capsize = capstack->capsize; + lua_pop(L,ndyncap); + ndyncap = capstack->dyncaptop; + lua_rawgeti(L,dyncaplistidx(ptop), capstacktop); + for (i = 1; i <= ndyncap; i++) + { + lua_rawgeti(L, -1, i); + lua_insert(L,-2); + } + lua_pop(L,1); + lua_pushinteger(L, (stack->pA - op) * maxpointer + (s - o)); + lua_pushnil(L); + lua_settable(L,lambdaidx(ptop)); + } + } while (s == NULL || X == (char*)LRFAIL); + if (ndyncap > 0) /* is there matchtime captures? */ ndyncap -= removedyncap(L, capture, stack->caplevel, captop); - captop = stack->caplevel; p = stack->p; + if (X) // rule inc.2 + { + Capture * commitcapture; + int i,commitcaptop, commitdyncapcount; + s = X; + lua_pushnil(L); + lua_rawseti(L,caplistsidx(ptop),capstacktop); + lua_pushnil(L); + lua_rawseti(L,dyncaplistidx(ptop),capstacktop); + capstacktop--; + capstack--; + lua_rawgeti(L,caplistsidx(ptop), capstacktop); + capture = ((Capture *)lua_touserdata(L, -1)); + lua_replace(L, caplistidx(ptop)); + captop = capstack->captop; + capsize = capstack->capsize; + lua_pop(L,ndyncap); + ndyncap = capstack->dyncaptop; + lua_rawgeti(L,dyncaplistidx(ptop), capstacktop); + for (i = 1; i <= ndyncap; i++) + { + lua_rawgeti(L, -1, i); + lua_insert(L,-2); + } + lua_pop(L,1); + lua_pushinteger(L, (stack->pA - op) * maxpointer + (stack->s - o)); + lua_gettable(L, lambdaidx(ptop)); + lua_getfield(L,-1,"commitcap"); + commitcapture = ((Capture *)lua_touserdata(L, -1)); + lua_getfield(L,-2,"commitcaptop"); + commitcaptop = lua_tointeger(L, -1); + lua_getfield(L,-3,"commitdyncapcount"); + commitdyncapcount = lua_tointeger(L, -1); + lua_getfield(L,-4,"commitdyncap"); + for (i = 1; i <= commitdyncapcount; i++) + { + lua_rawgeti(L, -1, i); + lua_insert(L,-6); + } + lua_pop(L,5); + 
for (i = 0; i < commitcaptop; i++) + if (commitcapture[i].kind == Cruntime) + commitcapture[i].idx += ndyncap; + ndyncap += commitdyncapcount; + if (commitcaptop > 0) { + if (captop + commitcaptop >= capsize) { + capture = doublecap(L, capture, captop + commitcaptop, ptop, capstacktop); + capsize = 2 * (captop + commitcaptop); + } + memcpy(capture + captop, commitcapture, commitcaptop * sizeof(Capture)); + captop += commitcaptop; + } + lua_pushinteger(L, (stack->pA - op) * maxpointer + (stack->s - o)); + lua_pushnil(L); + lua_settable(L,lambdaidx(ptop)); + } + else + captop = stack->caplevel; continue; } case ICloseRunTime: { @@ -302,11 +630,11 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, ndyncap += n - rem; /* update number of dynamic captures */ if (n > 0) { /* any new capture? */ if ((captop += n + 2) >= capsize) { - capture = doublecap(L, capture, captop, ptop); + capture = doublecap(L, capture, captop, ptop, capstacktop); capsize = 2 * captop; } /* add new captures to 'capture' list */ - adddyncaptures(s, capture + captop - n - 2, n, fr); + adddyncaptures(s, capture + captop - n - 2, n, fr); } p++; continue; @@ -339,7 +667,7 @@ const char *match (lua_State *L, const char *o, const char *s, const char *e, capture[captop].idx = p->i.key; capture[captop].kind = getkind(p); if (++captop >= capsize) { - capture = doublecap(L, capture, captop, ptop); + capture = doublecap(L, capture, captop, ptop, capstacktop); capsize = 2 * captop; } p++; diff --git a/re.lua b/re.lua index 3b9974f..1d8e159 100644 --- a/re.lua +++ b/re.lua @@ -151,10 +151,10 @@ end local function firstdef (n, r) return adddef({n}, n, r) end -local function NT (n, b) +local function NT (n, b, p) if not b then error("rule '"..n.."' used outside a grammar") - else return mm.V(n) + else return mm.V(n, p or 0) end end @@ -193,7 +193,7 @@ local exp = m.P{ "Exp", + "{|" * m.V"Exp" * "|}" / mm.Ct + "{" * m.V"Exp" * "}" / mm.C + m.P"." 
* m.Cc(any) - + (name * -arrow + "<" * name * ">") * m.Cb("G") / NT; + + (name * m.Cb("G") * (S * ":" * S * num)^-1 * -arrow + "<" * name * m.Cb("G") * (S * ":" * S * num)^-1 * ">") / NT; Definition = name * arrow * m.V"Exp"; Grammar = m.Cg(m.Cc(true), "G") * m.Cf(m.V"Definition" / firstdef * m.Cg(m.V"Definition")^0, diff --git a/test.lua b/test.lua index 40773ab..ab8b03d 100644 --- a/test.lua +++ b/test.lua @@ -359,7 +359,7 @@ local function checkerr (msg, f, ...) assert(not st and m.match({ m.P(msg) + 1 * m.V(1) }, err)) end -checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") +--checkerr("rule '1' may be left recursive", m.match, { m.V(1) * 'a' }, "a") checkerr("rule '1' used outside a grammar", m.match, m.V(1), "") checkerr("rule 'hiii' used outside a grammar", m.match, m.V('hiii'), "") checkerr("rule 'hiii' undefined in given grammar", m.match, { m.V('hiii') }, "") @@ -378,7 +378,7 @@ p = {'a', f = m.V'g', g = m.P'' } -checkerr("rule 'a' may be left recursive", m.match, p, "a") +--checkerr("rule 'a' may be left recursive", m.match, p, "a") -- Bug in peephole optimization of LPeg 0.12 (IJmp -> ICommit) -- the next grammar has an original sequence IJmp -> ICommit -> IJmp L1 @@ -869,21 +869,21 @@ local function badgrammar (g, expected) if expected then assert(find(expected, msg)) end end -badgrammar({[1] = m.V(1)}, "rule '1'") +--badgrammar({[1] = m.V(1)}, "rule '1'") badgrammar({[1] = m.V(2)}, "rule '2'") -- invalid non-terminal badgrammar({[1] = m.V"x"}, "rule 'x'") -- invalid non-terminal badgrammar({[1] = m.V{}}, "rule '(a table)'") -- invalid non-terminal -badgrammar({[1] = #m.P("a") * m.V(1)}, "rule '1'") -- left-recursive -badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive -badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive -badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive -badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive +--badgrammar({[1] = #m.P("a") * m.V(1)}, 
"rule '1'") -- left-recursive +--badgrammar({[1] = -m.P("a") * m.V(1)}, "rule '1'") -- left-recursive +--badgrammar({[1] = -1 * m.V(1)}, "rule '1'") -- left-recursive +--badgrammar({[1] = -1 + m.V(1)}, "rule '1'") -- left-recursive +--badgrammar({[1] = 1 * m.V(2), [2] = m.V(2)}, "rule '2'") -- left-recursive badgrammar({[1] = 1 * m.V(2)^0, [2] = m.P(0)}, "rule '1'") -- inf. loop badgrammar({ m.V(2), m.V(3)^0, m.P"" }, "rule '2'") -- inf. loop badgrammar({ m.V(2) * m.V(3)^0, m.V(3)^0, m.P"" }, "rule '1'") -- inf. loop badgrammar({"x", x = #(m.V(1) * 'a') }, "rule '1'") -- inf. loop -badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop -badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive +--badgrammar({ -(m.V(1) * 'a') }, "rule '1'") -- inf. loop +--badgrammar({"x", x = m.P'a'^-1 * m.V"x"}, "rule 'x'") -- left recursive badgrammar({"x", x = m.P'a' * m.V"y"^1, y = #m.P(1)}, "rule 'x'") assert(m.match({'a' * -m.V(1)}, "aaa") == 2) @@ -895,13 +895,13 @@ m.P{ ('a' * m.V(1))^-1 } m.P{ -('a' * m.V(1)) } m.P{ ('abc' * m.V(1))^-1 } m.P{ -('abc' * m.V(1)) } -badgrammar{ #m.P('abc') * m.V(1) } -badgrammar{ -('a' + m.V(1)) } +--badgrammar{ #m.P('abc') * m.V(1) } +--badgrammar{ -('a' + m.V(1)) } m.P{ #('a' * m.V(1)) } -badgrammar{ #('a' + m.V(1)) } +--badgrammar{ #('a' + m.V(1)) } m.P{ m.B{ m.P'abc' } * 'a' * m.V(1) } -badgrammar{ m.B{ m.P'abc' } * m.V(1) } -badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) } +--badgrammar{ m.B{ m.P'abc' } * m.V(1) } +--badgrammar{ ('a' + m.P'bcd')^-1 * m.V(1) } -- simple tests for maximum sizes: diff --git a/testlr.lua b/testlr.lua new file mode 100644 index 0000000..7911bac --- /dev/null +++ b/testlr.lua @@ -0,0 +1,206 @@ +local lpeg = require"lpeg" +local re = require"re" + +local m = lpeg + + +local function checkeq(x, y, p) + if p then print(x, y) end + if type(x) ~= "table" then assert(x == y) + else + for k, v in pairs(x) do checkeq(v, y[k], p) end + for k, v in pairs(y) do checkeq(v, x[k], p) end + end +end + +print"Tests 
for LPeg left recursion" + +assert(type(m.version()) == "string") +print("version " .. m.version()) + + +--[[ +direct left recursion +E ← E + n / n +--]] + +local pat = m.P{ + "E"; + E = m.V"E" * '+' * "n" + "n", + +} + +assert(pat:match("n+n+n") == 6) +--[[ +indirect left recursion +L ← P.x / x +P ← P(n) / L +--]] + +local pat = m.P{ + "L"; + L = m.V"P" * ".x" + "x", + P = m.V"P" * "(n)" + m.V"L" +} + +assert(pat:match("x(n)(n).x(n).x") == 15) +--[[ +left and right recursion with precedence rules +E ← E1 + E2 / E1 − E2 / E2 ∗ E3 / E2 ÷ E3 / E3 ∗∗ E3 / − E4 / (E1) / n +--]] + + +local pat = m.P{ + "E", + E = m.V("E", 1) * m.S'+-' * m.V("E", 2) + + m.V("E", 2) * m.S'*/' * m.V("E", 3) + + m.V("E", 3) * '**' * m.V("E", 3) + + '-' * m.V("E", 4) + + '(' * m.V("E") * ')' + + m.R'09' ^ 1, +} + +assert(pat:match("-1*(6+2/4+3-1)**2") == 18) + +--[[ +left and right recursion with precedence rules +E ← E1 + E2 / E1 − E2 / E2 ∗ E3 / E2 ÷ E3 / E3 ∗∗ E3 / − E4 / (E1) / n +create AST tree +--]] + + +local pat = m.P{ + "E", + E = m.Ct(m.V("E", 1) * m.C(m.S'+-') * m.V("E", 2) + + m.V("E", 2) * m.C(m.S'*/') * m.V("E", 3) + + m.V("E", 3) * m.C('**') * m.V("E", 3) + + m.C('-') * m.V("E", 4) + + '(' * m.V("E") * ')' + + m.C(m.R'09' ^ 1)), +} + +local ASTtree = pat:match("1+1+1") +checkeq(ASTtree, { { { "1" }, "+", { "1" } }, "+", { "1" } }) + +local ASTtree = pat:match("-1*(6+2/4+3-1)**2") +checkeq(ASTtree, { { "-", { "1" } }, "*", { { { { { { "6" }, "+", { { "2" }, "/", { "4" } } }, "+", { "3" } }, "-", { "1" } } }, "**", { "2" } } }) + +-- using re module with precedence (the same example as above) +-- call_nonterminal : precedence_level or + +local pat = [[ + E <- (E:1 {[+-]} E:2 / + E:2 {[*/]} E:3 / + E:3 {'**'} E:3 / + {'-'} E:4 / + '(' E ')' / + {[0-9]+}) -> {} +]] + +local ASTtree = re.match("-1*(6+2/4+3-1)**2", pat) +checkeq(ASTtree, { { "-", { "1" } }, "*", { { { { { { "6" }, "+", { { "2" }, "/", { "4" } } }, "+", { "3" } }, "-", { "1" } } }, "**", { "2" } } }) + +--[[ +simple 
evaluator +E ← E1 + E2 / E1 − E2 / E2 ∗ E3 / E2 ÷ E3 / E3 ∗∗ E3 / − E4 / (E1) / n +--]] + +local eval = function(s, i, p1, p2, p3) + local res + if p2 == '+' then + res = p1 + p3 + elseif p2 == '-' then + res = p1 - p3 + elseif p2 == '*' then + res = p1 * p3 + elseif p2 == '/' then + res = p1 / p3 + elseif p1 == '-' then + res = -p2 + elseif p2 == '**' then + res = p1 ^ p3 + else + res = p1 + end + return true, res +end + + +local pat = m.P{ + "E", + E = m.Cmt(m.V("E", 1) * m.C(m.S'+-') * m.V("E", 2) + + m.V("E", 2) * m.C(m.S'*/') * m.V("E", 3) + + m.V("E", 3) * m.C('**') * m.V("E", 3) + + m.C('-') * m.V("E", 4) + + '(' * m.V("E") * ')' + + m.C(m.R'09' ^ 1), eval), +} + +assert(pat:match("-1*(6+2/4+3-1)**2") == -72.25) + + +local pat = m.P{ + "E", + E = m.V("E", 1) * '+' * m.V("E", 2) / function(c1, c2) return c1 + c2 end + + m.V("E", 1) * '-' * m.V("E", 2) / function(c1, c2) return c1 - c2 end + + m.V("E", 2) * '*' * m.V("E", 3) / function(c1, c2) return c1 * c2 end + + m.V("E", 2) * '/' * m.V("E", 3) / function(c1, c2) return c1 / c2 end + + m.V("E", 3) * '**' * m.V("E", 3) / function(c1, c2) return c1 ^ c2 end + + '-' * m.V("E", 4) / function(c1) return -c1 end + + '(' * m.V("E") * ')' + + m.C(m.R'09' ^ 1), +} + +assert(pat:match("-1*(6+2/4+3-1)**2") == -72.25) + +local def = { + plus = function(p1, p2) return p1 + p2 end, + minus = function(p1, p2) return p1 - p2 end, + mult = function(p1, p2) return p1 * p2 end, + div = function(p1, p2) return p1 / p2 end, + pow = function(p1, p2) return p1 ^ p2 end, + uminus = function(p1) return -p1 end, + errfce = function(o, i) + local errstr = o .. '\n' .. (' '):rep(i) .. '^' .. '\n' + io.write(errstr) + return false + end, +} + +local pat = [[ + P <- E s (!. 
/ error) + s <- %s* + error <- '' => errfce + E <- (E:1 s'+' E:2) -> plus / + (E:1 s'-' E:2) -> minus / + (E:2 s'*' E:3) -> mult / + (E:2 s'/' E:3) -> div / + (E:3 s'**' E:3)-> pow / + (s'-' E:4) -> uminus / + s'(' E s')' / + s{[0-9]+} / + error +]] + +local pat = re.compile(pat, def) +assert(re.match("-1 * (6 + 2 / 4 + 3 - 1)**2", pat) == -72.25) + +local pat = [[ + A <- B "a" + B <- C "b" + C <- B / A / "c" +]] + +local pat = re.compile(pat) +assert(re.match("cbbabbba", pat) == 9) + +local pat = [[ + S <- A / B + A <- A "a" / B / "a" + B <- B "b" / A / "b" +]] + +local pat = re.compile(pat) +assert(re.match("baabbaaa", pat) == 9) + +print"OK"