Skip to content

Commit

Permalink
Left recursion support added.
Browse files Browse the repository at this point in the history
  • Loading branch information
sacek committed May 9, 2015
1 parent ffc96ff commit 17bb675
Show file tree
Hide file tree
Showing 10 changed files with 703 additions and 57 deletions.
5 changes: 5 additions & 0 deletions lpcap.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ typedef struct Capture {
byte siz; /* size of full capture + 1 (0 = not a full capture) */
} Capture;

/*
** Three integer fields grouped under the name CaptureStack.
** NOTE(review): no use of this struct is visible in this hunk; the
** per-field notes below are inferred from the field names only and
** must be confirmed against the code that reads and writes them.
*/
typedef struct CaptureStack {
int captop;  /* presumably the top index of the capture list -- TODO confirm */
int dyncaptop;  /* presumably the top index for dynamic captures -- TODO confirm */
int capsize;  /* presumably the allocated size of the capture list -- TODO confirm */
} CaptureStack;

typedef struct CapState {
Capture *cap; /* current capture */
Expand Down
103 changes: 82 additions & 21 deletions lpcode.c
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,28 @@ static int needfollow (TTree *tree) {
}
}

/*
** Tell whether a pattern tree contains a call flagged as left recursive.
** A TCall node answers with its own 'cap' field (non-zero means flagged);
** any other node is searched through its siblings, as counted by
** 'numsiblings'. Returns 0 when no flagged call is reached.
*/
int hasleftrecursion (TTree *tree) {
  for (;;) {
    int nsibs;
    if (tree->tag == TCall)
      return tree->cap;  /* the call node carries the flag itself */
    nsibs = numsiblings[tree->tag];
    if (nsibs == 1) {
      tree = sib1(tree);  /* only child: keep descending iteratively */
    }
    else if (nsibs == 2) {
      if (hasleftrecursion(sib1(tree)))
        return 1;
      tree = sib2(tree);  /* first child is clean; go on with the second */
    }
    else {
      assert(nsibs == 0);  /* leaf that is not a call */
      return 0;
    }
  }
}

/* }====================================================== */


Expand Down Expand Up @@ -475,8 +497,8 @@ static int addinstruction (CompileState *compst, Opcode op, int aux) {
/*
** Add an instruction followed by space for an offset (to be set later)
*/
static int addoffsetinst (CompileState *compst, Opcode op) {
int i = addinstruction(compst, op, 0); /* instruction */
static int addoffsetinst (CompileState *compst, Opcode op, int val) {
int i = addinstruction(compst, op, val); /* instruction */
addinstruction(compst, (Opcode)0, 0); /* open space for offset */
assert(op == ITestSet || sizei(&getinstr(compst, i)) == 2);
return i;
Expand Down Expand Up @@ -590,15 +612,15 @@ static int codetestset (CompileState *compst, Charset *cs, int e) {
int c = 0;
Opcode op = charsettype(cs->cs, &c);
switch (op) {
case IFail: return addoffsetinst(compst, IJmp); /* always jump */
case IAny: return addoffsetinst(compst, ITestAny);
case IFail: return addoffsetinst(compst, IJmp, 0); /* always jump */
case IAny: return addoffsetinst(compst, ITestAny, 0);
case IChar: {
int i = addoffsetinst(compst, ITestChar);
int i = addoffsetinst(compst, ITestChar, 0);
getinstr(compst, i).i.aux = c;
return i;
}
case ISet: {
int i = addoffsetinst(compst, ITestSet);
int i = addoffsetinst(compst, ITestSet, 0);
addcharset(compst, cs->cs);
return i;
}
Expand Down Expand Up @@ -650,6 +672,18 @@ static void codebehind (CompileState *compst, TTree *tree) {
static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
const Charset *fl) {
int emptyp2 = (p2->tag == TTrue);
if (hasleftrecursion(p1) || hasleftrecursion(p2))
{
int pcommit;
int pchoice = addoffsetinst(compst, IChoice, 0);
codegen(compst, p1, emptyp2, NOINST, fullset);
pcommit = addoffsetinst(compst, ICommit, 0);
jumptohere(compst, pchoice);
codegen(compst, p2, opt, NOINST, fl);
jumptohere(compst, pcommit);
}
else
{
Charset cs1, cs2;
int e1 = getfirst(p1, fullset, &cs1);
if (headfail(p1) ||
Expand All @@ -659,29 +693,30 @@ static void codechoice (CompileState *compst, TTree *p1, TTree *p2, int opt,
int jmp = NOINST;
codegen(compst, p1, 0, test, fl);
if (!emptyp2)
jmp = addoffsetinst(compst, IJmp);
jmp = addoffsetinst(compst, IJmp, 0);
jumptohere(compst, test);
codegen(compst, p2, opt, NOINST, fl);
jumptohere(compst, jmp);
}
else if (opt && emptyp2) {
/* p1? == IPartialCommit; p1 */
jumptohere(compst, addoffsetinst(compst, IPartialCommit));
jumptohere(compst, addoffsetinst(compst, IPartialCommit, 0));
codegen(compst, p1, 1, NOINST, fullset);
}
else {
/* <p1 / p2> ==
test(fail(p1)) -> L1; choice L1; <p1>; commit L2; L1: <p2>; L2: */
int pcommit;
int test = codetestset(compst, &cs1, e1);
int pchoice = addoffsetinst(compst, IChoice);
int pchoice = addoffsetinst(compst, IChoice, 0);
codegen(compst, p1, emptyp2, test, fullset);
pcommit = addoffsetinst(compst, ICommit);
pcommit = addoffsetinst(compst, ICommit, 0);
jumptohere(compst, pchoice);
jumptohere(compst, test);
codegen(compst, p2, opt, NOINST, fl);
jumptohere(compst, pcommit);
}
}
}


Expand All @@ -699,9 +734,9 @@ static void codeand (CompileState *compst, TTree *tree, int tt) {
}
else { /* default: Choice L1; p1; BackCommit L2; L1: Fail; L2: */
int pcommit;
int pchoice = addoffsetinst(compst, IChoice);
int pchoice = addoffsetinst(compst, IChoice, 0);
codegen(compst, tree, 0, tt, fullset);
pcommit = addoffsetinst(compst, IBackCommit);
pcommit = addoffsetinst(compst, IBackCommit, 0);
jumptohere(compst, pchoice);
addinstruction(compst, IFail, 0);
jumptohere(compst, pcommit);
Expand Down Expand Up @@ -754,13 +789,28 @@ static void coderep (CompileState *compst, TTree *tree, int opt,
addcharset(compst, st.cs);
}
else {
if (hasleftrecursion(tree)) {
int commit, l2;
int pchoice = NOINST;
if (opt)
jumptohere(compst, addoffsetinst(compst, IPartialCommit, 0));
else
pchoice = addoffsetinst(compst, IChoice, 0);
l2 = gethere(compst);
codegen(compst, tree, 0, NOINST, fullset);
commit = addoffsetinst(compst, IPartialCommit, 0);
jumptothere(compst, commit, l2);
jumptohere(compst, pchoice);
}
else
{
int e1 = getfirst(tree, fullset, &st);
if (headfail(tree) || (!e1 && cs_disjoint(&st, fl))) {
/* L1: test (fail(p1)) -> L2; <p>; jmp L1; L2: */
int jmp;
int test = codetestset(compst, &st, 0);
codegen(compst, tree, opt, test, fullset);
jmp = addoffsetinst(compst, IJmp);
jmp = addoffsetinst(compst, IJmp, 0);
jumptohere(compst, test);
jumptothere(compst, jmp, test);
}
Expand All @@ -771,16 +821,17 @@ static void coderep (CompileState *compst, TTree *tree, int opt,
int test = codetestset(compst, &st, e1);
int pchoice = NOINST;
if (opt)
jumptohere(compst, addoffsetinst(compst, IPartialCommit));
jumptohere(compst, addoffsetinst(compst, IPartialCommit, 0));
else
pchoice = addoffsetinst(compst, IChoice);
pchoice = addoffsetinst(compst, IChoice, 0);
l2 = gethere(compst);
codegen(compst, tree, 0, NOINST, fullset);
commit = addoffsetinst(compst, IPartialCommit);
commit = addoffsetinst(compst, IPartialCommit, 0);
jumptothere(compst, commit, l2);
jumptohere(compst, pchoice);
jumptohere(compst, test);
}
}
}
}

Expand All @@ -793,19 +844,29 @@ static void coderep (CompileState *compst, TTree *tree, int opt,
** use the default code (a choice plus a failtwice).
*/
static void codenot (CompileState *compst, TTree *tree) {

if (hasleftrecursion(tree)) {
int pchoice = addoffsetinst(compst, IChoice, 0);
codegen(compst, tree, 0, NOINST, fullset);
addinstruction(compst, IFailTwice, 0);
jumptohere(compst, pchoice);
}
else
{
Charset st;
int e = getfirst(tree, fullset, &st);
int test = codetestset(compst, &st, e);
if (headfail(tree)) /* test (fail(p1)) -> L1; fail; L1: */
addinstruction(compst, IFail, 0);
else {
/* test(fail(p))-> L1; choice L1; <p>; failtwice; L1: */
int pchoice = addoffsetinst(compst, IChoice);
int pchoice = addoffsetinst(compst, IChoice, 0);
codegen(compst, tree, 0, NOINST, fullset);
addinstruction(compst, IFailTwice, 0);
jumptohere(compst, pchoice);
}
jumptohere(compst, test);
}
}


Expand All @@ -822,7 +883,7 @@ static void correctcalls (CompileState *compst, int *positions,
int n = code[i].i.key; /* rule number */
int rule = positions[n]; /* rule position */
assert(rule == from || code[rule - 1].i.code == IRet);
if (code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
if (code[i].i.aux == 0 && code[finaltarget(code, i + 2)].i.code == IRet) /* call; ret ? */
code[i].i.code = IJmp; /* tail call */
else
code[i].i.code = ICall;
Expand All @@ -841,8 +902,8 @@ static void codegrammar (CompileState *compst, TTree *grammar) {
int positions[MAXRULES];
int rulenumber = 0;
TTree *rule;
int firstcall = addoffsetinst(compst, ICall); /* call initial rule */
int jumptoend = addoffsetinst(compst, IJmp); /* jump to the end */
int firstcall = addoffsetinst(compst, ICall, sib1(grammar)->lr); /* call initial rule */
int jumptoend = addoffsetinst(compst, IJmp, 0); /* jump to the end */
int start = gethere(compst); /* here starts the initial rule */
jumptohere(compst, firstcall);
for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
Expand All @@ -857,7 +918,7 @@ static void codegrammar (CompileState *compst, TTree *grammar) {


static void codecall (CompileState *compst, TTree *call) {
int c = addoffsetinst(compst, IOpenCall); /* to be corrected later */
int c = addoffsetinst(compst, IOpenCall, call->cap); /* to be corrected later */
getinstr(compst, c).i.key = sib2(call)->cap; /* rule number */
assert(sib2(call)->tag == TRule);
}
Expand Down
2 changes: 2 additions & 0 deletions lpcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ int tocharset (TTree *tree, Charset *cs);
int checkaux (TTree *tree, int pred);
int fixedlenx (TTree *tree, int count, int len);
int hascaptures (TTree *tree);
int hasleftrecursion (TTree *tree);
int lp_gc (lua_State *L);
Instruction *compile (lua_State *L, Pattern *p);
void realloccode (lua_State *L, Pattern *p, int nsize);
Expand All @@ -23,6 +24,7 @@ int sizei (const Instruction *i);

#define PEnullable 0
#define PEnofail 1
#define PEleftrecursion 2

#define nofail(t) checkaux(t, PEnofail)
#define nullable(t) checkaux(t, PEnullable)
Expand Down
41 changes: 36 additions & 5 deletions lptree.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,12 +552,17 @@ static int lp_choice (lua_State *L) {
TTree *t = newcharset(L);
loopset(i, treebuffer(t)[i] = st1.cs[i] | st2.cs[i]);
}
else
{
if (hasleftrecursion(t1) || hasleftrecursion(t2))
newroot2sib(L, TChoice);
else if (nofail(t1) || t2->tag == TFalse)
lua_pushvalue(L, 1); /* true / x => true, x / false => x */
else if (t1->tag == TFalse)
lua_pushvalue(L, 2); /* false / x => x */
else
newroot2sib(L, TChoice);
}
return 1;
}

Expand Down Expand Up @@ -690,7 +695,9 @@ static int lp_behind (lua_State *L) {
** Create a non-terminal
*/
static int lp_V (lua_State *L) {
int val = luaL_optint(L, 2, 0);
TTree *tree = newleaf(L, TOpenCall);
tree->cap = val;
luaL_argcheck(L, !lua_isnoneornil(L, 1), 1, "non-nil value expected");
tree->key = addtonewktable(L, 0, 1);
return 1;
Expand Down Expand Up @@ -935,6 +942,7 @@ static void buildgrammar (lua_State *L, TTree *grammar, int frule, int n) {
nd->tag = TRule;
nd->key = 0;
nd->cap = i; /* rule number */
nd->lr = 0;
nd->u.ps = rulesize + 1; /* point to next rule */
memcpy(sib1(nd), rn, rulesize * sizeof(TTree)); /* copy rule */
mergektable(L, ridx, sib1(nd)); /* merge its ktable into new one */
Expand Down Expand Up @@ -972,8 +980,9 @@ static int verifyerror (lua_State *L, int *passed, int npassed) {
for (i = npassed - 1; i >= 0; i--) { /* search for a repetition */
for (j = i - 1; j >= 0; j--) {
if (passed[i] == passed[j]) {
lua_rawgeti(L, -1, passed[i]); /* get rule's key */
return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
// lua_rawgeti(L, -1, passed[i]); /* get rule's key */
// return luaL_error(L, "rule '%s' may be left recursive", val2str(L, -1));
return PEleftrecursion;
}
}
}
Expand Down Expand Up @@ -1006,12 +1015,15 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
/* return verifyrule(L, sib2(tree), passed, npassed); */
tree = sib2(tree); goto tailcall;
case TSeq: /* only check 2nd child if first is nullable */
if (!verifyrule(L, sib1(tree), passed, npassed, 0))
return nullable;
switch (verifyrule(L, sib1(tree), passed, npassed, 0)) {
case 0: return nullable;
case PEleftrecursion: return PEleftrecursion;
/* else return verifyrule(L, sib2(tree), passed, npassed); */
}
tree = sib2(tree); goto tailcall;
case TChoice: /* must check both children */
nullable = verifyrule(L, sib1(tree), passed, npassed, nullable);
if (nullable == PEleftrecursion) return PEleftrecursion;
/* return verifyrule(L, sib2(tree), passed, npassed, nullable); */
tree = sib2(tree); goto tailcall;
case TRule:
Expand All @@ -1028,19 +1040,38 @@ static int verifyrule (lua_State *L, TTree *tree, int *passed, int npassed,
}
}

/*
** Walk a pattern tree and flag every call to a left-recursive rule.
** A TCall node whose target rule (its 'sib2') has 'lr' set and whose own
** 'cap' field is still zero gets 'cap' set to 1; a call whose 'cap' is
** already non-zero is left untouched. Children are visited according to
** 'numsiblings'.
*/
static void findleftrecursivecalls (TTree *tree) {
  for (;;) {
    int nsibs;
    if (tree->tag == TCall && sib2(tree)->lr && tree->cap == 0)
      tree->cap = 1;  /* call may be left recursive */
    nsibs = numsiblings[tree->tag];
    if (nsibs == 2) {
      findleftrecursivecalls(sib1(tree));
      tree = sib2(tree);  /* continue with the second child in this loop */
    }
    else if (nsibs == 1) {
      tree = sib1(tree);  /* only child: keep descending iteratively */
    }
    else {
      assert(nsibs == 0);  /* leaf: nothing below to visit */
      return;
    }
  }
}

static void verifygrammar (lua_State *L, TTree *grammar) {
int passed[MAXRULES];
TTree *rule;
/* check left-recursive rules */
for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
if (rule->key == 0) continue; /* unused rule */
verifyrule(L, sib1(rule), passed, 0, 0);
rule->lr = verifyrule(L, sib1(rule), passed, 0, 0) == PEleftrecursion ? 1 : 0;
}
assert(rule->tag == TTrue);
/* check infinite loops inside rules */
for (rule = sib1(grammar); rule->tag == TRule; rule = sib2(rule)) {
if (rule->key == 0) continue; /* unused rule */
findleftrecursivecalls(sib1(rule));
if (checkloops(sib1(rule))) {
lua_rawgeti(L, -1, rule->key); /* get rule's key */
luaL_error(L, "empty loop in rule '%s'", val2str(L, -1));
Expand Down
1 change: 1 addition & 0 deletions lptree.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ extern const byte numsiblings[];
typedef struct TTree {
byte tag;
byte cap; /* kind of capture (if it is a capture) */
byte lr;
unsigned short key; /* key in ktable for Lua data (0 if no key) */
union {
int ps; /* occasional second sibling */
Expand Down
Loading

0 comments on commit 17bb675

Please sign in to comment.