Skip to content

Commit

Permalink
fix for zaach#205/zaach#342: when we encounter an unresolvable confli…
Browse files Browse the repository at this point in the history
…ct in LALR mode, we tag the relevant *production* (it turns out tagging the *state* as well *corrupts* the parse table; something to look into later when my head is clearer as I don't see why, right now) and rerun the state machine generation process, where we make sure the UNION operator for the conflicting production WILL NOT merge it with other productions which may have the same FOLLOW set as this conflicting rule. As such, we fundamentally are 'locally' breaking LALR principles and turning them into LR-style behaviour for the conflict zone alone.

(Side Note To Self: I recall having seen a short 1-column letter to the ACM (published by the ACM) from a few decades ago which mentioned that the algorithm used here is flawed, but cannot track it down in my library, darn it! So no reference to check if there's more amiss than this (zaach#342 + zaach#205). Also would like to check myself if this is IELR 're-invented', either in part or whole? Now I am curious... (Never got more than the abstract for that one and bison code isn't exactly obvious to me either.)  Doubly dang-it!   |:-(   )
  • Loading branch information
GerHobbelt committed Feb 3, 2017
1 parent 93e123d commit cbc0393
Showing 1 changed file with 143 additions and 61 deletions.
204 changes: 143 additions & 61 deletions lib/jison.js
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,9 @@ generator.constructor = function Jison_Generator(grammar, lexGrammarStr, opt) {
this.conflicts = 0;
this.conflicting_states = [];
this.resolutions = [];
this.conflict_productions_LU = {};
this.conflict_states_LU = {};
this.conflict_fixing_round = false;
this.options = options;
this.parseParams = grammar.parseParams;
this.yy = {}; // accessed as yy free variable in the parser/lexer actions
Expand Down Expand Up @@ -2373,6 +2376,17 @@ lrGeneratorMixin.parseTable = function lrParseTable(itemSets) {
self.resolutions.push([k, stackSymbol, sol]);
if (sol.bydefault) {
self.conflicts++;
if (!self.conflict_fixing_round) {
self.conflict_productions_LU[item.production.id] = true;
self.conflict_states_LU[k] = true;

if (devDebug > 4) Jison.print('Registering conflict: ', {
prod_id: item.production.id,
stateNum: k,
state: state,
production: item.production
});
}

self.warn('Conflict in grammar: multiple actions possible when lookahead token is ', stackSymbol, ' in state ', k, '\n- ', printAction(sol.r, self), '\n- ', printAction(sol.s, self), '\n (', sol.msg, ')');
conflictedStates[k] = {
Expand Down Expand Up @@ -5677,79 +5691,98 @@ var lalr = generator.beget(lookaheadMixin, generatorMixin, lrGeneratorMixin, {
}

options = options || {};
this.states = this.canonicalCollection();

if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER canonicalCollection:');
this.displayFollowSets();
Jison.print('\n');
}

this.terms_ = {};
for (var round = 1; round <= 2; round++) {
this.states = this.canonicalCollection();

var newg = this.newg = typal.beget(lookaheadMixin, {
oldg: this,
trace: this.trace,
nterms_: {},
DEBUG: false,
go_: function (productionSymbol, productionHandle) {
var stateNum = productionSymbol.split(':')[0]; // grab state #
assert(stateNum == +stateNum);
stateNum = +stateNum;
productionHandle = productionHandle.map(function (rhsElem) {
return rhsElem.slice(rhsElem.indexOf(':') + 1);
});
return this.oldg.go(stateNum, productionHandle, productionSymbol);
if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER canonicalCollection:');
this.displayFollowSets();
Jison.print('\n');
}
});
newg.nonterminals = {};
newg.productions = [];

//this.inadequateStates = [];
this.terms_ = {};

Jison.print('\n-------------------------------------------\nROUND: ' + round);
var newg = this.newg = typal.beget(lookaheadMixin, {
oldg: this,
trace: this.trace,
nterms_: {},
DEBUG: false,
go_: function (productionSymbol, productionHandle) {
var stateNum = productionSymbol.split(':')[0]; // grab state #
assert(stateNum == +stateNum);
stateNum = +stateNum;
productionHandle = productionHandle.map(function (rhsElem) {
return rhsElem.slice(rhsElem.indexOf(':') + 1);
});
return this.oldg.go(stateNum, productionHandle, productionSymbol);
}
});
newg.nonterminals = {};
newg.productions = [];

// if true, only lookaheads in inadequate states are computed (faster, larger table)
// if false, lookaheads for all reductions will be computed (slower, smaller table)
//
// WARNING: using this has a negative effect on your error reports:
// a lot of 'expected' symbols are reported which are not in the real FOLLOW set,
// resulting in 'illogical' error messages!
this.onDemandLookahead = !!options.onDemandLookahead;
if (this.DEBUG) Jison.print('LALR: using on-demand look-ahead: ', (this.onDemandLookahead ? 'yes' : 'no'));
//this.inadequateStates = [];

this.buildNewGrammar();
// if true, only lookaheads in inadequate states are computed (faster, larger table)
// if false, lookaheads for all reductions will be computed (slower, smaller table)
//
// WARNING: using this has a negative effect on your error reports:
// a lot of 'expected' symbols are reported which are not in the real FOLLOW set,
// resulting in 'illogical' error messages!
this.onDemandLookahead = !!options.onDemandLookahead;
if (this.DEBUG) Jison.print('LALR: using on-demand look-ahead: ', (this.onDemandLookahead ? 'yes' : 'no'));

this.buildNewGrammar();

if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER buildNewGrammar: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER buildNewGrammar: ORIGINAL GRAMMAR');
this.displayFollowSets();
}

if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER buildNewGrammar: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER buildNewGrammar: ORIGINAL GRAMMAR');
this.displayFollowSets();
}
newg.computeLookaheads();

newg.computeLookaheads();
if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER computeLookaheads: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER computeLookaheads: ORIGINAL GRAMMAR');
this.displayFollowSets();
}

if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER computeLookaheads: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER computeLookaheads: ORIGINAL GRAMMAR');
this.displayFollowSets();
}
this.unionLookaheads();

this.unionLookaheads();
if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER unionLookaheads: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER unionLookaheads: ORIGINAL GRAMMAR');
this.displayFollowSets();
}

if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER unionLookaheads: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER unionLookaheads: ORIGINAL GRAMMAR');
this.displayFollowSets();
}
this.table = this.parseTable(this.states);

this.table = this.parseTable(this.states);
if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER parseTable: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER parseTable: ORIGINAL GRAMMAR');
this.displayFollowSets();
}

if (this.DEBUG) {
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER parseTable: NEW GRAMMAR');
newg.displayFollowSets();
Jison.print('\n-------------------------------------------\nSymbol/Follow sets AFTER parseTable: ORIGINAL GRAMMAR');
this.displayFollowSets();
// When some productions are flagged as conflicting, we redo the G' generation and consequent union-ing of the productions
// in the `.goes[]` arrays.
if (this.conflicts === 0) {
break;
}

Jison.print('\n-------------------------------------------\nNew round to fix conflicts?????????????????????????? ', {
round: round,
conflict_fixing_round: this.conflict_fixing_round,
states: this.conflict_states_LU,
productions: this.conflict_productions_LU
});

this.conflict_fixing_round = true;
}

this.defaultActions = findDefaults(this.table, this.hasErrorRecovery);
Expand Down Expand Up @@ -5831,6 +5864,23 @@ var lalr = generator.beget(lookaheadMixin, generatorMixin, lrGeneratorMixin, {

// store the transition that gets 'backed up to' after reduction on path
var handle = item.production.handle.join(' ');
if (self.conflict_fixing_round && self.conflict_states_LU[i]) {
// handle += ':C' + i;
}
if (self.conflict_fixing_round && self.conflict_productions_LU[item.production.id]) {
handle += ':P' + item.production.id;
}

Jison.print('prod creation for: ', {
prod_id: item.production.id,
new_prod_id: p.id,
stateNum: i,
symbol: symbol,
state: state,
production: item.production,
new_production: p
});

var goes = self.states.item(pathInfo.endState).goes;
if (!goes[handle]) {
goes[handle] = [];
Expand Down Expand Up @@ -5860,7 +5910,8 @@ var lalr = generator.beget(lookaheadMixin, generatorMixin, lrGeneratorMixin, {
newg = this.newg;
// var states = !!this.onDemandLookahead ? this.inadequateStates : this.states;

this.states.forEach(function union_states_forEach(state) {
this.states.forEach(function union_states_forEach(state, i) {
i = +i;
//assert(state.inadequate ? this.inadequate : true);
var treat_me = (self.onDemandLookahead ? this.inadequate || state.inadequate : true);
if (state.reductions.length && treat_me) {
Expand All @@ -5870,6 +5921,25 @@ var lalr = generator.beget(lookaheadMixin, generatorMixin, lrGeneratorMixin, {
follows[item.follows[k]] = true;
}
var handle = item.production.handle.join(' ');
if (self.conflict_fixing_round && self.conflict_states_LU[i]) {
// handle += ':C' + i;
}
if (self.conflict_fixing_round && self.conflict_productions_LU[item.production.id]) {
handle += ':P' + item.production.id;
}
if (!state.goes[handle]) {
state.goes[handle] = [];
}

if (devDebug > 2) Jison.print('not-yet-unioned item', {
handle: handle,
item: item,
follows: follows,
goes: state.goes,
state: state,
stateNum: i
});

state.goes[handle].forEach(function reduction_goes_forEach(symbol) {
newg.nonterminals[symbol].follows.forEach(function goes_follows_forEach(symbol) {
var terminal = self.terms_[symbol];
Expand Down Expand Up @@ -5906,6 +5976,18 @@ var lalrGeneratorDebug = {
},
beforeunionLookaheads: function () {
this.trace('Computing lookaheads.');
},
afterbuildNewGrammar: function () {
traceStates(this.trace, this.states, 'after LALR::buildNewGrammar()');
},
afterunionLookaheads: function () {
traceStates(this.trace, this.states, 'after LALR::unionLookaheads()');
},
aftercomputeLookaheads: function () {
traceStates(this.trace, this.states, 'after LALR::computeLookaheads()');
},
aftercanonicalCollection: function (states /* as produced by `this.canonicalCollection()` */ ) {
traceStates(this.trace, states, 'as produced by LALR::canonicalCollection()');
}
};

Expand Down

0 comments on commit cbc0393

Please sign in to comment.