From 7976babd968c8e081ac43078c34999e9beed7064 Mon Sep 17 00:00:00 2001
From: Ger Hobbelt
Date: Thu, 9 Nov 2017 23:32:41 +0100
Subject: [PATCH] working on #29: fix crash in jison when reporting an error on an epsilon rule (which has no location info); add / introduce the `lexer::deriveLocationInfo()` API to help you & us to construct a more-or-less useful/sane location info object from the context surrounding it when the requested location info itself is not available.

---
Review notes (not part of the commit message):
- deriveLocationInfo(): plan B now reads the range of `preceding` / `following`
  (it used to dereference `actual.range`, which is the wrong object and throws
  when `actual` carries no range).
- deriveLocationInfo(): the result is seeded with line 0 ('unknown') so that a
  NULL `actual` really falls through to the documented heuristics.
- canIUse(): returns the `rv` object it builds (it used to return the
  undeclared `r`, i.e. a ReferenceError on every call).
- Hunk headers / diffstat were recomputed for the added documentation lines;
  indentation was reconstructed and may need `git apply --ignore-whitespace`.

 packages/ebnf-parser/bnf.y         |   2 +-
 packages/jison-lex/regexp-lexer.js | 179 ++++++++++++++++++++++++++++-
 2 files changed, 177 insertions(+), 4 deletions(-)

diff --git a/packages/ebnf-parser/bnf.y b/packages/ebnf-parser/bnf.y
index 88fad405f..eb9494410 100644
--- a/packages/ebnf-parser/bnf.y
+++ b/packages/ebnf-parser/bnf.y
@@ -631,7 +631,7 @@ handle_action
                     You cannot specify a precedence override for an epsilon (a.k.a. empty) rule!

                     Erroneous area:
-                    ${yylexer.prettyPrintRange(@handle)}
+                    ${yylexer.prettyPrintRange(@handle, @0, @action /* @handle is very probably NULL! We need this one for some decent location info! */)}
                 `);
             }
             $$.push($prec);
diff --git a/packages/jison-lex/regexp-lexer.js b/packages/jison-lex/regexp-lexer.js
index 2e67d6609..843b11726 100644
--- a/packages/jison-lex/regexp-lexer.js
+++ b/packages/jison-lex/regexp-lexer.js
@@ -1874,6 +1874,122 @@ return `{
         return pre + this.upcomingInput(maxPostfix).replace(/\\s/g, ' ') + '\\n' + c + '^';
     },

+    /**
+     * return an YYLLOC info object derived off the given context (actual, preceding, following, current).
+     * Use this method when the given \`actual\` location is not guaranteed to exist (i.e. when
+     * it MAY be NULL) and you MUST have a valid location info object anyway:
+     * then we take the given context of the \`preceding\` and \`following\` locations, IFF those are available,
+     * and reconstruct the \`actual\` location info from those.
+     * If this fails, the heuristic is to take the \`current\` location, IFF available.
+     * If this fails as well, we assume the sought location is at/around the current lexer position
+     * and then produce that one as a response. DO NOTE that these heuristic/derived location info
+     * values MAY be inaccurate!
+     *
+     * NOTE: \`deriveLocationInfo()\` ALWAYS produces a location info object *copy* of \`actual\`, not just
+     * a *reference* hence all input location objects can be assumed to be 'constant' (function has no side-effects).
+     *
+     * @public
+     * @this {RegExpLexer}
+     * @param {Object} [actual] - the location info that was asked for; MAY be NULL or incomplete
+     * @param {Object} [preceding] - location info of what comes before \`actual\`; its end becomes the derived start
+     * @param {Object} [following] - location info of what comes after \`actual\`; its start becomes the derived end
+     * @param {Object} [current] - fallback location info, used when the start and/or end is still unknown after that
+     * @returns {Object} a fresh object \`{first_line, first_column, last_line, last_column, range: [start, end]}\`
+     */
+    deriveLocationInfo: function lexer_deriveYYLLOC(actual, preceding, following, current) {
+        // Seed with line 0 (= 'unknown') instead of a valid line 1: otherwise a NULL \`actual\`
+        // would skip every heuristic below and always come back as 1:0-1:0.
+        var loc = {
+            first_line: 0,
+            first_column: 0,
+            last_line: 0,
+            last_column: 0,
+
+            range: [0, 0]
+        };
+        if (actual) {
+            loc.first_line = actual.first_line | 0;
+            loc.last_line = actual.last_line | 0;
+            loc.first_column = actual.first_column | 0;
+            loc.last_column = actual.last_column | 0;
+
+            if (actual.range) {
+                loc.range[0] = actual.range[0] | 0;
+                loc.range[1] = actual.range[1] | 0;
+            }
+        }
+        if (loc.first_line <= 0 || loc.last_line < loc.first_line) {
+            // plan B: heuristic using preceding and following:
+            if (loc.first_line <= 0 && preceding) {
+                loc.first_line = preceding.last_line | 0;
+                loc.first_column = preceding.last_column | 0;
+
+                if (preceding.range) {
+                    loc.range[0] = preceding.range[1] | 0;    // derived start = end of the preceding location
+                }
+            }
+
+            if ((loc.last_line <= 0 || loc.last_line < loc.first_line) && following) {
+                loc.last_line = following.first_line | 0;
+                loc.last_column = following.first_column | 0;
+
+                if (following.range) {
+                    loc.range[1] = following.range[0] | 0;    // derived end = start of the following location
+                }
+            }
+
+            // plan C?: see if the 'current' location is useful/sane too:
+            if (loc.first_line <= 0 && current && (loc.last_line <= 0 || current.last_line <= loc.last_line)) {
+                loc.first_line = current.first_line | 0;
+                loc.first_column = current.first_column | 0;
+
+                if (current.range) {
+                    loc.range[0] = current.range[0] | 0;
+                }
+            }
+
+            if (loc.last_line <= 0 && current && (loc.first_line <= 0 || current.first_line >= loc.first_line)) {
+                loc.last_line = current.last_line | 0;
+                loc.last_column = current.last_column | 0;
+
+                if (current.range) {
+                    loc.range[1] = current.range[1] | 0;
+                }
+            }
+        }
+        // sanitize: fix last_line BEFORE we fix first_line as we use the 'raw' value of the latter
+        // or plan D heuristics to produce a 'sensible' last_line value:
+        if (loc.last_line <= 0) {
+            if (loc.first_line <= 0) {
+                loc.first_line = this.yylloc.first_line;
+                loc.last_line = this.yylloc.last_line;
+                loc.first_column = this.yylloc.first_column;
+                loc.last_column = this.yylloc.last_column;
+
+                loc.range[0] = this.yylloc.range[0];
+                loc.range[1] = this.yylloc.range[1];
+            } else {
+                loc.last_line = this.yylloc.last_line;
+                loc.last_column = this.yylloc.last_column;
+
+                loc.range[1] = this.yylloc.range[1];
+            }
+        }
+        if (loc.first_line <= 0) {
+            loc.first_line = loc.last_line;
+            loc.first_column = 0; // loc.last_column;
+
+            loc.range[1] = loc.range[0];    // NOTE(review): this overwrites the range end with the start; was \`loc.range[0] = loc.range[1]\` intended? TODO confirm
+        }
+        if (loc.first_column < 0) {
+            loc.first_column = 0;
+        }
+        if (loc.last_column < 0) {
+            loc.last_column = (loc.first_column > 0 ? loc.first_column : 80);
+        }
+        return loc;
+    },
+
     /**
      * return a string which displays the lines & columns of input which are referenced
      * by the given location info range, plus a few lines of context.
@@ -1920,13 +2036,12 @@ return `{
      * @this {RegExpLexer}
      */
     prettyPrintRange: function lexer_prettyPrintRange(loc, context_loc, context_loc2) {
-        var error_size = loc.last_line - loc.first_line;
+        loc = this.deriveLocationInfo(loc, context_loc, context_loc2);
         const CONTEXT = 3;
         const CONTEXT_TAIL = 1;
         const MINIMUM_VISIBLE_NONEMPTY_LINE_COUNT = 2;
         var input = this.matched + this._input;
         var lines = input.split('\\n');
-        //var show_context = (error_size < 5 || context_loc);
         var l0 = Math.max(1, (context_loc ? context_loc.first_line : loc.first_line - CONTEXT));
         var l1 = Math.max(1, (context_loc2 ? context_loc2.last_line : loc.last_line + CONTEXT_TAIL));
         var lineno_display_width = (1 + Math.log10(l1 | 1) | 0);
@@ -2250,21 +2365,79 @@ return `{
     lex: function lexer_lex() {
         var r;
         // allow the PRE/POST handlers set/modify the return token for maximum flexibility of the generated lexer:
+        if (typeof this.pre_lex === 'function') {
+            r = this.pre_lex.call(this, 0);
+        }
         if (typeof this.options.pre_lex === 'function') {
-            r = this.options.pre_lex.call(this);
+            // (also account for a userdef function which does not return any value: keep the token as is)
+            r = this.options.pre_lex.call(this, r) || r;
+        }
+        if (this.yy && typeof this.yy.pre_lex === 'function') {
+            // (also account for a userdef function which does not return any value: keep the token as is)
+            r = this.yy.pre_lex.call(this, r) || r;
         }

         while (!r) {
             r = this.next();
         }

+        if (this.yy && typeof this.yy.post_lex === 'function') {
+            // (also account for a userdef function which does not return any value: keep the token as is)
+            r = this.yy.post_lex.call(this, r) || r;
+        }
         if (typeof this.options.post_lex === 'function') {
             // (also account for a userdef function which does not return any value: keep the token as is)
             r = this.options.post_lex.call(this, r) || r;
         }
+        if (typeof this.post_lex === 'function') {
+            // (also account for a userdef function which does not return any value: keep the token as is)
+            r = this.post_lex.call(this, r) || r;
+        }
+        return r;
+    },
+
+    /**
+     * return next match that has a token. Identical to the \`lex()\` API but does not invoke any of the
+     * \`pre_lex()\` nor any of the \`post_lex()\` callbacks.
+     *
+     * @public
+     * @this {RegExpLexer}
+     * @returns {*} the first truthy value produced by \`next()\`
+     */
+    fastLex: function lexer_fastLex() {
+        var r;
+
+        while (!r) {
+            r = this.next();
+        }
+
         return r;
     },

+    /**
+     * return info about the lexer state that can help a parser or other lexer API user to use the
+     * most efficient means available. This API is provided to aid run-time performance for larger
+     * systems which employ this lexer.
+     *
+     * @public
+     * @this {RegExpLexer}
+     * @returns {Object} \`{fast_lex: boolean}\`: TRUE when no \`pre_lex\` / \`post_lex\` callback is set on the lexer, its \`options\` or its \`yy\`
+     */
+    canIUse: function lexer_canIUse() {
+        var rv = {
+            fast_lex: !(
+                typeof this.pre_lex === 'function' ||
+                typeof this.options.pre_lex === 'function' ||
+                (this.yy && typeof this.yy.pre_lex === 'function') ||
+                (this.yy && typeof this.yy.post_lex === 'function') ||
+                typeof this.options.post_lex === 'function' ||
+                typeof this.post_lex === 'function'
+            ),
+        };
+        return rv;
+    },
+
+
     /**
      * backwards compatible alias for \`pushState()\`;
      * the latter is symmetrical with \`popState()\` and we advise to use