From a42627bca28f8e896d6f37b4ecfda1797418a12d Mon Sep 17 00:00:00 2001 From: Hannah Wolfe Date: Tue, 17 Jul 2018 16:02:14 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fixed=20single=20character=20lit?= =?UTF-8?q?eral=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit closes #2 - There should be no restriction on the length of a literal - This is a horrible bug that has always existed in the language - Fixed by changing the format of the literal token to expect 1 valid start char followed by 0 or more chars NOTE: - This also had an impact on colon processing - As a result of this fix, it is now invalid to have a colon at the start of a literal - We can revisit this if it turns out to be a problem --- dist/parser.js | 2 +- src/nql.l | 4 ++-- test/lexer.test.js | 18 +++++++++++++----- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/dist/parser.js b/dist/parser.js index 1479ee6..d1c8284 100644 --- a/dist/parser.js +++ b/dist/parser.js @@ -696,7 +696,7 @@ case 18:return 27; break; } }, -rules: [/^(?:\s+)/i,/^(?:NULL\b)/i,/^(?:TRUE\b)/i,/^(?:FALSE\b)/i,/^(?:[a-zA-Z_][a-zA-Z0-9_\.]*[:])/i,/^(?:[0-9]+(\.[0-9]+)?\b(?![\-]))/i,/^(?:\[)/i,/^(?:\])/i,/^(?:([^\s'"\+\,\(\)\>\<=\[\]\-])(\\(['"\+\,\(\)\>\<=\[\]])|([^\s'"\+\,\(\)\>\<=\[\]]))+)/i,/^(?:['](\\['"]|[^'"])+?['])/i,/^(?:\()/i,/^(?:\))/i,/^(?:,)/i,/^(?:\+)/i,/^(?:-)/i,/^(?:>=)/i,/^(?:<=)/i,/^(?:>)/i,/^(?:<)/i], +rules: [/^(?:\s+)/i,/^(?:NULL\b)/i,/^(?:TRUE\b)/i,/^(?:FALSE\b)/i,/^(?:[a-zA-Z_][a-zA-Z0-9_\.]*[:])/i,/^(?:[0-9]+(\.[0-9]+)?\b(?![\-]))/i,/^(?:\[)/i,/^(?:\])/i,/^(?:([^\s'"\+\,\(\)\>\<=\[\]\-:]){1}(\\(['"\+\,\(\)\>\<=\[\]])|([^\s'"\+\,\(\)\>\<=\[\]]))*)/i,/^(?:['](\\['"]|[^'"])+?['])/i,/^(?:\()/i,/^(?:\))/i,/^(?:,)/i,/^(?:\+)/i,/^(?:-)/i,/^(?:>=)/i,/^(?:<=)/i,/^(?:>)/i,/^(?:<)/i], conditions: {"INITIAL":{"rules":[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18],"inclusive":true}} }); lexer.parseError = function(errStr, object) { diff --git a/src/nql.l b/src/nql.l index f7eb05d..57df154 100644 --- a/src/nql.l +++ b/src/nql.l @@ -4,7 +4,7 @@ %options case-insensitive -badcharsincnot [^\s'"\+\,\(\)\>\<=\[\]\-] +badcharsincnot [^\s'"\+\,\(\)\>\<=\[\]\-:] badcharsnonot [^\s'"\+\,\(\)\>\<=\[\]] escapable ['"\+\,\(\)\>\<=\[\]] @@ -19,7 +19,7 @@ escapable ['"\+\,\(\)\>\<=\[\]] [0-9]+(\.[0-9]+)?\b(?![\-]) return 'NUMBER'; '[' return 'LBRACKET'; ']' return 'RBRACKET'; -{badcharsincnot}(\\{escapable}|{badcharsnonot})+ return 'LITERAL'; +{badcharsincnot}{1}(\\{escapable}|{badcharsnonot})* return 'LITERAL'; ['](\\['"]|[^'"])+?['] return 'STRING'; '(' return 'LPAREN'; ')' return 'RPAREN'; diff --git a/test/lexer.test.js b/test/lexer.test.js index 0f376b9..f85495b 100644 --- a/test/lexer.test.js +++ b/test/lexer.test.js @@ -38,6 +38,7 @@ describe('Lexer', function () { it('can recognise <=', function () { lex('<=').should.eql([{token: 'LTE', matched: '<='}]); }); + it('cannot recognise :', function () { (function () { lex(':'); @@ -317,11 +318,8 @@ describe('Lexer', function () { }); describe('LITERAL vs PROP', function () { - it('should match colon in string as PROP before, literal after', function () { - lex(':test').should.eql([ - {token: 'LITERAL', matched: ':test'} - ]); - + // We currently do not allow colons to exist at the start of a literal + it('should match colon correctly', function () { lex('te:st').should.eql([ {token: 'PROP', matched: 'te:'}, {token: 'LITERAL', matched: 'st'} @@ -330,6 +328,16 @@ describe('Lexer', function () { lex('test:').should.eql([ {token: 'PROP', matched: 'test:'} ]); + + // We can't match 2 colons, as this would put one at the start of the literal + (function () { + lex('te::st'); + }).should.throw(lexicalError); + + // We can't match a colon at the start of a literal + (function () { + lex(':test'); + }).should.throw(lexicalError); }); it('should only match colon-at-end as PROP if PROP is valPROP', function () {