From 56c9fca1b15178d9f08a33440e170b346474269d Mon Sep 17 00:00:00 2001 From: surister Date: Wed, 16 Oct 2024 16:45:31 +0200 Subject: [PATCH] Fix missing error context in error collection crashing sqlparse --- CHANGES.md | 1 + .../cratedb_sqlparse/parser.js | 18 ++++++++-- cratedb_sqlparse_js/tests/exceptions.test.js | 34 ++++++++++++++++++- .../cratedb_sqlparse/parser.py | 22 ++++++++++-- cratedb_sqlparse_py/tests/test_exceptions.py | 13 +++++++ 5 files changed, 81 insertions(+), 7 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 497fedd..d5382a6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,7 @@ ## Unreleased - Export `Statement` in both Python and Javascript target - Fixed query parsing when expression includes special characters like `\n`, `\r`, or `\t` +- Fixed sqlparse crash on missing error context ## 2024/09/18 v0.0.7 - Improve error matching on single statement diff --git a/cratedb_sqlparse_js/cratedb_sqlparse/parser.js b/cratedb_sqlparse_js/cratedb_sqlparse/parser.js index 0aef943..96b5380 100644 --- a/cratedb_sqlparse_js/cratedb_sqlparse/parser.js +++ b/cratedb_sqlparse_js/cratedb_sqlparse/parser.js @@ -124,11 +124,22 @@ class ExceptionCollectorListener extends ErrorListener { syntaxError(recognizer, offendingSymbol, line, column, msg, e) { super.syntaxError(recognizer, offendingSymbol, line, column, msg, e); - const error = new ParseError( - e.ctx.parser.getTokenStream().getText(new Interval( + let query; + + if (e) { + query = e.ctx.parser.getTokenStream().getText(new Interval( e.ctx.start, e.offendingToken.tokenIndex) - ), + ) + + } else { + const min_to_check = Math.max(1, offendingSymbol.tokenIndex - 2) + const tokens = recognizer.getTokenStream().tokens.slice(min_to_check, offendingSymbol.tokenIndex + 1) + query = tokens.map((el) => el.text).join("") + } + + const error = new ParseError( + query, msg, offendingSymbol, e @@ -221,6 +232,7 @@ export function sqlparse(query, raise_exception = false) { const statementsContext = tree.children.filter((children) => children instanceof SqlBaseParser.StatementContext) let statements = [] + for (const statementContext of statementsContext) { let stmt = new Statement(statementContext) diff --git a/cratedb_sqlparse_js/tests/exceptions.test.js b/cratedb_sqlparse_js/tests/exceptions.test.js index 0e5f3f7..aaea1ac 100644 --- a/cratedb_sqlparse_js/tests/exceptions.test.js +++ b/cratedb_sqlparse_js/tests/exceptions.test.js @@ -15,7 +15,14 @@ test('Error should be collected and not thrown by default', () => { expect(() => stmts).not.toThrowError() }) -test('Several Errors should be collected and not thrown by default', () => { +test('Single error should be collected', () => { + const stmt = sqlparse("SELECT A,B,C,D FROM tbl1 WHERE A ? '%A'") + expect(stmt[0].exception).toBeDefined() + expect(stmt[0].exception.msg).toBe("mismatched input '?' expecting {, ';'}") + expect(stmt[0].exception.query).toBe("SELECT A,B,C,D FROM tbl1 WHERE A ?") +}) + +test('Several errors should be collected and not thrown by default', () => { const stmts = sqlparse(` SELECT A FROM tbl1 where; SELECT 1; @@ -73,6 +80,31 @@ test('Exception message is correct', () => { }) +test('White or special characters should not avoid exception catching', () => { + // https://github.com/crate/cratedb-sqlparse/issues/67 + const stmts = [ + `SELECT 1\n limit `, + `SELECT 1\r limit `, + `SELECT 1\t limit `, + `SELECT 1 limit ` + ] + for (const stmt in stmts) { + let r = sqlparse(stmt) + expect(r[0].exception).toBeDefined(); + } +}) + +test('Missing token error should not panic', ()=> { + // See https://github.com/crate/cratedb-sqlparse/issues/66 + sqlparse(` + CREATE TABLE t01 ( + "x" OBJECT (DYNAMIC), + "y" OBJECT (DYNAMIC) AS ("z" ARRAY(OBJECT (DYNAMIC)) + ); +`) +}) + + test('Whitetest or special characters should not avoid exception catching', () => { // https://github.com/crate/cratedb-sqlparse/issues/67 const stmts = [ diff --git a/cratedb_sqlparse_py/cratedb_sqlparse/parser.py b/cratedb_sqlparse_py/cratedb_sqlparse/parser.py index e42dcab..83dc77e 100644 --- a/cratedb_sqlparse_py/cratedb_sqlparse/parser.py +++ b/cratedb_sqlparse_py/cratedb_sqlparse/parser.py @@ -114,13 +114,29 @@ def __init__(self): self.errors = [] def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e): + if e: + query = recognizer.getTokenStream().getText(e.ctx.start, offendingSymbol.tokenIndex) + + else: + # If antlr4 doesn't give us an error object, we heuristically create a query, or a piece of it + # so we increase the chances of it being correctly assigned. + # It means that theoretically if you input two wrong queries that antlr4 manages + # to distinguish as two different statements (which is hard already), and both are similar + # the errors could be matched wrongly. Still pretty rare, and it is very hard to come up with + # an actual query that does it. + + # The newly generated query will be either the offendingToken + one token to the left + # or offendingToken + two tokens to the left, if the second is possible it takes precedence. + min_token_to_check = max(1, offendingSymbol.tokenIndex - 2) + tokens = recognizer.getTokenStream().tokens[min_token_to_check : offendingSymbol.tokenIndex + 1] + query = "".join(token.text for token in tokens) + error = ParsingException( msg=msg, offending_token=offendingSymbol, - e=e, - query=e.ctx.parser.getTokenStream().getText(e.ctx.start, e.offendingToken.tokenIndex), + e=e if e else type("NotViableInput", (Exception,), {})(), + query=query, ) - self.errors.append(error) diff --git a/cratedb_sqlparse_py/tests/test_exceptions.py b/cratedb_sqlparse_py/tests/test_exceptions.py index e095369..4717bf0 100644 --- a/cratedb_sqlparse_py/tests/test_exceptions.py +++ b/cratedb_sqlparse_py/tests/test_exceptions.py @@ -98,3 +98,16 @@ def test_sqlparse_catches_exception(): """ for stmt in stmts: assert sqlparse(stmt)[0].exception + + +def test_sqlparse_should_not_panic(): + from cratedb_sqlparse import sqlparse + + sqlparse(""" + CREATE TABLE t01 ( + "x" OBJECT (DYNAMIC), + "y" OBJECT (DYNAMIC) AS ("z" ARRAY(OBJECT (DYNAMIC)) + ); + """)[0] + + # That's it, it shouldn't raise a runtime Exception.