-
Notifications
You must be signed in to change notification settings - Fork 14.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Factors out the validation process to a separate class which does validation inline, rather than passing it through the existing query flow implicitly. This is meant to address Dave's feedback requesting that the validation flow not be explicitly tied to a query transform since that's uniquely a presto-ism. Next up in this stack: unit tests.
- Loading branch information
Alex Berghage
committed
May 2, 2019
1 parent
d076e0b
commit ed9ee8d
Showing
6 changed files
with
276 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
from . import base # noqa | ||
from . import presto_db # noqa | ||
|
||
# TODO: Move this to a config setting
# Maps an engine key to the validator class registered for it. Only Presto
# has a validator at the moment.
SQL_VALIDATORS_BY_ENGINE = {
    'presto': presto_db.PrestoDBSQLValidator,
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
from typing import ( | ||
Any, | ||
List, | ||
Optional | ||
) | ||
|
||
class SQLValidationAnnotation:
    """Represents a single annotation (error/warning) in an SQL querytext.

    The constructor stores its arguments unchanged. The three location
    fields are optional and may be ``None``.
    """

    def __init__(
        self,
        message: str,
        line_number: Optional[int],
        start_column: Optional[int],
        end_column: Optional[int],
    ) -> None:
        """Store the annotation text and its (optional) location.

        :param message: text describing the problem
        :param line_number: line the annotation refers to, or None
        :param start_column: first column of the annotated span, or None
        :param end_column: last column of the annotated span, or None
        """
        self.message = message
        self.line_number = line_number
        self.start_column = start_column
        self.end_column = end_column

    def __repr__(self) -> str:
        """Return a constructor-style representation for logs and debugging."""
        return (
            f'{type(self).__name__}('
            f'message={self.message!r}, '
            f'line_number={self.line_number!r}, '
            f'start_column={self.start_column!r}, '
            f'end_column={self.end_column!r})'
        )

    def to_dict(self) -> dict:
        """Return the annotation as a plain dict keyed by attribute name."""
        return {
            "line_number": self.line_number,
            "start_column": self.start_column,
            "end_column": self.end_column,
            "message": self.message,
        }
|
||
|
||
class BaseSQLValidator:
    """BaseSQLValidator defines the interface for checking that a given sql
    query is valid for a given database engine.

    Subclasses are expected to override ``validate``; the implementation
    here always raises ``NotImplementedError``.
    """

    # Identifier for this validator class.
    name = 'BaseSQLValidator'

    @classmethod
    def validate(
        cls,
        sql: str,
        schema: str,
        database: Any,
    ) -> List[SQLValidationAnnotation]:
        """Check that the given SQL querystring is valid for the given engine.

        :param sql: the query text to validate
        :param schema: schema to validate the query against
        :param database: database object the query targets (typed ``Any``
            by this interface)
        :returns: a list of annotations describing problems found
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
import json | ||
import logging | ||
from contextlib import closing | ||
from flask import g | ||
from pyhive.exc import DatabaseError | ||
from typing import ( | ||
Any, | ||
List, | ||
Optional | ||
) | ||
|
||
from superset import app, security_manager | ||
from superset.utils.core import sources | ||
from superset.sql_parse import ParsedQuery | ||
from superset.sql_validators.base import ( | ||
BaseSQLValidator, | ||
SQLValidationAnnotation, | ||
) | ||
|
||
# Row limit handed to the engine spec's fetch_data call when reading back the
# result of a validation query.
MAX_ERROR_ROWS = 10

# Application config; consulted for the optional SQL_QUERY_MUTATOR hook.
config = app.config
|
||
class PrestoSQLValidationError(Exception):
    """Error in the process of asking Presto to validate SQL querytext"""
|
||
class PrestoDBSQLValidator(BaseSQLValidator):
    """Validate SQL queries using Presto's built-in EXPLAIN subtype"""

    name = 'PrestoDBSQLValidator'

    @classmethod
    def validate_statement(
        cls,
        statement,
        database,
        cursor,
        user_name,
    ) -> Optional[SQLValidationAnnotation]:
        """Validate one statement by running it under EXPLAIN (TYPE VALIDATE).

        :param statement: text of a single SQL statement
        :param database: database object; its ``db_engine_spec`` is used to
            execute the query and fetch the result
        :param cursor: open cursor the validation query is run on
        :param user_name: user name passed to the SQL_QUERY_MUTATOR hook
        :returns: None when the query ran without a DatabaseError, otherwise
            an annotation built from the error payload
        :raises PrestoSQLValidationError: when the DatabaseError carries no
            dict payload to build an annotation from
        """
        db_engine_spec = database.db_engine_spec
        parsed_query = ParsedQuery(statement)
        sql = parsed_query.stripped()

        # Hook to allow environment-specific mutation (usually comments) to the SQL
        sql_query_mutator = config.get('SQL_QUERY_MUTATOR')
        if sql_query_mutator:
            sql = sql_query_mutator(sql, user_name, security_manager, database)

        # Transform the final statement to an explain call before sending it on
        # to presto to validate
        sql = f'EXPLAIN (TYPE VALIDATE) {sql}'

        # Invoke the query against presto. NB this deliberately doesn't use the
        # engine spec's handle_cursor implementation since we don't record
        # these EXPLAIN queries done in validation as proper Query objects
        # in the superset ORM.
        try:
            db_engine_spec.execute(cursor, sql)
            # Keep polling until the cursor has nothing more to report or the
            # reported state is FINISHED.
            polled = cursor.poll()
            while polled:
                logging.info('polling presto for validation progress')
                stats = polled.get('stats', {})
                if stats:
                    state = stats.get('state')
                    if state == 'FINISHED':
                        break
                polled = cursor.poll()
            db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS)
            return None
        except DatabaseError as db_error:
            # The annotation is built from a dict in the first exception
            # argument. Anything else (no args, or a non-dict such as a plain
            # string) cannot be turned into an annotation, so surface it as a
            # validation-process error instead of failing on ``.get``.
            if not db_error.args or not isinstance(db_error.args[0], dict):
                raise PrestoSQLValidationError(
                    "Presto (via pyhive) returned unparseable error text"
                ) from db_error
            error_args = db_error.args[0]

            message = error_args.get('message', "unknown prestodb error")
            # Tolerate both a missing and a null errorLocation.
            err_loc = error_args.get('errorLocation') or {}
            line_number = err_loc.get('lineNumber', None)
            # Only a single column is read from the payload, so the annotated
            # span starts and ends on it.
            column_number = err_loc.get('columnNumber', None)

            return SQLValidationAnnotation(
                message=message,
                line_number=line_number,
                start_column=column_number,
                end_column=column_number,
            )
        except Exception as e:
            logging.exception(f'Error running validation query: {e}')
            # Bare raise keeps the original traceback intact.
            raise

    @classmethod
    def validate(
        cls,
        sql: str,
        schema: str,
        database: Any,
    ) -> List[SQLValidationAnnotation]:
        """
        Presto supports query-validation queries by running them with a
        prepended explain.
        For example, "SELECT 1 FROM default.mytable" becomes "EXPLAIN (TYPE
        VALIDATE) SELECT 1 FROM default.mytable".

        :param sql: query text, possibly containing several statements
        :param schema: schema passed to ``database.get_sqla_engine``
        :param database: database object providing ``get_sqla_engine`` and
            ``db_engine_spec``
        :returns: one annotation per statement that failed validation, in
            statement order; empty when every statement validated
        """
        user_name = g.user.username if g.user else None
        parsed_query = ParsedQuery(sql)
        statements = parsed_query.get_statements()

        logging.debug(f'Validating {len(statements)} statement(s)')
        engine = database.get_sqla_engine(
            schema=schema,
            nullpool=True,
            user_name=user_name,
            source=sources.get('sql_lab', None),
        )
        # Sharing a single connection and cursor across the
        # execution of all statements (if many)
        annotations: List[SQLValidationAnnotation] = []
        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                # Reuse the statements parsed above rather than parsing the
                # query a second time.
                for statement in statements:
                    annotation = cls.validate_statement(
                        statement,
                        database,
                        cursor,
                        user_name,
                    )
                    if annotation:
                        annotations.append(annotation)
        logging.debug(f'Validation found {len(annotations)} error(s)')

        return annotations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters