From d076e0b97afcaa0c84c2ff7f34759c0a030dc4fc Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Tue, 30 Apr 2019 21:35:27 -0600 Subject: [PATCH 01/15] Add "validation_only" queries to the backend This is the first step toward supporting live as-you-type SQL validation against databases that support doing it (for starters just presto). I'm trying to stick as much to the existing query machinery as possible to avoid introducing too much new state for handling this, so I plumbed it through as a db spec parameter and an option you can set on an individual query. As of this commit: - [X] Plumb validation db_spec options - [X] Add validation_only option and transform to the sql_json view - [ ] Tests for the above on the backend - [ ] Debounced requestValidationQuery in onSqlChange - [ ] Check incoming results -- full query vs validation - [ ] Tests for the above on the frontend - [ ] Highlight error lines - [ ] Print error message in alert-danger box above toolbar controls --- superset/db_engine_specs.py | 33 +++++++++++++++++++++++++++++++++ superset/views/core.py | 9 +++++++++ 2 files changed, 42 insertions(+) diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 620ac4ee971cf..63a48900a1503 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -115,6 +115,7 @@ class BaseEngineSpec(object): force_column_alias_quotes = False arraysize = None max_column_name_length = None + supports_validation_queries = False @classmethod def get_time_expr(cls, expr, pdf, time_grain, grain): @@ -479,6 +480,15 @@ def get_timestamp_column(expression, column_name): can be overridden.""" return expression or column_name + @classmethod + def make_validation_query(cls, sql): + """ + If the underlying engine supports it, modify the query sql to request + that the database validate the query instead of running it. + """ + raise Exception( + f'Database engine {cls.engine} does not support validation queries') + class PostgresBaseEngineSpec(BaseEngineSpec): """ Abstract class for Postgres 'like' databases """ @@ -785,6 +795,7 @@ def extract_error_message(cls, e): class PrestoEngineSpec(BaseEngineSpec): engine = 'presto' + supports_validation_queries = True time_grain_functions = { None: '{col}', @@ -1081,6 +1092,17 @@ def latest_sub_partition(cls, table_name, schema, database, **kwargs): return '' return df.to_dict()[field_to_return][0] + @classmethod + def make_validation_query(cls, sql): + """ + Presto supports query-validation queries by prepending explain with a + type parameter. + + For example, "SELECT 1 FROM default.mytable" becomes "EXPLAIN (TYPE + VALIDATE) SELECT 1 FROM default.mytable. + """ + return f'EXPLAIN (TYPE VALIDATE) {sql}' + class HiveEngineSpec(PrestoEngineSpec): @@ -1088,6 +1110,7 @@ class HiveEngineSpec(PrestoEngineSpec): engine = 'hive' max_column_name_length = 767 + supports_validation_queries = False # Scoping regex at class level to avoid recompiling # 17/02/07 19:36:38 INFO ql.Driver: Total jobs = 5 @@ -1392,6 +1415,16 @@ def execute(cursor, query, async_=False): cursor.execute(query, **kwargs) + @classmethod + def make_validation_query(cls, sql): + """ + Hive doesn't support query-validation queries, so we disable the handler + inherited from presto. + """ + raise Exception( + f'Database engine {cls.engine} does not support validation queries') + + class MssqlEngineSpec(BaseEngineSpec): engine = 'mssql' epoch_to_dttm = "dateadd(S, {col}, '1970-01-01')" diff --git a/superset/views/core.py b/superset/views/core.py index c599bfe20658d..0414dd4d9629e 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2512,6 +2512,7 @@ def sql_json(self): sql = request.form.get('sql') database_id = request.form.get('database_id') schema = request.form.get('schema') or None + validate_only = request.form.get('validate_only') == 'true' template_params = json.loads( request.form.get('templateParams') or '{}') limit = int(request.form.get('queryLimit', 0)) @@ -2580,6 +2581,14 @@ def sql_json(self): limits = [mydb.db_engine_spec.get_limit_from_sql(rendered_query), limit] query.limit = min(lim for lim in limits if lim is not None) + # apply validation transform last -- after template processing + if validate_only: + spec = mydb.db_engine_spec + if not spec.supports_validation_queries: + json_error_response('{} does not support validation queries' + .format(spec.engine)) + rendered_query = spec.make_validation_query(rendered_query) + # Async request. if async_: logging.info('Running query on a Celery worker') From ed9ee8da490564a7f55d5231be68d4e71d6d14c3 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Wed, 1 May 2019 17:39:39 -0600 Subject: [PATCH 02/15] Review comments, phase 1. Factors out the validation process to a separate class which does validation inline, rather than passing it through the existing query flow implicitly. This is meant to address Dave's feedback requesting that the validation flow not be explicitly tied to a query transform since that's uniquely a presto-ism. Next up in this stack: unit tests. --- superset/config.py | 3 + superset/db_engine_specs.py | 33 ------ superset/sql_validators/__init__.py | 23 ++++ superset/sql_validators/base.py | 61 +++++++++++ superset/sql_validators/presto_db.py | 151 +++++++++++++++++++++++++++ superset/views/core.py | 46 ++++++-- 6 files changed, 276 insertions(+), 41 deletions(-) create mode 100644 superset/sql_validators/__init__.py create mode 100644 superset/sql_validators/base.py create mode 100644 superset/sql_validators/presto_db.py diff --git a/superset/config.py b/superset/config.py index b402fec9aa2c9..642e687d07816 100644 --- a/superset/config.py +++ b/superset/config.py @@ -417,6 +417,9 @@ class CeleryConfig(object): # Timeout duration for SQL Lab synchronous queries SQLLAB_TIMEOUT = 30 +# Timeout duration for SQL Lab query validation +SQLLAB_VALIDATION_TIMEOUT = 10 + # SQLLAB_DEFAULT_DBID SQLLAB_DEFAULT_DBID = None diff --git a/superset/db_engine_specs.py b/superset/db_engine_specs.py index 63a48900a1503..620ac4ee971cf 100644 --- a/superset/db_engine_specs.py +++ b/superset/db_engine_specs.py @@ -115,7 +115,6 @@ class BaseEngineSpec(object): force_column_alias_quotes = False arraysize = None max_column_name_length = None - supports_validation_queries = False @classmethod def get_time_expr(cls, expr, pdf, time_grain, grain): @@ -480,15 +479,6 @@ def get_timestamp_column(expression, column_name): can be overridden.""" return expression or column_name - @classmethod - def make_validation_query(cls, sql): - """ - If the underlying engine supports it, modify the query sql to request - that the database validate the query instead of running it. - """ - raise Exception( - f'Database engine {cls.engine} does not support validation queries') - class PostgresBaseEngineSpec(BaseEngineSpec): """ Abstract class for Postgres 'like' databases """ @@ -795,7 +785,6 @@ def extract_error_message(cls, e): class PrestoEngineSpec(BaseEngineSpec): engine = 'presto' - supports_validation_queries = True time_grain_functions = { None: '{col}', @@ -1092,17 +1081,6 @@ def latest_sub_partition(cls, table_name, schema, database, **kwargs): return '' return df.to_dict()[field_to_return][0] - @classmethod - def make_validation_query(cls, sql): - """ - Presto supports query-validation queries by prepending explain with a - type parameter. - - For example, "SELECT 1 FROM default.mytable" becomes "EXPLAIN (TYPE - VALIDATE) SELECT 1 FROM default.mytable. - """ - return f'EXPLAIN (TYPE VALIDATE) {sql}' - class HiveEngineSpec(PrestoEngineSpec): @@ -1110,7 +1088,6 @@ class HiveEngineSpec(PrestoEngineSpec): engine = 'hive' max_column_name_length = 767 - supports_validation_queries = False # Scoping regex at class level to avoid recompiling # 17/02/07 19:36:38 INFO ql.Driver: Total jobs = 5 @@ -1415,16 +1392,6 @@ def execute(cursor, query, async_=False): cursor.execute(query, **kwargs) - @classmethod - def make_validation_query(cls, sql): - """ - Hive doesn't support query-validation queries, so we disable the handler - inherited from presto. - """ - raise Exception( - f'Database engine {cls.engine} does not support validation queries') - - class MssqlEngineSpec(BaseEngineSpec): engine = 'mssql' epoch_to_dttm = "dateadd(S, {col}, '1970-01-01')" diff --git a/superset/sql_validators/__init__.py b/superset/sql_validators/__init__.py new file mode 100644 index 0000000000000..fc642e1873c51 --- /dev/null +++ b/superset/sql_validators/__init__.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from . import base # noqa +from . import presto_db # noqa + +# TODO: Move this to a config setting +SQL_VALIDATORS_BY_ENGINE = { + 'presto': presto_db.PrestoDBSQLValidator +} diff --git a/superset/sql_validators/base.py b/superset/sql_validators/base.py new file mode 100644 index 0000000000000..a21669d2ed4f9 --- /dev/null +++ b/superset/sql_validators/base.py @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from typing import ( + Any, + List, + Optional +) + +class SQLValidationAnnotation: + """Represents a single annotation (error/warning) in an SQL querytext""" + def __init__( + self, + message: str, + line_number: Optional[int], + start_column: Optional[int], + end_column: Optional[int] + ): + self.message = message + self.line_number = line_number + self.start_column = start_column + self.end_column = end_column + + def to_dict(self): + return { + "line_number": self.line_number, + "start_column": self.start_column, + "end_column": self.end_column, + "message": self.message, + } + + +class BaseSQLValidator: + """BaseSQLValidator defines the interface for checking that a given sql + query is valid for a given database engine.""" + + name = 'BaseSQLValidator' + + @classmethod + def validate( + cls, + sql: str, + schema: str, + database: Any + ) -> List[SQLValidationAnnotation]: + """Check that the given SQL querystring is valid for the given engine""" + raise NotImplementedError diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py new file mode 100644 index 0000000000000..4cf64d47e974c --- /dev/null +++ b/superset/sql_validators/presto_db.py @@ -0,0 +1,151 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import json +import logging +from contextlib import closing +from flask import g +from pyhive.exc import DatabaseError +from typing import ( + Any, + List, + Optional +) + +from superset import app, security_manager +from superset.utils.core import sources +from superset.sql_parse import ParsedQuery +from superset.sql_validators.base import ( + BaseSQLValidator, + SQLValidationAnnotation, +) + +MAX_ERROR_ROWS = 10 + +config = app.config + +class PrestoSQLValidationError(Exception): + """Error in the process of asking Presto to validate SQL querytext""" + pass + +class PrestoDBSQLValidator(BaseSQLValidator): + """Validate SQL queries using Presto's built-in EXPLAIN subtype""" + + name = 'PrestoDBSQLValidator' + + @classmethod + def validate_statement( + cls, + statement, + database, + cursor, + user_name + ) -> Optional[SQLValidationAnnotation]: + db_engine_spec = database.db_engine_spec + parsed_query = ParsedQuery(statement) + sql = parsed_query.stripped() + + # Hook to allow environment-specific mutation (usually comments) to the SQL + SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR') + if SQL_QUERY_MUTATOR: + sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) + + # Transform the final statement to an explain call before sending it on + # to presto to validate + sql = f'EXPLAIN (TYPE VALIDATE) {sql}' + + # Invoke the query against presto. NB this deliberately doesn't use the + # engine spec's handle_cursor implementation since we don't record + # these EXPLAIN queries done in validation as proper Query objects + # in the superset ORM. + try: + db_engine_spec.execute(cursor, sql) + polled = cursor.poll() + while polled: + logging.info('polling presto for validation progress') + stats = polled.get('stats', {}) + if stats: + state = stats.get('state') + if state == 'FINISHED': + break + polled = cursor.poll() + db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS) + return None + except DatabaseError as db_error: + if not db_error.args: + raise PrestoSQLValidationError( + "Presto (via pyhive) returned unparseable error text") + db_error = db_error.args[0] + + message = db_error.get('message', "unknown prestodb error") + err_loc = db_error.get('errorLocation', {}) + line_number = err_loc.get('lineNumber', None) + start_column = err_loc.get('columnNumber', None) + end_column = err_loc.get('columnNumber', None) + + return SQLValidationAnnotation( + message=message, + line_number=line_number, + start_column=start_column, + end_column=end_column, + ) + except Exception as e: + logging.exception(f'Error running validation query: {e}') + raise e + + @classmethod + def validate( + cls, + sql: str, + schema: str, + database: Any + ) -> List[SQLValidationAnnotation]: + """ + Presto supports query-validation queries by running them with a + prepended explain. + + For example, "SELECT 1 FROM default.mytable" becomes "EXPLAIN (TYPE + VALIDATE) SELECT 1 FROM default.mytable. + """ + user_name = g.user.username if g.user else None + parsed_query = ParsedQuery(sql) + statements = parsed_query.get_statements() + + logging.debug(f'Validating {len(statements)} statement(s)') + engine = database.get_sqla_engine( + schema=schema, + nullpool=True, + user_name=user_name, + source=sources.get('sql_lab', None), + ) + # Sharing a single connection and cursor across the + # execution of all statements (if many) + annotations: List[SQLValidationAnnotation] = [] + with closing(engine.raw_connection()) as conn: + with closing(conn.cursor()) as cursor: + for statement in parsed_query.get_statements(): + annotation = cls.validate_statement( + statement, + database, + cursor, + user_name + ) + if annotation: + annotations.append(annotation) + logging.debug(f'Validation found {len(annotations)} error(s)') + + return annotations diff --git a/superset/views/core.py b/superset/views/core.py index 0414dd4d9629e..f13bdcc4525f5 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -56,6 +56,7 @@ from superset.models.sql_lab import Query from superset.models.user_attributes import UserAttribute from superset.sql_parse import ParsedQuery +from superset.sql_validators import SQL_VALIDATORS_BY_ENGINE from superset.utils import core as utils from superset.utils import dashboard_import_export from superset.utils.dates import now_as_float @@ -2529,6 +2530,43 @@ def sql_json(self): json_error_response( 'Database with id {} is missing.'.format(database_id)) + # Validation request. + if validate_only: + if len(template_params) > 0: + # TODO: factor the Database object out of template rendering + # or provide it as mydb so we can render template params + # without having to also persist a Query ORM object. + return json_error_response( + 'SQL validation does not support template parameters') + + spec = mydb.db_engine_spec + if not spec.engine in SQL_VALIDATORS_BY_ENGINE: + return json_error_response( + 'no SQL validator is configured for {}'.format(spec.engine)) + validator = SQL_VALIDATORS_BY_ENGINE[spec.engine] + + try: + timeout = config.get('SQLLAB_VALIDATION_TIMEOUT') + timeout_msg = ( + f'The query exceeded the {timeout} seconds timeout.') + with utils.timeout(seconds=timeout, + error_message=timeout_msg): + errors = validator.validate(sql, schema, mydb) + payload = json.dumps( + [err.to_dict() for err in errors], + default=utils.pessimistic_json_iso_dttm_ser, + ignore_nan=True, + encoding=None, + ) + return json_success(payload) + except Exception as e: + logging.exception(e) + msg = _( + 'Failed to validate your SQL query text. Please check that ' + f'you have configured the {validator.name} validator ' + 'correctly and that any services it depends on are up.') + return json_error_response(f'{msg}') + rejected_tables = security_manager.rejected_datasources(sql, mydb, schema) if rejected_tables: return json_error_response( @@ -2581,14 +2619,6 @@ def sql_json(self): limits = [mydb.db_engine_spec.get_limit_from_sql(rendered_query), limit] query.limit = min(lim for lim in limits if lim is not None) - # apply validation transform last -- after template processing - if validate_only: - spec = mydb.db_engine_spec - if not spec.supports_validation_queries: - json_error_response('{} does not support validation queries' - .format(spec.engine)) - rendered_query = spec.make_validation_query(rendered_query) - # Async request. if async_: logging.info('Running query on a Celery worker') From 022f97de1575ec839c501fe490a3fc9ff001ce73 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 00:39:12 -0600 Subject: [PATCH 03/15] Split validation off to a separate endpoint Just to demo it and see if it's cleaner this wak. --- superset/sql_validators/presto_db.py | 2 +- superset/views/core.py | 92 ++++++++++++++++------------ 2 files changed, 55 insertions(+), 39 deletions(-) diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index 4cf64d47e974c..4493dce9b9d57 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -86,7 +86,7 @@ def validate_statement( db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS) return None except DatabaseError as db_error: - if not db_error.args: + if not db_error.args or not isinstance(db_error.args[0], dict): raise PrestoSQLValidationError( "Presto (via pyhive) returned unparseable error text") db_error = db_error.args[0] diff --git a/superset/views/core.py b/superset/views/core.py index f13bdcc4525f5..6f4acd3122423 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2504,6 +2504,60 @@ def stop_query(self): pass return self.json_response('OK') + @has_access_api + @expose('/validate_sql_json/', methods=['POST', 'GET']) + @log_this + def validate_sql_json(self): + """Validates that arbitrary sql is acceptable for the given database. + Returns a list of error/warning annotations as json. + """ + sql = request.form.get('sql') + database_id = request.form.get('database_id') + schema = request.form.get('schema') or None + template_params = json.loads( + request.form.get('templateParams') or '{}') + + if len(template_params) > 0: + # TODO: factor the Database object out of template rendering + # or provide it as mydb so we can render template params + # without having to also persist a Query ORM object. + return json_error_response( + 'SQL validation does not support template parameters') + + session = db.session() + mydb = session.query(models.Database).filter_by(id=database_id).first() + if not mydb: + json_error_response( + 'Database with id {} is missing.'.format(database_id)) + + spec = mydb.db_engine_spec + if not spec.engine in SQL_VALIDATORS_BY_ENGINE: + return json_error_response( + 'no SQL validator is configured for {}'.format(spec.engine)) + validator = SQL_VALIDATORS_BY_ENGINE[spec.engine] + + try: + timeout = config.get('SQLLAB_VALIDATION_TIMEOUT') + timeout_msg = ( + f'The query exceeded the {timeout} seconds timeout.') + with utils.timeout(seconds=timeout, + error_message=timeout_msg): + errors = validator.validate(sql, schema, mydb) + payload = json.dumps( + [err.to_dict() for err in errors], + default=utils.pessimistic_json_iso_dttm_ser, + ignore_nan=True, + encoding=None, + ) + return json_success(payload) + except Exception as e: + logging.exception(e) + msg = _( + 'Failed to validate your SQL query text. Please check that ' + f'you have configured the {validator.name} validator ' + 'correctly and that any services it depends on are up.') + return json_error_response(f'{msg}') + @has_access_api @expose('/sql_json/', methods=['POST', 'GET']) @log_this @@ -2513,7 +2567,6 @@ def sql_json(self): sql = request.form.get('sql') database_id = request.form.get('database_id') schema = request.form.get('schema') or None - validate_only = request.form.get('validate_only') == 'true' template_params = json.loads( request.form.get('templateParams') or '{}') limit = int(request.form.get('queryLimit', 0)) @@ -2530,43 +2583,6 @@ def sql_json(self): json_error_response( 'Database with id {} is missing.'.format(database_id)) - # Validation request. - if validate_only: - if len(template_params) > 0: - # TODO: factor the Database object out of template rendering - # or provide it as mydb so we can render template params - # without having to also persist a Query ORM object. - return json_error_response( - 'SQL validation does not support template parameters') - - spec = mydb.db_engine_spec - if not spec.engine in SQL_VALIDATORS_BY_ENGINE: - return json_error_response( - 'no SQL validator is configured for {}'.format(spec.engine)) - validator = SQL_VALIDATORS_BY_ENGINE[spec.engine] - - try: - timeout = config.get('SQLLAB_VALIDATION_TIMEOUT') - timeout_msg = ( - f'The query exceeded the {timeout} seconds timeout.') - with utils.timeout(seconds=timeout, - error_message=timeout_msg): - errors = validator.validate(sql, schema, mydb) - payload = json.dumps( - [err.to_dict() for err in errors], - default=utils.pessimistic_json_iso_dttm_ser, - ignore_nan=True, - encoding=None, - ) - return json_success(payload) - except Exception as e: - logging.exception(e) - msg = _( - 'Failed to validate your SQL query text. Please check that ' - f'you have configured the {validator.name} validator ' - 'correctly and that any services it depends on are up.') - return json_error_response(f'{msg}') - rejected_tables = security_manager.rejected_datasources(sql, mydb, schema) if rejected_tables: return json_error_response( From 65ae16901316052f2c3d11691046c72565c4c8cd Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 10:25:36 -0600 Subject: [PATCH 04/15] Add unit tests covering validation The new validator files are >90% covered, and the coverage on the core view is back up to about where it was before this changeset. --- superset/sql_validators/__init__.py | 1 + superset/sql_validators/presto_db.py | 4 +- superset/views/core.py | 3 +- tests/base_tests.py | 15 +++ tests/sql_validator_tests.py | 193 +++++++++++++++++++++++++++ 5 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 tests/sql_validator_tests.py diff --git a/superset/sql_validators/__init__.py b/superset/sql_validators/__init__.py index fc642e1873c51..388d7fef3dc98 100644 --- a/superset/sql_validators/__init__.py +++ b/superset/sql_validators/__init__.py @@ -16,6 +16,7 @@ # under the License. from . import base # noqa from . import presto_db # noqa +from .base import SQLValidationAnnotation # noqa # TODO: Move this to a config setting SQL_VALIDATORS_BY_ENGINE = { diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index 4493dce9b9d57..e24df5567a6b2 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import time import json import logging from contextlib import closing @@ -82,6 +83,7 @@ def validate_statement( state = stats.get('state') if state == 'FINISHED': break + time.sleep(0.2) polled = cursor.poll() db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS) return None @@ -125,7 +127,7 @@ def validate( parsed_query = ParsedQuery(sql) statements = parsed_query.get_statements() - logging.debug(f'Validating {len(statements)} statement(s)') + logging.info(f'Validating {len(statements)} statement(s)') engine = database.get_sqla_engine( schema=schema, nullpool=True, diff --git a/superset/views/core.py b/superset/views/core.py index 6f4acd3122423..00ff25bba018c 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2555,7 +2555,8 @@ def validate_sql_json(self): msg = _( 'Failed to validate your SQL query text. Please check that ' f'you have configured the {validator.name} validator ' - 'correctly and that any services it depends on are up.') + 'correctly and that any services it depends on are up. ' + f'Exception: {e}') return json_error_response(f'{msg}') @has_access_api diff --git a/tests/base_tests.py b/tests/base_tests.py index 8555915fd0fe5..6de082afc0066 100644 --- a/tests/base_tests.py +++ b/tests/base_tests.py @@ -190,6 +190,21 @@ def run_sql(self, sql, client_id=None, user_name=None, raise_on_error=False, raise Exception('run_sql failed') return resp + def validate_sql(self, sql, client_id=None, user_name=None, + raise_on_error=False): + if user_name: + self.logout() + self.login(username=(user_name if user_name else 'admin')) + dbid = get_main_database(db.session).id + resp = self.get_json_resp( + '/superset/validate_sql_json/', + raise_on_error=False, + data=dict(database_id=dbid, sql=sql, client_id=client_id), + ) + if raise_on_error and 'error' in resp: + raise Exception('validate_sql failed') + return resp + @patch.dict('superset._feature_flags', {'FOO': True}, clear=True) def test_existing_feature_flags(self): self.assertTrue(is_feature_enabled('FOO')) diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py new file mode 100644 index 0000000000000..c9951ad5b9cf3 --- /dev/null +++ b/tests/sql_validator_tests.py @@ -0,0 +1,193 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Unit tests for Sql Lab""" +import json +import unittest +from unittest.mock import ( + MagicMock, + patch +) + +from pyhive.exc import DatabaseError +from superset.sql_validators import SQLValidationAnnotation +from superset.sql_validators.base import BaseSQLValidator +from superset.sql_validators.presto_db import ( + PrestoDBSQLValidator, + PrestoSQLValidationError, +) + +from .base_tests import SupersetTestCase + + +class SqlValidatorEndpointTests(SupersetTestCase): + """Testing for Sql Lab querytext validation endpoint""" + def tearDown(self): + self.logout() + + @patch('superset.views.core.SQL_VALIDATORS_BY_ENGINE', new_callable=dict) + def test_validate_sql_endpoint_noconfig(self, _validators_by_engine): + """Assert that validate_sql_json errors out when no validators are + configured for any db""" + self.login('admin') + + resp = self.validate_sql( + 'SELECT * FROM ab_user', + client_id='1', + raise_on_error=False, + ) + self.assertIn('error', resp) + self.assertIn('no SQL validator is configured', resp['error']) + + @patch('superset.views.core.SQL_VALIDATORS_BY_ENGINE', new_callable=dict) + def test_validate_sql_endpoint_mocked(self, validators_by_engine): + """Assert that, with a mocked validator, annotations make it back out + from the validate_sql_json endpoint as a list of json dictionaries""" + self.login('admin') + + validator = MagicMock() + validators_by_engine['sqlite'] = validator + validator.validate.return_value = [ + SQLValidationAnnotation( + message="I don't know what I expected, but it wasn't this", + line_number=4, + start_column=12, + end_column=42, + ), + ] + + resp = self.validate_sql( + 'SELECT * FROM somewhere_over_the_rainbow', + client_id='1', + raise_on_error=False, + ) + + self.assertEqual(1, len(resp)) + self.assertIn('expected,', resp[0]['message']) + + @patch('superset.views.core.SQL_VALIDATORS_BY_ENGINE', new_callable=dict) + def test_validate_sql_endpoint_failure(self, validators_by_engine): + """Assert that validate_sql_json errors out when the selected validator + raises an unexpected exception""" + self.login('admin') + + validator = MagicMock() + validators_by_engine['sqlite'] = validator + validator.validate.side_effect = Exception( + "Kaboom!" + ) + + resp = self.validate_sql( + 'SELECT * FROM ab_user', + client_id='1', + raise_on_error=False, + ) + self.assertIn('error', resp) + self.assertIn('Kaboom!', resp['error']) + +class BaseValidatorTests(SupersetTestCase): + """Testing for the base sql validator""" + def setUp(self): + self.validator = BaseSQLValidator + + def test_validator_excepts(self): + with self.assertRaises(NotImplementedError): + _errors = self.validator.validate(None, None, None) + +class PrestoValidatorTests(SupersetTestCase): + """Testing for the prestodb sql validator""" + def setUp(self): + self.validator = PrestoDBSQLValidator + self.database = MagicMock() # noqa + self.database_engine = self.database.get_sqla_engine.return_value + self.database_conn = self.database_engine.raw_connection.return_value + self.database_cursor = self.database_conn.cursor.return_value + self.database_cursor.poll.return_value = None + + def tearDown(self): + self.logout() + + PRESTO_ERROR_TEMPLATE = { + "errorLocation": { + "lineNumber": 10, + "columnNumber": 20, + }, + "message": "your query isn't how I like it", + } + + @patch('superset.sql_validators.presto_db.g') + def test_validator_success(self, flask_g): + flask_g.user.username = 'nobody' + sql = 'SELECT 1 FROM default.notarealtable' + schema = 'default' + + errors = self.validator.validate(sql, schema, self.database) + + self.assertEqual([], errors) + + @patch('superset.sql_validators.presto_db.g') + def test_validator_db_error(self, flask_g): + flask_g.user.username = 'nobody' + sql = 'SELECT 1 FROM default.notarealtable' + schema = 'default' + + fetch_fn = self.database.db_engine_spec.fetch_data + fetch_fn.side_effect = DatabaseError('dummy db error') + + with self.assertRaises(PrestoSQLValidationError): + _errors = self.validator.validate(sql, schema, self.database) + + @patch('superset.sql_validators.presto_db.g') + def test_validator_unexpected_error(self, flask_g): + flask_g.user.username = 'nobody' + sql = 'SELECT 1 FROM default.notarealtable' + schema = 'default' + + fetch_fn = self.database.db_engine_spec.fetch_data + fetch_fn.side_effect = Exception('a mysterious failure') + + with self.assertRaises(Exception): + _errors = self.validator.validate(sql, schema, self.database) + + + @patch('superset.sql_validators.presto_db.g') + def test_validator_query_error(self, flask_g): + flask_g.user.username = 'nobody' + sql = 'SELECT 1 FROM default.notarealtable' + schema = 'default' + + fetch_fn = self.database.db_engine_spec.fetch_data + fetch_fn.side_effect = DatabaseError(self.PRESTO_ERROR_TEMPLATE) + + errors = self.validator.validate(sql, schema, self.database) + + self.assertEqual(1, len(errors)) + + def test_validate_sql_endpoint(self): + self.login('admin') + # NB this is effectively an integration test -- when there's a default + # validator for sqlite, this test will fail because the validator + # will no longer error out. + resp = self.validate_sql( + 'SELECT * FROM ab_user', + client_id='1', + raise_on_error=False, + ) + self.assertIn('error', resp) + self.assertIn('no SQL validator is configured', resp['error']) + +if __name__ == '__main__': + unittest.main() From 9d5ca22bebbfd1b88967a2f954376470dbc19194 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 10:42:43 -0600 Subject: [PATCH 05/15] Fix flake8 lints `tox -e flake8` is clean now --- superset/sql_validators/__init__.py | 2 +- superset/sql_validators/base.py | 19 +++++++++++-------- superset/sql_validators/presto_db.py | 26 ++++++++++++++------------ superset/views/core.py | 4 ++-- tests/sql_validator_tests.py | 27 +++++++++++++-------------- 5 files changed, 41 insertions(+), 37 deletions(-) diff --git a/superset/sql_validators/__init__.py b/superset/sql_validators/__init__.py index 388d7fef3dc98..3affee14969e2 100644 --- a/superset/sql_validators/__init__.py +++ b/superset/sql_validators/__init__.py @@ -20,5 +20,5 @@ # TODO: Move this to a config setting SQL_VALIDATORS_BY_ENGINE = { - 'presto': presto_db.PrestoDBSQLValidator + 'presto': presto_db.PrestoDBSQLValidator, } diff --git a/superset/sql_validators/base.py b/superset/sql_validators/base.py index a21669d2ed4f9..66cc7b05a1bad 100644 --- a/superset/sql_validators/base.py +++ b/superset/sql_validators/base.py @@ -17,10 +17,12 @@ from typing import ( Any, + Dict, List, - Optional + Optional, ) + class SQLValidationAnnotation: """Represents a single annotation (error/warning) in an SQL querytext""" def __init__( @@ -28,19 +30,20 @@ def __init__( message: str, line_number: Optional[int], start_column: Optional[int], - end_column: Optional[int] + end_column: Optional[int], ): self.message = message self.line_number = line_number self.start_column = start_column self.end_column = end_column - def to_dict(self): + def to_dict(self) -> Dict: + """Return a dictionary representation of this annotation""" return { - "line_number": self.line_number, - "start_column": self.start_column, - "end_column": self.end_column, - "message": self.message, + 'line_number': self.line_number, + 'start_column': self.start_column, + 'end_column': self.end_column, + 'message': self.message, } @@ -55,7 +58,7 @@ def validate( cls, sql: str, schema: str, - database: Any + database: Any, ) -> List[SQLValidationAnnotation]: """Check that the given SQL querystring is valid for the given engine""" raise NotImplementedError diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index e24df5567a6b2..2eb28e5cbcb86 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -15,34 +15,36 @@ # specific language governing permissions and limitations # under the License. -import time -import json -import logging from contextlib import closing -from flask import g -from pyhive.exc import DatabaseError +import logging +import time from typing import ( Any, List, - Optional + Optional, ) +from flask import g +from pyhive.exc import DatabaseError + from superset import app, security_manager -from superset.utils.core import sources from superset.sql_parse import ParsedQuery from superset.sql_validators.base import ( BaseSQLValidator, SQLValidationAnnotation, ) +from superset.utils.core import sources MAX_ERROR_ROWS = 10 config = app.config + class PrestoSQLValidationError(Exception): """Error in the process of asking Presto to validate SQL querytext""" pass + class PrestoDBSQLValidator(BaseSQLValidator): """Validate SQL queries using Presto's built-in EXPLAIN subtype""" @@ -54,7 +56,7 @@ def validate_statement( statement, database, cursor, - user_name + user_name, ) -> Optional[SQLValidationAnnotation]: db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(statement) @@ -90,10 +92,10 @@ def validate_statement( except DatabaseError as db_error: if not db_error.args or not isinstance(db_error.args[0], dict): raise PrestoSQLValidationError( - "Presto (via pyhive) returned unparseable error text") + 'Presto (via pyhive) returned unparseable error text') db_error = db_error.args[0] - message = db_error.get('message', "unknown prestodb error") + message = db_error.get('message', 'unknown prestodb error') err_loc = db_error.get('errorLocation', {}) line_number = err_loc.get('lineNumber', None) start_column = err_loc.get('columnNumber', None) @@ -114,7 +116,7 @@ def validate( cls, sql: str, schema: str, - database: Any + database: Any, ) -> List[SQLValidationAnnotation]: """ Presto supports query-validation queries by running them with a @@ -144,7 +146,7 @@ def validate( statement, database, cursor, - user_name + user_name, ) if annotation: annotations.append(annotation) diff --git a/superset/views/core.py b/superset/views/core.py index 00ff25bba018c..a9a15853d0dda 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2531,7 +2531,7 @@ def validate_sql_json(self): 'Database with id {} is missing.'.format(database_id)) spec = mydb.db_engine_spec - if not spec.engine in SQL_VALIDATORS_BY_ENGINE: + if spec.engine not in SQL_VALIDATORS_BY_ENGINE: return json_error_response( 'no SQL validator is configured for {}'.format(spec.engine)) validator = SQL_VALIDATORS_BY_ENGINE[spec.engine] @@ -2541,7 +2541,7 @@ def validate_sql_json(self): timeout_msg = ( f'The query exceeded the {timeout} seconds timeout.') with utils.timeout(seconds=timeout, - error_message=timeout_msg): + error_message=timeout_msg): errors = validator.validate(sql, schema, mydb) payload = json.dumps( [err.to_dict() for err in errors], diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py index c9951ad5b9cf3..28859131744ec 100644 --- a/tests/sql_validator_tests.py +++ b/tests/sql_validator_tests.py @@ -15,21 +15,20 @@ # specific language governing permissions and limitations # under the License. """Unit tests for Sql Lab""" -import json import unittest from unittest.mock import ( MagicMock, - patch + patch, ) from pyhive.exc import DatabaseError + from superset.sql_validators import SQLValidationAnnotation from superset.sql_validators.base import BaseSQLValidator from superset.sql_validators.presto_db import ( PrestoDBSQLValidator, PrestoSQLValidationError, ) - from .base_tests import SupersetTestCase @@ -86,9 +85,7 @@ def test_validate_sql_endpoint_failure(self, validators_by_engine): validator = MagicMock() validators_by_engine['sqlite'] = validator - validator.validate.side_effect = Exception( - "Kaboom!" - ) + validator.validate.side_effect = Exception('Kaboom!') resp = self.validate_sql( 'SELECT * FROM ab_user', @@ -98,6 +95,7 @@ def test_validate_sql_endpoint_failure(self, validators_by_engine): self.assertIn('error', resp) self.assertIn('Kaboom!', resp['error']) + class BaseValidatorTests(SupersetTestCase): """Testing for the base sql validator""" def setUp(self): @@ -105,7 +103,8 @@ def setUp(self): def test_validator_excepts(self): with self.assertRaises(NotImplementedError): - _errors = self.validator.validate(None, None, None) + self.validator.validate(None, None, None) + class PrestoValidatorTests(SupersetTestCase): """Testing for the prestodb sql validator""" @@ -121,11 +120,11 @@ def tearDown(self): self.logout() PRESTO_ERROR_TEMPLATE = { - "errorLocation": { - "lineNumber": 10, - "columnNumber": 20, + 'errorLocation': { + 'lineNumber': 10, + 'columnNumber': 20, }, - "message": "your query isn't how I like it", + 'message': "your query isn't how I like it", } @patch('superset.sql_validators.presto_db.g') @@ -148,7 +147,7 @@ def test_validator_db_error(self, flask_g): fetch_fn.side_effect = DatabaseError('dummy db error') with self.assertRaises(PrestoSQLValidationError): - _errors = self.validator.validate(sql, schema, self.database) + self.validator.validate(sql, schema, self.database) @patch('superset.sql_validators.presto_db.g') def test_validator_unexpected_error(self, flask_g): @@ -160,8 +159,7 @@ def test_validator_unexpected_error(self, flask_g): fetch_fn.side_effect = Exception('a mysterious failure') with self.assertRaises(Exception): - _errors = self.validator.validate(sql, schema, self.database) - + self.validator.validate(sql, schema, self.database) @patch('superset.sql_validators.presto_db.g') def test_validator_query_error(self, flask_g): @@ -189,5 +187,6 @@ def test_validate_sql_endpoint(self): self.assertIn('error', resp) self.assertIn('no SQL validator is configured', resp['error']) + if __name__ == '__main__': unittest.main() From 88a2351dbd07ae19a4d232ed5acece1a3661ad55 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 11:13:16 -0600 Subject: [PATCH 06/15] Hacky test fix: bind mock validator for pg/mysql --- tests/sql_validator_tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py index 28859131744ec..0f12757d881d9 100644 --- a/tests/sql_validator_tests.py +++ b/tests/sql_validator_tests.py @@ -59,6 +59,8 @@ def test_validate_sql_endpoint_mocked(self, validators_by_engine): validator = MagicMock() validators_by_engine['sqlite'] = validator + validators_by_engine['postgresql'] = validator + validators_by_engine['mysql'] = validator validator.validate.return_value = [ SQLValidationAnnotation( message="I don't know what I expected, but it wasn't this", @@ -85,6 +87,8 @@ def test_validate_sql_endpoint_failure(self, validators_by_engine): validator = MagicMock() validators_by_engine['sqlite'] = validator + validators_by_engine['postgresql'] = validator + validators_by_engine['mysql'] = validator validator.validate.side_effect = Exception('Kaboom!') resp = self.validate_sql( From c16902ae87fa01c57ab0856568f3b94f4198d335 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 11:54:49 -0600 Subject: [PATCH 07/15] Fix pylint lints Also fix the flake8s that showed up from fixing pylint. --- superset/sql_validators/base.py | 20 +++++++++++--------- superset/sql_validators/presto_db.py | 28 +++++++++++++++------------- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/superset/sql_validators/base.py b/superset/sql_validators/base.py index 66cc7b05a1bad..437001bed0888 100644 --- a/superset/sql_validators/base.py +++ b/superset/sql_validators/base.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +# pylint: disable=too-few-public-methods + from typing import ( Any, Dict, @@ -26,11 +28,11 @@ class SQLValidationAnnotation: """Represents a single annotation (error/warning) in an SQL querytext""" def __init__( - self, - message: str, - line_number: Optional[int], - start_column: Optional[int], - end_column: Optional[int], + self, + message: str, + line_number: Optional[int], + start_column: Optional[int], + end_column: Optional[int], ): self.message = message self.line_number = line_number @@ -55,10 +57,10 @@ class BaseSQLValidator: @classmethod def validate( - cls, - sql: str, - schema: str, - database: Any, + cls, + sql: str, + schema: str, + database: Any, ) -> List[SQLValidationAnnotation]: """Check that the given SQL querystring is valid for the given engine""" raise NotImplementedError diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index 2eb28e5cbcb86..03c08395a3c10 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -20,6 +20,7 @@ import time from typing import ( Any, + Dict, List, Optional, ) @@ -42,7 +43,6 @@ class PrestoSQLValidationError(Exception): """Error in the process of asking Presto to validate SQL querytext""" - pass class PrestoDBSQLValidator(BaseSQLValidator): @@ -52,17 +52,19 @@ class PrestoDBSQLValidator(BaseSQLValidator): @classmethod def validate_statement( - cls, - statement, - database, - cursor, - user_name, + cls, + statement, + database, + cursor, + user_name, ) -> Optional[SQLValidationAnnotation]: + # pylint: disable=too-many-locals db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(statement) sql = parsed_query.stripped() # Hook to allow environment-specific mutation (usually comments) to the SQL + # pylint: disable=invalid-name SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR') if SQL_QUERY_MUTATOR: sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) @@ -93,10 +95,10 @@ def validate_statement( if not db_error.args or not isinstance(db_error.args[0], dict): raise PrestoSQLValidationError( 'Presto (via pyhive) returned unparseable error text') - db_error = db_error.args[0] + error_args: Dict = db_error.args[0] - message = db_error.get('message', 'unknown prestodb error') - err_loc = db_error.get('errorLocation', {}) + message: str = error_args.get('message', 'unknown prestodb error') + err_loc: Dict = error_args.get('errorLocation', {}) line_number = err_loc.get('lineNumber', None) start_column = err_loc.get('columnNumber', None) end_column = err_loc.get('columnNumber', None) @@ -113,10 +115,10 @@ def validate_statement( @classmethod def validate( - cls, - sql: str, - schema: str, - database: Any, + cls, + sql: str, + schema: str, + database: Any, ) -> List[SQLValidationAnnotation]: """ Presto supports query-validation queries by running them with a From a6305f5b429b554a6543ce2714e6faae89aa60f6 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 11:57:07 -0600 Subject: [PATCH 08/15] Use 400 Bad Request for validation user-error Consider it user-error and respond with 400 instead of 500 if somebody requests validation with template parameters, for a backend with no configured validator, or for an invalid database ID. These are in contrast to the actual server error reply for exceptions thrown by the underlying validator implementation, which still (IMO correctly) returns 500. --- superset/views/core.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/superset/views/core.py b/superset/views/core.py index a9a15853d0dda..01db4de7a7c42 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2522,18 +2522,22 @@ def validate_sql_json(self): # or provide it as mydb so we can render template params # without having to also persist a Query ORM object. return json_error_response( - 'SQL validation does not support template parameters') + 'SQL validation does not support template parameters', + status=400) session = db.session() mydb = session.query(models.Database).filter_by(id=database_id).first() if not mydb: json_error_response( - 'Database with id {} is missing.'.format(database_id)) + 'Database with id {} is missing.'.format(database_id), + status=400, + ) spec = mydb.db_engine_spec if spec.engine not in SQL_VALIDATORS_BY_ENGINE: return json_error_response( - 'no SQL validator is configured for {}'.format(spec.engine)) + 'no SQL validator is configured for {}'.format(spec.engine), + status=400) validator = SQL_VALIDATORS_BY_ENGINE[spec.engine] try: From 6eb4e5deafcc8b522166e8ef471e637434b0d821 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 13:25:50 -0600 Subject: [PATCH 09/15] Move validator config to a feature flag --- superset/config.py | 10 +++++++- superset/sql_validators/__init__.py | 10 ++++---- superset/views/core.py | 15 ++++++++---- tests/sql_validator_tests.py | 37 +++++++++++++++++++---------- 4 files changed, 51 insertions(+), 21 deletions(-) diff --git a/superset/config.py b/superset/config.py index 642e687d07816..d311da7e42401 100644 --- a/superset/config.py +++ b/superset/config.py @@ -200,7 +200,11 @@ # For example, DEFAULT_FEATURE_FLAGS = { 'FOO': True, 'BAR': False } here # and FEATURE_FLAGS = { 'BAR': True, 'BAZ': True } in superset_config.py # will result in combined feature flags of { 'FOO': True, 'BAR': True, 'BAZ': True } -DEFAULT_FEATURE_FLAGS = {} +DEFAULT_FEATURE_FLAGS = { + 'SQL_VALIDATORS_BY_ENGINE': { + 'presto': 'PrestoDBSQLValidator', + }, +} # A function that receives a dict of all feature flags # (DEFAULT_FEATURE_FLAGS merged with FEATURE_FLAGS) @@ -608,6 +612,10 @@ class CeleryConfig(object): # localtime (in the tz where the superset webserver is running) IS_EPOCH_S_TRULY_UTC = False +# Configure which SQL validator to use for each engine +SQL_VALIDATORS_BY_ENGINE = { + 'presto': 'PrestoDBSQLValidator', +} try: if CONFIG_PATH_ENV_VAR in os.environ: diff --git a/superset/sql_validators/__init__.py b/superset/sql_validators/__init__.py index 3affee14969e2..2f0333caf7610 100644 --- a/superset/sql_validators/__init__.py +++ b/superset/sql_validators/__init__.py @@ -14,11 +14,13 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from typing import Optional + from . import base # noqa from . import presto_db # noqa from .base import SQLValidationAnnotation # noqa -# TODO: Move this to a config setting -SQL_VALIDATORS_BY_ENGINE = { - 'presto': presto_db.PrestoDBSQLValidator, -} +def get_validator_by_name(name: str) -> Optional[base.BaseSQLValidator]: + return { + 'PrestoDBSQLValidator': presto_db.PrestoDBSQLValidator, + }.get(name) diff --git a/superset/views/core.py b/superset/views/core.py index 01db4de7a7c42..09ce11767ca4f 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -44,7 +44,7 @@ from werkzeug.utils import secure_filename from superset import ( - app, appbuilder, cache, conf, db, results_backend, + app, appbuilder, cache, conf, db, get_feature_flags, results_backend, security_manager, sql_lab, viz) from superset.connectors.connector_registry import ConnectorRegistry from superset.connectors.sqla.models import AnnotationDatasource, SqlaTable @@ -56,7 +56,7 @@ from superset.models.sql_lab import Query from superset.models.user_attributes import UserAttribute from superset.sql_parse import ParsedQuery -from superset.sql_validators import SQL_VALIDATORS_BY_ENGINE +from superset.sql_validators import get_validator_by_name from superset.utils import core as utils from superset.utils import dashboard_import_export from superset.utils.dates import now_as_float @@ -2534,11 +2534,18 @@ def validate_sql_json(self): ) spec = mydb.db_engine_spec - if spec.engine not in SQL_VALIDATORS_BY_ENGINE: + validators_by_engine = get_feature_flags().get( + 'SQL_VALIDATORS_BY_ENGINE') + if not validators_by_engine or spec.engine not in validators_by_engine: return json_error_response( 'no SQL validator is configured for {}'.format(spec.engine), status=400) - validator = SQL_VALIDATORS_BY_ENGINE[spec.engine] + validator_name = validators_by_engine[spec.engine] + validator = get_validator_by_name(validator_name) + if not validator: + return json_error_response( + 'No validator named {} found (configured for the {} engine)' + .format(validator_name, spec.engine)) try: timeout = config.get('SQLLAB_VALIDATION_TIMEOUT') diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py index 0f12757d881d9..14df93cf457ac 100644 --- a/tests/sql_validator_tests.py +++ b/tests/sql_validator_tests.py @@ -23,6 +23,7 @@ from pyhive.exc import DatabaseError +from superset import app from superset.sql_validators import SQLValidationAnnotation from superset.sql_validators.base import BaseSQLValidator from superset.sql_validators.presto_db import ( @@ -31,18 +32,28 @@ ) from .base_tests import SupersetTestCase +PRESTO_TEST_FEATURE_FLAGS = { + 'SQL_VALIDATORS_BY_ENGINE': { + 'presto': 'PrestoDBSQLValidator', + 'sqlite': 'PrestoDBSQLValidator', + 'postgres': 'PrestoDBSQLValidator', + 'mysql': 'PrestoDBSQLValidator', + }, +} class SqlValidatorEndpointTests(SupersetTestCase): """Testing for Sql Lab querytext validation endpoint""" + def tearDown(self): self.logout() - @patch('superset.views.core.SQL_VALIDATORS_BY_ENGINE', new_callable=dict) - def test_validate_sql_endpoint_noconfig(self, _validators_by_engine): + def test_validate_sql_endpoint_noconfig(self): """Assert that validate_sql_json errors out when no validators are configured for any db""" self.login('admin') + app.config['SQL_VALIDATORS_BY_ENGINE'] = {} + resp = self.validate_sql( 'SELECT * FROM ab_user', client_id='1', @@ -51,16 +62,17 @@ def test_validate_sql_endpoint_noconfig(self, _validators_by_engine): self.assertIn('error', resp) self.assertIn('no SQL validator is configured', resp['error']) - @patch('superset.views.core.SQL_VALIDATORS_BY_ENGINE', new_callable=dict) - def test_validate_sql_endpoint_mocked(self, validators_by_engine): + @patch('superset.views.core.get_validator_by_name') + @patch.dict('superset._feature_flags', + PRESTO_TEST_FEATURE_FLAGS, + clear=True) + def test_validate_sql_endpoint_mocked(self, get_validator_by_name): """Assert that, with a mocked validator, annotations make it back out from the validate_sql_json endpoint as a list of json dictionaries""" self.login('admin') validator = MagicMock() - validators_by_engine['sqlite'] = validator - validators_by_engine['postgresql'] = validator - validators_by_engine['mysql'] = validator + get_validator_by_name.return_value = validator validator.validate.return_value = [ SQLValidationAnnotation( message="I don't know what I expected, but it wasn't this", @@ -79,16 +91,17 @@ def test_validate_sql_endpoint_mocked(self, validators_by_engine): self.assertEqual(1, len(resp)) self.assertIn('expected,', resp[0]['message']) - @patch('superset.views.core.SQL_VALIDATORS_BY_ENGINE', new_callable=dict) - def test_validate_sql_endpoint_failure(self, validators_by_engine): + @patch('superset.views.core.get_validator_by_name') + @patch.dict('superset._feature_flags', + PRESTO_TEST_FEATURE_FLAGS, + clear=True) + def test_validate_sql_endpoint_failure(self, get_validator_by_name): """Assert that validate_sql_json errors out when the selected validator raises an unexpected exception""" self.login('admin') validator = MagicMock() - validators_by_engine['sqlite'] = validator - validators_by_engine['postgresql'] = validator - validators_by_engine['mysql'] = validator + get_validator_by_name.return_value = validator validator.validate.side_effect = Exception('Kaboom!') resp = self.validate_sql( From 2c02f0ac524ffc82bd46b5900891c0acd594b2ef Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 13:26:04 -0600 Subject: [PATCH 10/15] Add docs on configuring validators --- docs/installation.rst | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index a06da876dc729..0e52e30c33f5e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -444,8 +444,8 @@ The connection string for Teradata looks like this :: Required environment variables: :: - export ODBCINI=/.../teradata/client/ODBC_64/odbc.ini - export ODBCINST=/.../teradata/client/ODBC_64/odbcinst.ini + export ODBCINI=/.../teradata/client/ODBC_64/odbc.ini + export ODBCINST=/.../teradata/client/ODBC_64/odbcinst.ini See `Teradata SQLAlchemy `_. @@ -816,6 +816,18 @@ in this dictionary are made available for users to use in their SQL. 'my_crazy_macro': lambda x: x*2, } +SQL Lab also includes a live query validation feature with pluggable backends. +You can configure which validation implementation is used with which database +engine by adding a block like the following to your config.py: + +.. code-block:: python + FEATURE_FLAGS = { + 'SQL_VALIDATORS_BY_ENGINE': { + 'presto': 'PrestoDBSQLValidator', + } + } + +The available validators and names can be found in `sql_validators/`. Celery Flower ------------- @@ -894,7 +906,7 @@ Note that the above command will install Superset into ``default`` namespace of Custom OAuth2 configuration --------------------------- -Beyond FAB supported providers (github, twitter, linkedin, google, azure), its easy to connect Superset with other OAuth2 Authorization Server implementations that support "code" authorization. +Beyond FAB supported providers (github, twitter, linkedin, google, azure), its easy to connect Superset with other OAuth2 Authorization Server implementations that support "code" authorization. The first step: Configure authorization in Superset ``superset_config.py``. @@ -913,10 +925,10 @@ The first step: Configure authorization in Superset ``superset_config.py``. }, 'access_token_method':'POST', # HTTP Method to call access_token_url 'access_token_params':{ # Additional parameters for calls to access_token_url - 'client_id':'myClientId' + 'client_id':'myClientId' }, - 'access_token_headers':{ # Additional headers for calls to access_token_url - 'Authorization': 'Basic Base64EncodedClientIdAndSecret' + 'access_token_headers':{ # Additional headers for calls to access_token_url + 'Authorization': 'Basic Base64EncodedClientIdAndSecret' }, 'base_url':'https://myAuthorizationServer/oauth2AuthorizationServer/', 'access_token_url':'https://myAuthorizationServer/oauth2AuthorizationServer/token', @@ -924,25 +936,25 @@ The first step: Configure authorization in Superset ``superset_config.py``. } } ] - + # Will allow user self registration, allowing to create Flask users from Authorized User AUTH_USER_REGISTRATION = True - + # The default user self registration role AUTH_USER_REGISTRATION_ROLE = "Public" - + Second step: Create a `CustomSsoSecurityManager` that extends `SupersetSecurityManager` and overrides `oauth_user_info`: .. code-block:: python - + from superset.security import SupersetSecurityManager - + class CustomSsoSecurityManager(SupersetSecurityManager): def oauth_user_info(self, provider, response=None): logging.debug("Oauth2 provider: {0}.".format(provider)) if provider == 'egaSSO': - # As example, this line request a GET to base_url + '/' + userDetails with Bearer Authentication, + # As example, this line request a GET to base_url + '/' + userDetails with Bearer Authentication, # and expects that authorization server checks the token, and response with user details me = self.appbuilder.sm.oauth_remotes[provider].get('userDetails').data logging.debug("user_data: {0}".format(me)) @@ -954,7 +966,6 @@ This file must be located at the same directory than ``superset_config.py`` with Then we can add this two lines to ``superset_config.py``: .. code-block:: python - + from custom_sso_security_manager import CustomSsoSecurityManager CUSTOM_SECURITY_MANAGER = CustomSsoSecurityManager - From 13ec10284dced33d9a1c20cb9253c027f89f963a Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 13:35:59 -0600 Subject: [PATCH 11/15] Fix linter complaints --- superset/sql_validators/__init__.py | 1 + superset/sql_validators/presto_db.py | 6 +++--- tests/sql_validator_tests.py | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/superset/sql_validators/__init__.py b/superset/sql_validators/__init__.py index 2f0333caf7610..367aab6186beb 100644 --- a/superset/sql_validators/__init__.py +++ b/superset/sql_validators/__init__.py @@ -20,6 +20,7 @@ from . import presto_db # noqa from .base import SQLValidationAnnotation # noqa + def get_validator_by_name(name: str) -> Optional[base.BaseSQLValidator]: return { 'PrestoDBSQLValidator': presto_db.PrestoDBSQLValidator, diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index 03c08395a3c10..d3ca19865f4ce 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -95,10 +95,10 @@ def validate_statement( if not db_error.args or not isinstance(db_error.args[0], dict): raise PrestoSQLValidationError( 'Presto (via pyhive) returned unparseable error text') - error_args: Dict = db_error.args[0] + error_args: Dict[str, Any] = db_error.args[0] - message: str = error_args.get('message', 'unknown prestodb error') - err_loc: Dict = error_args.get('errorLocation', {}) + message = error_args.get('message', 'unknown prestodb error') + err_loc = error_args.get('errorLocation', {}) line_number = err_loc.get('lineNumber', None) start_column = err_loc.get('columnNumber', None) end_column = err_loc.get('columnNumber', None) diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py index 14df93cf457ac..8db603255ec1a 100644 --- a/tests/sql_validator_tests.py +++ b/tests/sql_validator_tests.py @@ -41,6 +41,7 @@ }, } + class SqlValidatorEndpointTests(SupersetTestCase): """Testing for Sql Lab querytext validation endpoint""" From 6a1000da528668ac6f01fed2c8f87049ac47037d Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 14:20:32 -0600 Subject: [PATCH 12/15] Extend DatabaseError catch block and document it The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses as though they were normal queries -- in other words it doesn't know that errors for that kind of request aren't exceptional. To work around that, we have to trap the exceptions it returns and translate the ones that express problems with the SQL query. --- superset/sql_validators/presto_db.py | 34 ++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index d3ca19865f4ce..e3a6e0b958066 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -92,13 +92,39 @@ def validate_statement( db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS) return None except DatabaseError as db_error: + # The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses + # as though they were normal queries. In other words, it doesn't + # know that errors here are not exceptional. To map this back to + # ordinary control flow, we have to trap the category of exception + # raised by the underlying client, match the exception arguments + # pyhive provides against the shape of dictionary for a presto query + # invalid error, and restructure that error as an annotation we can + # return up. + + # Confirm the first element in the DatabaseError constructor is a + # dictionary with error information. This is currently provided by + # the pyhive client, but may break if their interface changes when + # we update at some point in the future. if not db_error.args or not isinstance(db_error.args[0], dict): raise PrestoSQLValidationError( - 'Presto (via pyhive) returned unparseable error text') + 'The pyhive presto client returned an unhandled ' + 'database error.', + ) from db_error error_args: Dict[str, Any] = db_error.args[0] - message = error_args.get('message', 'unknown prestodb error') - err_loc = error_args.get('errorLocation', {}) + # Confirm the two fields we need to be able to present an annotation + # are present in the error response -- a message, and a location. + if 'message' not in error_args: + raise PrestoSQLValidationError( + 'The pyhive presto client did not report an error message', + ) from db_error + if 'errorLocation' not in error_args: + raise PrestoSQLValidationError( + 'The pyhive presto client did not report an error location', + ) from db_error + + message = error_args['message'] + err_loc = error_args['errorLocation'] line_number = err_loc.get('lineNumber', None) start_column = err_loc.get('columnNumber', None) end_column = err_loc.get('columnNumber', None) @@ -110,7 +136,7 @@ def validate_statement( end_column=end_column, ) except Exception as e: - logging.exception(f'Error running validation query: {e}') + logging.exception(f'Unexpected error running validation query: {e}') raise e @classmethod From ae9fcd370d9f180433b6f1e4a883b6eda655134c Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 14:27:10 -0600 Subject: [PATCH 13/15] fix postgresql test environment --- tests/sql_validator_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sql_validator_tests.py b/tests/sql_validator_tests.py index 8db603255ec1a..0e1310cb71af2 100644 --- a/tests/sql_validator_tests.py +++ b/tests/sql_validator_tests.py @@ -36,7 +36,7 @@ 'SQL_VALIDATORS_BY_ENGINE': { 'presto': 'PrestoDBSQLValidator', 'sqlite': 'PrestoDBSQLValidator', - 'postgres': 'PrestoDBSQLValidator', + 'postgresql': 'PrestoDBSQLValidator', 'mysql': 'PrestoDBSQLValidator', }, } From f9db0eed918a86c626dfceafede1f35aa9741a02 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 14:35:31 -0600 Subject: [PATCH 14/15] Hit pylint with a hammer It's convinced these are the wrong type. They aren't. --- superset/sql_validators/presto_db.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index e3a6e0b958066..ca061bb74322d 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -123,8 +123,8 @@ def validate_statement( 'The pyhive presto client did not report an error location', ) from db_error - message = error_args['message'] - err_loc = error_args['errorLocation'] + message = error_args['message'] # noqa + err_loc = error_args['errorLocation'] # noqa line_number = err_loc.get('lineNumber', None) start_column = err_loc.get('columnNumber', None) end_column = err_loc.get('columnNumber', None) From 1700eebf06be8cca36e0d2d8e28f08138733d794 Mon Sep 17 00:00:00 2001 From: Alex Berghage Date: Thu, 2 May 2019 15:12:52 -0600 Subject: [PATCH 15/15] More lint fixes --- superset/sql_validators/presto_db.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/superset/sql_validators/presto_db.py b/superset/sql_validators/presto_db.py index ca061bb74322d..87c2d8efeb805 100644 --- a/superset/sql_validators/presto_db.py +++ b/superset/sql_validators/presto_db.py @@ -123,8 +123,11 @@ def validate_statement( 'The pyhive presto client did not report an error location', ) from db_error - message = error_args['message'] # noqa - err_loc = error_args['errorLocation'] # noqa + # Pylint is confused about the type of error_args, despite the hints + # and checks above. + # pylint: disable=invalid-sequence-index + message = error_args['message'] + err_loc = error_args['errorLocation'] line_number = err_loc.get('lineNumber', None) start_column = err_loc.get('columnNumber', None) end_column = err_loc.get('columnNumber', None)