From c6e593465b109241a5e844f640187f2c21204841 Mon Sep 17 00:00:00 2001 From: "Hugh A. Miles II" Date: Fri, 17 Mar 2023 18:02:25 -0600 Subject: [PATCH] chore: Hugh/migrate estimate query cost to v1 (#23226) Co-authored-by: Diego Medina --- UPDATING.md | 1 + .../src/SqlLab/actions/sqlLab.js | 20 ++-- .../src/SqlLab/reducers/sqlLab.js | 2 +- superset/security/manager.py | 1 + superset/sqllab/api.py | 56 ++++++++- superset/sqllab/commands/estimate.py | 106 ++++++++++++++++++ superset/sqllab/schemas.py | 9 ++ superset/views/core.py | 1 + tests/integration_tests/sql_lab/api_tests.py | 67 +++++++++++ .../sql_lab/commands_tests.py | 81 ++++++++++++- 10 files changed, 329 insertions(+), 15 deletions(-) create mode 100644 superset/sqllab/commands/estimate.py diff --git a/UPDATING.md b/UPDATING.md index c29b7182a0534..3496742bee131 100644 --- a/UPDATING.md +++ b/UPDATING.md @@ -24,6 +24,7 @@ assists people when migrating to a new version. ## Next +- [23226](https://github.com/apache/superset/pull/23226) Migrated endpoint `/estimate_query_cost/` to `/api/v1/sqllab/estimate/`. Corresponding permissions are can estimate query cost on SQLLab. Make sure you add/replace the necessary permissions on any custom roles you may have. - [22809](https://github.com/apache/superset/pull/22809): Migrated endpoint `/superset/sql_json` and `/superset/results/` to `/api/v1/sqllab/execute/` and `/api/v1/sqllab/results/` respectively. Corresponding permissions are `can sql_json on Superset` to `can execute on SQLLab`, `can results on Superset` to `can results on SQLLab`. Make sure you add/replace the necessary permissions on any custom roles you may have. - [22931](https://github.com/apache/superset/pull/22931): Migrated endpoint `/superset/get_or_create_table/` to `/api/v1/dataset/get_or_create/`. Corresponding permissions are `can get or create table on Superset` to `can get or create dataset on Dataset`. Make sure you add/replace the necessary permissions on any custom roles you may have. - [22882](https://github.com/apache/superset/pull/22882): Migrated endpoint `/superset/filter////` to `/api/v1/datasource///column//values/`. Corresponding permissions are `can filter on Superset` to `can get column values on Datasource`. Make sure you add/replace the necessary permissions on any custom roles you may have. diff --git a/superset-frontend/src/SqlLab/actions/sqlLab.js b/superset-frontend/src/SqlLab/actions/sqlLab.js index ab8abe0edca21..c27485879b8cd 100644 --- a/superset-frontend/src/SqlLab/actions/sqlLab.js +++ b/superset-frontend/src/SqlLab/actions/sqlLab.js @@ -184,18 +184,20 @@ export function estimateQueryCost(queryEditor) { const { dbId, schema, sql, selectedText, templateParams } = getUpToDateQuery(getState(), queryEditor); const requestSql = selectedText || sql; - const endpoint = - schema === null - ? `/superset/estimate_query_cost/${dbId}/` - : `/superset/estimate_query_cost/${dbId}/${schema}/`; + + const postPayload = { + database_id: dbId, + schema, + sql: requestSql, + template_params: JSON.parse(templateParams || '{}'), + }; + return Promise.all([ dispatch({ type: COST_ESTIMATE_STARTED, query: queryEditor }), SupersetClient.post({ - endpoint, - postPayload: { - sql: requestSql, - templateParams: JSON.parse(templateParams || '{}'), - }, + endpoint: '/api/v1/sqllab/estimate/', + body: JSON.stringify(postPayload), + headers: { 'Content-Type': 'application/json' }, }) .then(({ json }) => dispatch({ type: COST_ESTIMATE_RETURNED, query: queryEditor, json }), diff --git a/superset-frontend/src/SqlLab/reducers/sqlLab.js b/superset-frontend/src/SqlLab/reducers/sqlLab.js index e3bb196fbcef0..a110914b814cf 100644 --- a/superset-frontend/src/SqlLab/reducers/sqlLab.js +++ b/superset-frontend/src/SqlLab/reducers/sqlLab.js @@ -335,7 +335,7 @@ export default function sqlLabReducer(state = {}, action) { ...state.queryCostEstimates, [action.query.id]: { completed: true, - cost: action.json, + cost: action.json.result, error: null, }, }, diff --git a/superset/security/manager.py b/superset/security/manager.py index 5aa5080294f73..9e7a8bbd4bb10 100644 --- a/superset/security/manager.py +++ b/superset/security/manager.py @@ -250,6 +250,7 @@ class SupersetSecurityManager( # pylint: disable=too-many-public-methods ("can_export_csv", "Query"), ("can_get_results", "SQLLab"), ("can_execute_sql_query", "SQLLab"), + ("can_estimate_query_cost", "SQL Lab"), ("can_export_csv", "SQLLab"), ("can_sql_json", "Superset"), # Deprecated permission remove on 3.0.0 ("can_sqllab_history", "Superset"), diff --git a/superset/sqllab/api.py b/superset/sqllab/api.py index 5915601c0de15..8801cbc5f96d3 100644 --- a/superset/sqllab/api.py +++ b/superset/sqllab/api.py @@ -19,7 +19,7 @@ from urllib import parse import simplejson as json -from flask import request +from flask import request, Response from flask_appbuilder.api import expose, protect, rison from flask_appbuilder.models.sqla.interface import SQLAInterface from marshmallow import ValidationError @@ -32,6 +32,7 @@ from superset.queries.dao import QueryDAO from superset.sql_lab import get_sql_results from superset.sqllab.command_status import SqlJsonExecutionStatus +from superset.sqllab.commands.estimate import QueryEstimationCommand from superset.sqllab.commands.execute import CommandResult, ExecuteSqlCommand from superset.sqllab.commands.export import SqlResultExportCommand from superset.sqllab.commands.results import SqlExecutionResultsCommand @@ -42,6 +43,7 @@ from superset.sqllab.execution_context_convertor import ExecutionContextConvertor from superset.sqllab.query_render import SqlQueryRenderImpl from superset.sqllab.schemas import ( + EstimateQueryCostSchema, ExecutePayloadSchema, QueryExecutionResponseSchema, sql_lab_get_results_schema, @@ -70,6 +72,7 @@ class SqlLabRestApi(BaseSupersetApi): class_permission_name = "SQLLab" + estimate_model_schema = EstimateQueryCostSchema() execute_model_schema = ExecutePayloadSchema() apispec_parameter_schemas = { @@ -77,10 +80,61 @@ class SqlLabRestApi(BaseSupersetApi): } openapi_spec_tag = "SQL Lab" openapi_spec_component_schemas = ( + EstimateQueryCostSchema, ExecutePayloadSchema, QueryExecutionResponseSchema, ) + @expose("/estimate/", methods=["POST"]) + @protect() + @statsd_metrics + @requires_json + @event_logger.log_this_with_context( + action=lambda self, *args, **kwargs: f"{self.__class__.__name__}" + f".estimate_query_cost", + log_to_statsd=False, + ) + def estimate_query_cost(self) -> Response: + """Estimates the SQL query execution cost + --- + post: + summary: >- + Estimates the SQL query execution cost + requestBody: + description: SQL query and params + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/EstimateQueryCostSchema' + responses: + 200: + description: Query estimation result + content: + application/json: + schema: + type: object + properties: + result: + type: object + 400: + $ref: '#/components/responses/400' + 401: + $ref: '#/components/responses/401' + 403: + $ref: '#/components/responses/403' + 500: + $ref: '#/components/responses/500' + """ + try: + model = self.estimate_model_schema.load(request.json) + except ValidationError as error: + return self.response_400(message=error.messages) + + command = QueryEstimationCommand(model) + result = command.run() + return self.response(200, result=result) + @expose("/export//") @protect() @statsd_metrics diff --git a/superset/sqllab/commands/estimate.py b/superset/sqllab/commands/estimate.py new file mode 100644 index 0000000000000..2b8c5814b953c --- /dev/null +++ b/superset/sqllab/commands/estimate.py @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from __future__ import annotations + +import logging +from typing import Any, Dict, List + +from flask_babel import gettext as __ + +from superset import app, db +from superset.commands.base import BaseCommand +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType +from superset.exceptions import SupersetErrorException, SupersetTimeoutException +from superset.jinja_context import get_template_processor +from superset.models.core import Database +from superset.sqllab.schemas import EstimateQueryCostSchema +from superset.utils import core as utils + +config = app.config +SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = config["SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT"] +stats_logger = config["STATS_LOGGER"] + +logger = logging.getLogger(__name__) + + +class QueryEstimationCommand(BaseCommand): + _database_id: int + _sql: str + _template_params: Dict[str, Any] + _schema: str + _database: Database + + def __init__(self, params: EstimateQueryCostSchema) -> None: + self._database_id = params.get("database_id") + self._sql = params.get("sql", "") + self._template_params = params.get("template_params", {}) + self._schema = params.get("schema", "") + + def validate(self) -> None: + self._database = db.session.query(Database).get(self._database_id) + if not self._database: + raise SupersetErrorException( + SupersetError( + message=__("The database could not be found"), + error_type=SupersetErrorType.RESULTS_BACKEND_ERROR, + level=ErrorLevel.ERROR, + ), + status=404, + ) + + def run( + self, + ) -> List[Dict[str, Any]]: + self.validate() + + sql = self._sql + if self._template_params: + template_processor = get_template_processor(self._database) + sql = template_processor.process_template(sql, **self._template_params) + + timeout = SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT + timeout_msg = f"The estimation exceeded the {timeout} seconds timeout." + try: + with utils.timeout(seconds=timeout, error_message=timeout_msg): + cost = self._database.db_engine_spec.estimate_query_cost( + self._database, self._schema, sql, utils.QuerySource.SQL_LAB + ) + except SupersetTimeoutException as ex: + logger.exception(ex) + raise SupersetErrorException( + SupersetError( + message=__( + "The query estimation was killed after %(sqllab_timeout)s " + "seconds. It might be too complex, or the database might be " + "under heavy load.", + sqllab_timeout=SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT, + ), + error_type=SupersetErrorType.SQLLAB_TIMEOUT_ERROR, + level=ErrorLevel.ERROR, + ), + status=500, + ) from ex + + spec = self._database.db_engine_spec + query_cost_formatters: Dict[str, Any] = app.config[ + "QUERY_COST_FORMATTERS_BY_ENGINE" + ] + query_cost_formatter = query_cost_formatters.get( + spec.engine, spec.query_cost_formatter + ) + cost = query_cost_formatter(cost) + return cost diff --git a/superset/sqllab/schemas.py b/superset/sqllab/schemas.py index f238fda5c918f..134b9ea7bb799 100644 --- a/superset/sqllab/schemas.py +++ b/superset/sqllab/schemas.py @@ -25,6 +25,15 @@ } +class EstimateQueryCostSchema(Schema): + database_id = fields.Integer(required=True, description="The database id") + sql = fields.String(required=True, description="The SQL query to estimate") + template_params = fields.Dict( + keys=fields.String(), description="The SQL query template params" + ) + schema = fields.String(allow_none=True, description="The database schema") + + class ExecutePayloadSchema(Schema): database_id = fields.Integer(required=True) sql = fields.String(required=True) diff --git a/superset/views/core.py b/superset/views/core.py index 3bd0ec651edc6..44f1b78af0a01 100755 --- a/superset/views/core.py +++ b/superset/views/core.py @@ -2062,6 +2062,7 @@ def extra_table_metadata( # pylint: disable=no-self-use @expose("/estimate_query_cost//", methods=["POST"]) @expose("/estimate_query_cost///", methods=["POST"]) @event_logger.log_this + @deprecated() def estimate_query_cost( # pylint: disable=no-self-use self, database_id: int, schema: Optional[str] = None ) -> FlaskResponse: diff --git a/tests/integration_tests/sql_lab/api_tests.py b/tests/integration_tests/sql_lab/api_tests.py index 93beb380f0db6..a57d24c3e45ed 100644 --- a/tests/integration_tests/sql_lab/api_tests.py +++ b/tests/integration_tests/sql_lab/api_tests.py @@ -42,6 +42,73 @@ class TestSqlLabApi(SupersetTestCase): + def test_estimate_required_params(self): + self.login() + + rv = self.client.post( + "/api/v1/sqllab/estimate/", + json={}, + ) + failed_resp = { + "message": { + "sql": ["Missing data for required field."], + "database_id": ["Missing data for required field."], + } + } + resp_data = json.loads(rv.data.decode("utf-8")) + self.assertDictEqual(resp_data, failed_resp) + self.assertEqual(rv.status_code, 400) + + data = {"sql": "SELECT 1"} + rv = self.client.post( + "/api/v1/sqllab/estimate/", + json=data, + ) + failed_resp = {"message": {"database_id": ["Missing data for required field."]}} + resp_data = json.loads(rv.data.decode("utf-8")) + self.assertDictEqual(resp_data, failed_resp) + self.assertEqual(rv.status_code, 400) + + data = {"database_id": 1} + rv = self.client.post( + "/api/v1/sqllab/estimate/", + json=data, + ) + failed_resp = {"message": {"sql": ["Missing data for required field."]}} + resp_data = json.loads(rv.data.decode("utf-8")) + self.assertDictEqual(resp_data, failed_resp) + self.assertEqual(rv.status_code, 400) + + def test_estimate_valid_request(self): + self.login() + + formatter_response = [ + { + "value": 100, + } + ] + + db_mock = mock.Mock() + db_mock.db_engine_spec = mock.Mock() + db_mock.db_engine_spec.estimate_query_cost = mock.Mock(return_value=100) + db_mock.db_engine_spec.query_cost_formatter = mock.Mock( + return_value=formatter_response + ) + + with mock.patch("superset.sqllab.commands.estimate.db") as mock_superset_db: + mock_superset_db.session.query().get.return_value = db_mock + + data = {"database_id": 1, "sql": "SELECT 1"} + rv = self.client.post( + "/api/v1/sqllab/estimate/", + json=data, + ) + + success_resp = {"result": formatter_response} + resp_data = json.loads(rv.data.decode("utf-8")) + self.assertDictEqual(resp_data, success_resp) + self.assertEqual(rv.status_code, 200) + @mock.patch("superset.sqllab.commands.results.results_backend_use_msgpack", False) def test_execute_required_params(self): self.login() diff --git a/tests/integration_tests/sql_lab/commands_tests.py b/tests/integration_tests/sql_lab/commands_tests.py index cf0aebf001e01..3d505ee2f544a 100644 --- a/tests/integration_tests/sql_lab/commands_tests.py +++ b/tests/integration_tests/sql_lab/commands_tests.py @@ -19,25 +19,98 @@ import pandas as pd import pytest +from flask_babel import gettext as __ -from superset import db, sql_lab +from superset import app, db, sql_lab from superset.common.db_query_status import QueryStatus -from superset.errors import ErrorLevel, SupersetErrorType +from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.exceptions import ( SerializationError, - SupersetError, SupersetErrorException, SupersetSecurityException, + SupersetTimeoutException, ) from superset.models.core import Database from superset.models.sql_lab import Query -from superset.sqllab.commands import export, results +from superset.sqllab.commands import estimate, export, results from superset.sqllab.limiting_factor import LimitingFactor +from superset.sqllab.schemas import EstimateQueryCostSchema from superset.utils import core as utils from superset.utils.database import get_example_database from tests.integration_tests.base_tests import SupersetTestCase +class TestQueryEstimationCommand(SupersetTestCase): + def test_validation_no_database(self) -> None: + params = {"database_id": 1, "sql": "SELECT 1"} + schema = EstimateQueryCostSchema() + data: EstimateQueryCostSchema = schema.dump(params) + command = estimate.QueryEstimationCommand(data) + + with mock.patch("superset.sqllab.commands.estimate.db") as mock_superset_db: + mock_superset_db.session.query().get.return_value = None + with pytest.raises(SupersetErrorException) as ex_info: + command.validate() + assert ( + ex_info.value.error.error_type + == SupersetErrorType.RESULTS_BACKEND_ERROR + ) + + @patch("superset.tasks.scheduler.is_feature_enabled") + def test_run_timeout(self, is_feature_enabled) -> None: + params = {"database_id": 1, "sql": "SELECT 1", "template_params": {"temp": 123}} + schema = EstimateQueryCostSchema() + data: EstimateQueryCostSchema = schema.dump(params) + command = estimate.QueryEstimationCommand(data) + + db_mock = mock.Mock() + db_mock.db_engine_spec = mock.Mock() + db_mock.db_engine_spec.estimate_query_cost = mock.Mock( + side_effect=SupersetTimeoutException( + error_type=SupersetErrorType.CONNECTION_DATABASE_TIMEOUT, + message=( + "Please check your connection details and database settings, " + "and ensure that your database is accepting connections, " + "then try connecting again." + ), + level=ErrorLevel.ERROR, + ) + ) + db_mock.db_engine_spec.query_cost_formatter = mock.Mock(return_value=None) + is_feature_enabled.return_value = False + + with mock.patch("superset.sqllab.commands.estimate.db") as mock_superset_db: + mock_superset_db.session.query().get.return_value = db_mock + with pytest.raises(SupersetErrorException) as ex_info: + command.run() + assert ( + ex_info.value.error.error_type == SupersetErrorType.SQLLAB_TIMEOUT_ERROR + ) + assert ex_info.value.error.message == __( + "The query estimation was killed after %(sqllab_timeout)s seconds. It might " + "be too complex, or the database might be under heavy load.", + sqllab_timeout=app.config["SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT"], + ) + + def test_run_success(self) -> None: + params = {"database_id": 1, "sql": "SELECT 1"} + schema = EstimateQueryCostSchema() + data: EstimateQueryCostSchema = schema.dump(params) + command = estimate.QueryEstimationCommand(data) + + payload = {"value": 100} + + db_mock = mock.Mock() + db_mock.db_engine_spec = mock.Mock() + db_mock.db_engine_spec.estimate_query_cost = mock.Mock(return_value=100) + db_mock.db_engine_spec.query_cost_formatter = mock.Mock(return_value=payload) + + with mock.patch("superset.sqllab.commands.estimate.db") as mock_superset_db: + mock_superset_db.session.query().get.return_value = db_mock + result = command.run() + assert result == payload + + class TestSqlResultExportCommand(SupersetTestCase): @pytest.fixture() def create_database_and_query(self):