Skip to content

Commit

Permalink
Allow users to estimate query cost before executing it (#8172)
Browse files Browse the repository at this point in the history
* WIP

* Basic functionality working

* Enable per DB

* Show error message

* Small improvements

* Fix lint

* Address comments

* Address comments

* Remove logging, fix exception

* Fix tests

* Bump FAB requirements
  • Loading branch information
betodealmeida authored Sep 17, 2019
1 parent b9be01f commit 8847e10
Show file tree
Hide file tree
Showing 14 changed files with 408 additions and 1 deletion.
19 changes: 19 additions & 0 deletions docs/sqllab.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,22 @@ it's possible for administrators to expose more more macros in their
environment using the configuration variable ``JINJA_CONTEXT_ADDONS``.
All objects referenced in this dictionary will become available for users
to integrate in their queries in **SQL Lab**.

Query cost estimation
'''''''''''''''''''''

Some databases support ``EXPLAIN`` queries that allow users to estimate the cost
of queries before executing them. Currently, Presto is supported in SQL Lab. To
enable query cost estimation, add the following keys to the "Extra" field in the
database configuration:

.. code-block:: json

    {
        "version": "0.319",
        "cost_estimate_enabled": true,
        ...
    }

Here, "version" should be the version of your Presto cluster. Support for this
functionality was introduced in Presto 0.319.
24 changes: 24 additions & 0 deletions superset/assets/src/SqlLab/actions/sqlLab.js
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ export const CHANGE_DATA_PREVIEW_ID = 'CHANGE_DATA_PREVIEW_ID';
export const START_QUERY_VALIDATION = 'START_QUERY_VALIDATION';
export const QUERY_VALIDATION_RETURNED = 'QUERY_VALIDATION_RETURNED';
export const QUERY_VALIDATION_FAILED = 'QUERY_VALIDATION_FAILED';
// Action types for the query cost estimation request: dispatched when the
// request starts, when it returns a result, and when it fails.
export const COST_ESTIMATE_STARTED = 'COST_ESTIMATE_STARTED';
export const COST_ESTIMATE_RETURNED = 'COST_ESTIMATE_RETURNED';
export const COST_ESTIMATE_FAILED = 'COST_ESTIMATE_FAILED';

export const CREATE_DATASOURCE_STARTED = 'CREATE_DATASOURCE_STARTED';
export const CREATE_DATASOURCE_SUCCESS = 'CREATE_DATASOURCE_SUCCESS';
Expand Down Expand Up @@ -120,6 +123,27 @@ export function scheduleQuery(query) {
.catch(() => dispatch(addDangerToast(t('Your query could not be scheduled'))));
}

/**
 * Thunk action creator that asks the backend to estimate the cost of a query.
 *
 * Dispatches COST_ESTIMATE_STARTED right away, then either
 * COST_ESTIMATE_RETURNED (carrying the response JSON) or COST_ESTIMATE_FAILED
 * (carrying an error message) once the request settles.
 *
 * @param {Object} query - Description of the query to estimate.
 * @param {number} query.dbId - Id of the database to run the estimate against.
 * @param {?string} query.schema - Schema to use; when missing or empty the
 *   schema-less endpoint is called.
 * @param {string} query.sql - SQL text to estimate.
 * @param {?string} query.templateParams - JSON-encoded template parameters;
 *   when missing or empty an empty object is sent.
 * @returns {Function} Thunk taking `dispatch` and returning a promise that
 *   resolves once the request has settled.
 */
export function estimateQueryCost(query) {
  const { dbId, schema, sql, templateParams } = query;
  // Any falsy schema (null, undefined, '') must hit the schema-less route;
  // interpolating it would yield ".../undefined/" or ".../<dbId>//".
  const endpoint = schema
    ? `/superset/estimate_query_cost/${dbId}/${schema}/`
    : `/superset/estimate_query_cost/${dbId}/`;
  return dispatch => Promise.all([
    dispatch({ type: COST_ESTIMATE_STARTED, query }),
    SupersetClient.post({
      endpoint,
      // JSON.parse throws on undefined and on '', so fall back to an empty
      // JSON object when the editor has no template parameters.
      postPayload: { sql, templateParams: JSON.parse(templateParams || '{}') },
    })
      .then(({ json }) => dispatch({ type: COST_ESTIMATE_RETURNED, query, json }))
      .catch(response =>
        getClientErrorObject(response).then((error) => {
          const message = error.error || error.statusText || t('Failed at retrieving results');
          return dispatch({ type: COST_ESTIMATE_FAILED, query, error: message });
        }),
      ),
  ]);
}

export function startQuery(query) {
Object.assign(query, {
id: query.id ? query.id : shortid.generate(),
Expand Down
108 changes: 108 additions & 0 deletions superset/assets/src/SqlLab/components/EstimateQueryCostButton.jsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import React from 'react';
import PropTypes from 'prop-types';
import { Table } from 'reactable-arc';
import { Alert } from 'react-bootstrap';
import { t } from '@superset-ui/translation';

import Button from '../../components/Button';
import Loading from '../../components/Loading';
import ModalTrigger from '../../components/ModalTrigger';

// Props accepted by EstimateQueryCostButton.
const propTypes = {
  dbId: PropTypes.number.isRequired,
  // Not required: the query editor's schema may be null when none is selected.
  schema: PropTypes.string,
  sql: PropTypes.string.isRequired,
  // Callback invoked when the button is clicked to request an estimate.
  getEstimate: PropTypes.func.isRequired,
  // Estimate state with `completed`, `cost` and `error` keys.
  // (`PropTypes.Object` does not exist; the validator is `PropTypes.object`.)
  queryCostEstimate: PropTypes.object,
  selectedText: PropTypes.string,
  tooltip: PropTypes.string,
  disabled: PropTypes.bool,
};
const defaultProps = {
  // Same shape as the reducer's initial estimate, so a missing prop renders
  // the loading state rather than an empty error alert.
  queryCostEstimate: {
    cost: null,
    completed: false,
    error: null,
  },
  tooltip: '',
  disabled: false,
};

/**
 * Button that requests a query cost estimate and shows the outcome in a modal.
 *
 * Clicking the button calls `props.getEstimate`; the modal body is derived
 * from `props.queryCostEstimate` (error alert, result table, or spinner).
 */
class EstimateQueryCostButton extends React.PureComponent {
  constructor(props) {
    super(props);
    this.queryCostModal = React.createRef();
    this.onClick = this.onClick.bind(this);
    this.renderModalBody = this.renderModalBody.bind(this);
  }

  // Ask the parent to fetch a fresh estimate.
  onClick() {
    this.props.getEstimate();
  }

  // Pick the modal content from the current estimate state.
  renderModalBody() {
    const { completed, cost, error } = this.props.queryCostEstimate;
    // Loose `!= null` on purpose: an estimate object with no `error` key
    // (undefined) must not be reported as a failure, which the strict
    // `!== null` comparison did.
    if (error != null) {
      return (
        <Alert key="query-estimate-error" bsStyle="danger">
          {error}
        </Alert>
      );
    } else if (completed) {
      return (
        <Table
          className="table cost-estimate"
          data={cost}
        />
      );
    }
    // Neither failed nor completed: the request is still pending.
    return <Loading position="normal" />;
  }

  render() {
    const { disabled, selectedText, tooltip } = this.props;
    // The label reflects whether only the selected text will be estimated.
    const btnText = selectedText
      ? t('Estimate Selected Query Cost')
      : t('Estimate Query Cost');
    return (
      <span className="EstimateQueryCostButton">
        <ModalTrigger
          ref={this.queryCostModal}
          modalTitle={t('Query Cost Estimate')}
          modalBody={this.renderModalBody()}
          triggerNode={
            <Button
              bsStyle="warning"
              bsSize="small"
              onClick={this.onClick}
              key="query-estimate-btn"
              tooltip={tooltip}
              disabled={disabled}
            >
              <i className="fa fa-clock-o" /> {btnText}
            </Button>
          }
          bsSize="medium"
        />
      </span>
    );
  }
}

// Attach the prop validators and defaults declared above to the component.
EstimateQueryCostButton.propTypes = propTypes;
EstimateQueryCostButton.defaultProps = defaultProps;

export default EstimateQueryCostButton;
32 changes: 32 additions & 0 deletions superset/assets/src/SqlLab/components/SqlEditor.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import TemplateParamsEditor from './TemplateParamsEditor';
import SouthPane from './SouthPane';
import SaveQuery from './SaveQuery';
import ScheduleQueryButton from './ScheduleQueryButton';
import EstimateQueryCostButton from './EstimateQueryCostButton';
import ShareSqlLabQuery from './ShareSqlLabQuery';
import Timer from '../../components/Timer';
import Hotkeys from '../../components/Hotkeys';
Expand Down Expand Up @@ -109,6 +110,7 @@ class SqlEditor extends React.PureComponent {
this.requestValidation.bind(this),
VALIDATION_DEBOUNCE_MS,
);
this.getQueryCostEstimate = this.getQueryCostEstimate.bind(this);
this.handleWindowResize = throttle(
this.handleWindowResize.bind(this),
WINDOW_RESIZE_THROTTLE_MS,
Expand Down Expand Up @@ -210,6 +212,19 @@ class SqlEditor extends React.PureComponent {
setQueryLimit(queryLimit) {
this.props.actions.queryEditorSetQueryLimit(this.props.queryEditor, queryLimit);
}
getQueryCostEstimate() {
if (this.props.database) {
const qe = this.props.queryEditor;
const query = {
dbId: qe.dbId,
sql: qe.selectedText ? qe.selectedText : this.state.sql,
sqlEditorId: qe.id,
schema: qe.schema,
templateParams: qe.templateParams,
};
this.props.actions.estimateQueryCost(query);
}
}
handleWindowResize() {
this.setState({ height: this.getSqlEditorHeight() });
}
Expand Down Expand Up @@ -383,6 +398,23 @@ class SqlEditor extends React.PureComponent {
sql={this.state.sql}
/>
</span>
{
isFeatureEnabled(FeatureFlag.ESTIMATE_QUERY_COST) &&
this.props.database &&
this.props.database.allows_cost_estimate &&
<span className="m-r-5">
<EstimateQueryCostButton
dbId={qe.dbId}
schema={qe.schema}
sql={qe.sql}
getEstimate={this.getQueryCostEstimate}
queryCostEstimate={qe.queryCostEstimate}
selectedText={qe.selectedText}
tooltip={t('Estimate the cost before running a query')}
className="m-r-5"
/>
</span>
}
{isFeatureEnabled(FeatureFlag.SCHEDULED_QUERIES) &&
<span className="m-r-5">
<ScheduleQueryButton
Expand Down
4 changes: 4 additions & 0 deletions superset/assets/src/SqlLab/main.less
Original file line number Diff line number Diff line change
Expand Up @@ -435,3 +435,7 @@ a.Link {
.SouthPane .tab-content button.fetch {
margin-top: 10px;
}

.cost-estimate {
font-size: 12px;
}
5 changes: 5 additions & 0 deletions superset/assets/src/SqlLab/reducers/getInitialState.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ export default function getInitialState({ defaultDbId, ...restBootstrapData }) {
errors: [],
completed: false,
},
queryCostEstimate: {
cost: null,
completed: false,
error: null,
},
};

return {
Expand Down
36 changes: 36 additions & 0 deletions superset/assets/src/SqlLab/reducers/sqlLab.js
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,42 @@ export default function sqlLabReducer(state = {}, action) {
});
return newState;
},
[actions.COST_ESTIMATE_STARTED]() {
let newState = Object.assign({}, state);
const sqlEditor = { id: action.query.sqlEditorId };
newState = alterInArr(newState, 'queryEditors', sqlEditor, {
queryCostEstimate: {
completed: false,
cost: null,
error: null,
},
});
return newState;
},
[actions.COST_ESTIMATE_RETURNED]() {
let newState = Object.assign({}, state);
const sqlEditor = { id: action.query.sqlEditorId };
newState = alterInArr(newState, 'queryEditors', sqlEditor, {
queryCostEstimate: {
completed: true,
cost: action.json,
error: null,
},
});
return newState;
},
[actions.COST_ESTIMATE_FAILED]() {
let newState = Object.assign({}, state);
const sqlEditor = { id: action.query.sqlEditorId };
newState = alterInArr(newState, 'queryEditors', sqlEditor, {
queryCostEstimate: {
completed: false,
cost: null,
error: action.error,
},
});
return newState;
},
[actions.START_QUERY]() {
let newState = Object.assign({}, state);
if (action.query.sqlEditorId) {
Expand Down
1 change: 1 addition & 0 deletions superset/assets/src/featureFlags.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export enum FeatureFlag {
CLIENT_CACHE = 'CLIENT_CACHE',
SCHEDULED_QUERIES = 'SCHEDULED_QUERIES',
SQL_VALIDATORS_BY_ENGINE = 'SQL_VALIDATORS_BY_ENGINE',
ESTIMATE_QUERY_COST = 'ESTIMATE_QUERY_COST',
}

export type FeatureFlagMap = {
Expand Down
5 changes: 5 additions & 0 deletions superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,11 @@ class CeleryConfig(object):
# by celery.
SQLLAB_ASYNC_TIME_LIMIT_SEC = 60 * 60 * 6

# Some databases support running EXPLAIN queries that allow users to estimate
# query costs before they run. These EXPLAIN queries should have a small
# timeout.
SQLLAB_QUERY_COST_ESTIMATE_TIMEOUT = 10 # seconds

# An instantiated derivative of werkzeug.contrib.cache.BaseCache
# if enabled, it can be used to store the results of long-running queries
# in SQL Lab by using the "Run Async" button/feature
Expand Down
54 changes: 54 additions & 0 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
# pylint: disable=C,R,W
from contextlib import closing
from datetime import datetime
import hashlib
import os
Expand Down Expand Up @@ -125,6 +126,10 @@ class BaseEngineSpec:
max_column_name_length = 0
try_remove_schema_from_table_name = True

@classmethod
def get_allow_cost_estimate(cls, version: Optional[str] = None) -> bool:
    """
    Return whether cost estimation is allowed for the given database version.

    The base implementation always returns False.

    :param version: Database version, or None when not provided
    """
    return False

@classmethod
def get_timestamp_expr(
cls, col: ColumnClause, pdf: Optional[str], time_grain: Optional[str]
Expand Down Expand Up @@ -649,6 +654,55 @@ def select_star(
sql = sqlparse.format(sql, reindent=True)
return sql

@classmethod
def estimate_statement_cost(
    cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
    """
    Estimate the cost of a single SQL statement.

    The base implementation always raises.

    :param statement: A single SQL statement
    :param database: Database instance
    :param cursor: Cursor instance
    :param user_name: Effective username
    :raises Exception: always, in this base implementation
    """
    raise Exception("Database does not support cost estimation")

@classmethod
def estimate_query_cost(
    cls, database, schema: str, sql: str, source: str = None
) -> List[Dict[str, str]]:
    """
    Estimate the cost of every statement in a SQL query.

    The query is split into statements and each one is passed to
    ``estimate_statement_cost`` using a single connection and cursor.

    :param database: Database instance
    :param schema: Database schema
    :param sql: SQL query with possibly multiple statements
    :param source: Source of the query (eg, "sql_lab")
    :return: One cost entry per statement, in statement order
    :raises Exception: if ``get_allow_cost_estimate`` returns a falsy value
        for the "version" entry of the database extra
    """
    version = database.get_extra().get("version")
    if not cls.get_allow_cost_estimate(version):
        raise Exception("Database does not support cost estimation")

    user_name = g.user.username if g.user else None
    statements = sql_parse.ParsedQuery(sql).get_statements()
    engine = database.get_sqla_engine(
        schema=schema, nullpool=True, user_name=user_name, source=source
    )

    # Both context managers close their resource when the block exits,
    # including on the early return below.
    with closing(engine.raw_connection()) as conn:
        with closing(conn.cursor()) as cursor:
            return [
                cls.estimate_statement_cost(statement, database, cursor, user_name)
                for statement in statements
            ]

@classmethod
def modify_url_for_impersonation(cls, url, impersonate_user: bool, username: str):
"""
Expand Down
Loading

0 comments on commit 8847e10

Please sign in to comment.