-
Notifications
You must be signed in to change notification settings - Fork 14.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor(trino): Handful of updates for the Trino engine #20152
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,7 +64,6 @@ def get_git_sha() -> str: | |
zip_safe=False, | ||
entry_points={ | ||
"console_scripts": ["superset=superset.cli.main:superset"], | ||
"sqlalchemy.dialects": ["trinonative = trino.sqlalchemy.dialect:TrinoDialect"], | ||
}, | ||
install_requires=[ | ||
"backoff>=1.8.0", | ||
|
@@ -142,7 +141,7 @@ def get_git_sha() -> str: | |
"firebolt": ["firebolt-sqlalchemy>=0.0.1"], | ||
"gsheets": ["shillelagh[gsheetsapi]>=1.0.14, <2"], | ||
"hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"], | ||
"hive": ["pyhive[hive]>=0.6.1", "tableschema", "thrift>=0.11.0, <1.0.0"], | ||
"hive": ["pyhive[hive]>=0.6.5", "tableschema", "thrift>=0.11.0, <1.0.0"], | ||
"impala": ["impyla>0.16.2, <0.17"], | ||
"kusto": ["sqlalchemy-kusto>=1.0.1, <2"], | ||
"kylin": ["kylinpy>=2.8.1, <2.9"], | ||
|
@@ -151,7 +150,7 @@ def get_git_sha() -> str: | |
"oracle": ["cx-Oracle>8.0.0, <8.1"], | ||
"pinot": ["pinotdb>=0.3.3, <0.4"], | ||
"postgres": ["psycopg2-binary==2.9.1"], | ||
"presto": ["pyhive[presto]>=0.4.0"], | ||
"presto": ["pyhive[presto]>=0.6.5"], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bumping to ensure that PyHive doesn't specify the |
||
"trino": ["trino>=0.313.0"], | ||
"prophet": ["prophet>=1.0.1, <1.1", "pystan<3.0"], | ||
"redshift": ["sqlalchemy-redshift>=0.8.1, < 0.9"], | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,16 +15,15 @@ | |
# specific language governing permissions and limitations | ||
# under the License. | ||
import logging | ||
from datetime import datetime | ||
from typing import Any, Dict, List, Optional, TYPE_CHECKING | ||
from urllib import parse | ||
from typing import Any, Dict, Optional, TYPE_CHECKING | ||
|
||
import simplejson as json | ||
from flask import current_app | ||
from sqlalchemy.engine.url import URL | ||
|
||
from superset.databases.utils import make_url_safe | ||
from superset.db_engine_specs.base import BaseEngineSpec | ||
from superset.db_engine_specs.presto import PrestoEngineSpec | ||
from superset.utils import core as utils | ||
|
||
if TYPE_CHECKING: | ||
|
@@ -33,66 +32,11 @@ | |
logger = logging.getLogger(__name__) | ||
|
||
|
||
class TrinoEngineSpec(BaseEngineSpec): | ||
class TrinoEngineSpec(PrestoEngineSpec): | ||
engine = "trino" | ||
engine_aliases = {"trinonative"} | ||
engine_aliases = {"trinonative"} # Required for backwards compatibility. | ||
engine_name = "Trino" | ||
|
||
_time_grain_expressions = { | ||
None: "{col}", | ||
"PT1S": "date_trunc('second', CAST({col} AS TIMESTAMP))", | ||
"PT1M": "date_trunc('minute', CAST({col} AS TIMESTAMP))", | ||
"PT1H": "date_trunc('hour', CAST({col} AS TIMESTAMP))", | ||
"P1D": "date_trunc('day', CAST({col} AS TIMESTAMP))", | ||
"P1W": "date_trunc('week', CAST({col} AS TIMESTAMP))", | ||
"P1M": "date_trunc('month', CAST({col} AS TIMESTAMP))", | ||
"P3M": "date_trunc('quarter', CAST({col} AS TIMESTAMP))", | ||
"P1Y": "date_trunc('year', CAST({col} AS TIMESTAMP))", | ||
# "1969-12-28T00:00:00Z/P1W", # Week starting Sunday | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm really not sure why these were commented out. This has always been the case since the first version of this file. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Trino was forked from Presto, so I think removing these comments is safe. Thanks for cleaning this up. |
||
# "1969-12-29T00:00:00Z/P1W", # Week starting Monday | ||
# "P1W/1970-01-03T00:00:00Z", # Week ending Saturday | ||
# "P1W/1970-01-04T00:00:00Z", # Week ending Sunday | ||
} | ||
|
||
@classmethod | ||
def convert_dttm( | ||
cls, target_type: str, dttm: datetime, db_extra: Optional[Dict[str, Any]] = None | ||
) -> Optional[str]: | ||
""" | ||
Convert a Python `datetime` object to a SQL expression. | ||
|
||
:param target_type: The target type of expression | ||
:param dttm: The datetime object | ||
:param db_extra: The database extra object | ||
:return: The SQL expression | ||
|
||
Superset only defines time zone naive `datetime` objects, though this method | ||
handles both time zone naive and aware conversions. | ||
""" | ||
tt = target_type.upper() | ||
if tt == utils.TemporalType.DATE: | ||
return f"DATE '{dttm.date().isoformat()}'" | ||
if tt in ( | ||
utils.TemporalType.TIMESTAMP, | ||
utils.TemporalType.TIMESTAMP_WITH_TIME_ZONE, | ||
): | ||
return f"""TIMESTAMP '{dttm.isoformat(timespec="microseconds", sep=" ")}'""" | ||
return None | ||
|
||
@classmethod | ||
def epoch_to_dttm(cls) -> str: | ||
return "from_unixtime({col})" | ||
|
||
@classmethod | ||
def adjust_database_uri( | ||
cls, uri: URL, selected_schema: Optional[str] = None | ||
) -> None: | ||
database = uri.database | ||
if selected_schema and database: | ||
selected_schema = parse.quote(selected_schema, safe="") | ||
database = database.split("/")[0] + "/" + selected_schema | ||
uri.database = database | ||
|
||
@classmethod | ||
def update_impersonation_config( | ||
cls, | ||
|
@@ -133,78 +77,6 @@ def modify_url_for_impersonation( | |
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool: | ||
return True | ||
|
||
@classmethod | ||
def estimate_statement_cost(cls, statement: str, cursor: Any) -> Dict[str, Any]: | ||
""" | ||
Run a SQL query that estimates the cost of a given statement. | ||
|
||
:param statement: A single SQL statement | ||
:param cursor: Cursor instance | ||
:return: JSON response from Trino | ||
""" | ||
sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {statement}" | ||
cursor.execute(sql) | ||
|
||
# the output from Trino is a single column and a single row containing | ||
# JSON: | ||
# | ||
# { | ||
# ... | ||
# "estimate" : { | ||
# "outputRowCount" : 8.73265878E8, | ||
# "outputSizeInBytes" : 3.41425774958E11, | ||
# "cpuCost" : 3.41425774958E11, | ||
# "maxMemory" : 0.0, | ||
# "networkCost" : 3.41425774958E11 | ||
# } | ||
# } | ||
result = json.loads(cursor.fetchone()[0]) | ||
return result | ||
|
||
@classmethod | ||
def query_cost_formatter( | ||
cls, raw_cost: List[Dict[str, Any]] | ||
) -> List[Dict[str, str]]: | ||
""" | ||
Format cost estimate. | ||
|
||
:param raw_cost: JSON estimate from Trino | ||
:return: Human readable cost estimate | ||
""" | ||
|
||
def humanize(value: Any, suffix: str) -> str: | ||
try: | ||
value = int(value) | ||
except ValueError: | ||
return str(value) | ||
|
||
prefixes = ["K", "M", "G", "T", "P", "E", "Z", "Y"] | ||
prefix = "" | ||
to_next_prefix = 1000 | ||
while value > to_next_prefix and prefixes: | ||
prefix = prefixes.pop(0) | ||
value //= to_next_prefix | ||
|
||
return f"{value} {prefix}{suffix}" | ||
|
||
cost = [] | ||
columns = [ | ||
("outputRowCount", "Output count", " rows"), | ||
("outputSizeInBytes", "Output size", "B"), | ||
("cpuCost", "CPU cost", ""), | ||
("maxMemory", "Max memory", "B"), | ||
("networkCost", "Network cost", ""), | ||
] | ||
for row in raw_cost: | ||
estimate: Dict[str, float] = row.get("estimate", {}) | ||
statement_cost = {} | ||
for key, label, suffix in columns: | ||
if key in estimate: | ||
statement_cost[label] = humanize(estimate[key], suffix).strip() | ||
cost.append(statement_cost) | ||
|
||
return cost | ||
|
||
@staticmethod | ||
def get_extra_params(database: "Database") -> Dict[str, Any]: | ||
""" | ||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Duplicate logic; see line #20.