-
Notifications
You must be signed in to change notification settings - Fork 14.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(explore): Postgres datatype conversion #13294
Changes from 28 commits
70572c8
8e330db
83f3996
d6c4c1c
35fa495
c00e4c3
488a840
c894b90
2fb975b
82a8c9d
4b8d0ec
ddcc14a
fcb5edc
010e50e
32f58a8
ebcbb53
1bdebbf
2f341b9
39cc3b3
d9afba7
1050974
1a799df
b92e2ac
e630335
c9b5e56
276c820
8430ef3
0189072
bfdc994
8e1d813
f089e9a
7292e37
7e0d4d1
b5f6244
1e06266
4336ae5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,7 +41,7 @@ | |
import sqlparse | ||
from flask import g | ||
from flask_babel import lazy_gettext as _ | ||
from sqlalchemy import column, DateTime, select | ||
from sqlalchemy import column, DateTime, select, types | ||
from sqlalchemy.engine.base import Engine | ||
from sqlalchemy.engine.interfaces import Compiled, Dialect | ||
from sqlalchemy.engine.reflection import Inspector | ||
|
@@ -57,6 +57,7 @@ | |
from superset.models.sql_lab import Query | ||
from superset.sql_parse import ParsedQuery, Table | ||
from superset.utils import core as utils | ||
from superset.utils.core import ColumnSpec, GenericDataType | ||
|
||
if TYPE_CHECKING: | ||
# prevent circular imports | ||
|
@@ -145,8 +146,84 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods | |
_date_trunc_functions: Dict[str, str] = {} | ||
_time_grain_expressions: Dict[Optional[str], str] = {} | ||
column_type_mappings: Tuple[ | ||
Tuple[Pattern[str], Union[TypeEngine, Callable[[Match[str]], TypeEngine]]], ..., | ||
] = () | ||
Tuple[ | ||
Pattern[str], | ||
Union[TypeEngine, Callable[[Match[str]], TypeEngine]], | ||
GenericDataType, | ||
], | ||
..., | ||
] = ( | ||
( | ||
re.compile(r"^smallint", re.IGNORECASE), | ||
types.SmallInteger(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^integer", re.IGNORECASE), | ||
types.Integer(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^bigint", re.IGNORECASE), | ||
types.BigInteger(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^decimal", re.IGNORECASE), | ||
types.Numeric(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^numeric", re.IGNORECASE), | ||
types.Numeric(), | ||
GenericDataType.NUMERIC, | ||
), | ||
(re.compile(r"^real", re.IGNORECASE), types.REAL, GenericDataType.NUMERIC,), | ||
( | ||
re.compile(r"^smallserial", re.IGNORECASE), | ||
types.SmallInteger(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^serial", re.IGNORECASE), | ||
types.Integer(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^bigserial", re.IGNORECASE), | ||
types.BigInteger(), | ||
GenericDataType.NUMERIC, | ||
), | ||
( | ||
re.compile(r"^varchar", re.IGNORECASE), | ||
types.VARCHAR(), | ||
GenericDataType.STRING, | ||
), | ||
(re.compile(r"^char", re.IGNORECASE), types.CHAR(), GenericDataType.STRING), | ||
(re.compile(r"^text", re.IGNORECASE), types.Text(), GenericDataType.STRING), | ||
(re.compile(r"^date", re.IGNORECASE), types.Date(), GenericDataType.TEMPORAL,), | ||
( | ||
re.compile(r"^timestamp", re.IGNORECASE), | ||
types.TIMESTAMP(), | ||
GenericDataType.TEMPORAL, | ||
), | ||
( | ||
re.compile(r"^timestamptz", re.IGNORECASE), | ||
types.TIMESTAMP(timezone=True), | ||
GenericDataType.TEMPORAL, | ||
), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
( | ||
re.compile(r"^interval", re.IGNORECASE), | ||
types.Interval(), | ||
GenericDataType.TEMPORAL, | ||
), | ||
(re.compile(r"^time", re.IGNORECASE), types.Time(), GenericDataType.TEMPORAL,), | ||
( | ||
re.compile(r"^boolean", re.IGNORECASE), | ||
types.Boolean(), | ||
GenericDataType.BOOLEAN, | ||
), | ||
) | ||
time_groupby_inline = False | ||
limit_method = LimitMethod.FORCE_LIMIT | ||
time_secondary_columns = False | ||
|
@@ -162,21 +239,17 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods | |
|
||
# default matching patterns to convert database specific column types to | ||
# more generic types | ||
db_column_types: Dict[utils.GenericDataType, Tuple[Pattern[str], ...]] = { | ||
utils.GenericDataType.NUMERIC: ( | ||
db_column_types: Dict[GenericDataType, Tuple[Pattern[str], ...]] = { | ||
GenericDataType.NUMERIC: ( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need this map anymore? I believe this can be achieved with |
||
re.compile(r"BIT", re.IGNORECASE), | ||
re.compile( | ||
r".*(DOUBLE|FLOAT|INT|NUMBER|REAL|NUMERIC|DECIMAL|MONEY).*", | ||
re.IGNORECASE, | ||
), | ||
re.compile(r".*LONG$", re.IGNORECASE), | ||
), | ||
utils.GenericDataType.STRING: ( | ||
re.compile(r".*(CHAR|STRING|TEXT).*", re.IGNORECASE), | ||
), | ||
utils.GenericDataType.TEMPORAL: ( | ||
re.compile(r".*(DATE|TIME).*", re.IGNORECASE), | ||
), | ||
GenericDataType.STRING: (re.compile(r".*(CHAR|STRING|TEXT).*", re.IGNORECASE),), | ||
GenericDataType.TEMPORAL: (re.compile(r".*(DATE|TIME).*", re.IGNORECASE),), | ||
} | ||
|
||
@classmethod | ||
|
@@ -210,7 +283,7 @@ def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception: | |
|
||
@classmethod | ||
def is_db_column_type_match( | ||
cls, db_column_type: Optional[str], target_column_type: utils.GenericDataType | ||
cls, db_column_type: Optional[str], target_column_type: GenericDataType | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe we can remove this method, as I don't see it being used anymore |
||
) -> bool: | ||
""" | ||
Check if a column type satisfies a pattern in a collection of regexes found in | ||
|
@@ -967,24 +1040,35 @@ def make_label_compatible(cls, label: str) -> Union[str, quoted_name]: | |
return label_mutated | ||
|
||
@classmethod | ||
def get_sqla_column_type(cls, type_: Optional[str]) -> Optional[TypeEngine]: | ||
def get_sqla_column_type( | ||
cls, | ||
column_type: Optional[str], | ||
column_type_mappings: Tuple[ | ||
Tuple[ | ||
Pattern[str], | ||
Union[TypeEngine, Callable[[Match[str]], TypeEngine]], | ||
GenericDataType, | ||
], | ||
..., | ||
] = column_type_mappings, | ||
Comment on lines
+1015
to
+1022
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of passing the mapping to the method, we can probably just call the |
||
) -> Union[Tuple[TypeEngine, GenericDataType], None]: | ||
""" | ||
Return a sqlalchemy native column type that corresponds to the column type | ||
defined in the data source (return None to use default type inferred by | ||
SQLAlchemy). Override `column_type_mappings` for specific needs | ||
(see MSSQL for example of NCHAR/NVARCHAR handling). | ||
|
||
:param type_: Column type returned by inspector | ||
:param column_type: Column type returned by inspector | ||
:return: SqlAlchemy column type | ||
""" | ||
if not type_: | ||
if not column_type: | ||
return None | ||
for regex, sqla_type in cls.column_type_mappings: | ||
match = regex.match(type_) | ||
for regex, sqla_type, generic_type in column_type_mappings: | ||
match = regex.match(column_type) | ||
if match: | ||
if callable(sqla_type): | ||
return sqla_type(match) | ||
return sqla_type | ||
return sqla_type(match), generic_type | ||
villebro marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return sqla_type, generic_type | ||
return None | ||
|
||
@staticmethod | ||
|
@@ -1101,3 +1185,43 @@ def is_readonly_query(cls, parsed_query: ParsedQuery) -> bool: | |
or parsed_query.is_explain() | ||
or parsed_query.is_show() | ||
) | ||
|
||
@classmethod | ||
def get_column_spec( | ||
cls, | ||
native_type: Optional[str], | ||
source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE, | ||
column_type_mappings: Tuple[ | ||
Tuple[ | ||
Pattern[str], | ||
Union[TypeEngine, Callable[[Match[str]], TypeEngine]], | ||
GenericDataType, | ||
], | ||
..., | ||
] = column_type_mappings, | ||
) -> Union[ColumnSpec, None]: | ||
""" | ||
Converts native database type to sqlalchemy column type. | ||
:param native_type: Native database typee | ||
:param source: Type coming from the database table or cursor description | ||
:return: ColumnSpec object | ||
""" | ||
column_type = None | ||
|
||
if ( | ||
cls.get_sqla_column_type( | ||
native_type, column_type_mappings=column_type_mappings | ||
) | ||
is not None | ||
): | ||
column_type, generic_type = cls.get_sqla_column_type( # type: ignore | ||
native_type, column_type_mappings=column_type_mappings | ||
) | ||
is_dttm = generic_type == GenericDataType.TEMPORAL | ||
|
||
if column_type: | ||
return ColumnSpec( | ||
sqla_type=column_type, generic_type=generic_type, is_dttm=is_dttm | ||
) | ||
|
||
return None |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -78,8 +78,16 @@ def fetch_data( | |
return cls.pyodbc_rows_to_tuples(data) | ||
|
||
column_type_mappings = ( | ||
(re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), UnicodeText()), | ||
(re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), String()), | ||
( | ||
re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), | ||
UnicodeText(), | ||
utils.GenericDataType.STRING, | ||
), | ||
( | ||
re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), | ||
String(), | ||
utils.GenericDataType.STRING, | ||
), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would probably design this so that an engine spec can extend the base mapping. In the case of MSSQL, I believe the base mapping is a good fallback. Also, we might consider incorporating these types into the base spec, as I assume fairly many engines support N-prefixed character types, and some of those engines might also benefit from the |
||
) | ||
|
||
@classmethod | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To make sure we match Mysql
INT
type, we could just change this to^INT
to match bothINT
andINTEGER
, unless there are any known incompatible types that could cause a collision.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is an
INTEGER
type inmysql
dialect.