Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(explore): Postgres datatype conversion #13294

Merged
merged 36 commits into from
Mar 12, 2021
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
70572c8
test
nikolagigic Feb 23, 2021
8e330db
unnecessary import
nikolagigic Feb 23, 2021
83f3996
fix lint
nikolagigic Feb 23, 2021
d6c4c1c
changes
nikolagigic Feb 24, 2021
35fa495
fix lint
nikolagigic Feb 25, 2021
c00e4c3
changes
nikolagigic Feb 26, 2021
488a840
changes
nikolagigic Feb 26, 2021
c894b90
changes
nikolagigic Feb 26, 2021
2fb975b
Merge branch 'master' into postgres_type_conversion
nikolagigic Feb 26, 2021
82a8c9d
changes
nikolagigic Mar 2, 2021
4b8d0ec
answering comments & changes
nikolagigic Mar 3, 2021
ddcc14a
answering comments
nikolagigic Mar 3, 2021
fcb5edc
answering comments
nikolagigic Mar 3, 2021
010e50e
changes
nikolagigic Mar 3, 2021
32f58a8
changes
nikolagigic Mar 4, 2021
ebcbb53
changes
nikolagigic Mar 4, 2021
1bdebbf
fix tests
nikolagigic Mar 4, 2021
2f341b9
fix tests
nikolagigic Mar 4, 2021
39cc3b3
fix tests
nikolagigic Mar 5, 2021
d9afba7
fix tests
nikolagigic Mar 5, 2021
1050974
fix tests
nikolagigic Mar 5, 2021
1a799df
Merge branch 'master' into postgres_type_conversion
nikolagigic Mar 5, 2021
b92e2ac
fix tests
nikolagigic Mar 8, 2021
e630335
fix tests
nikolagigic Mar 8, 2021
c9b5e56
fix tests
nikolagigic Mar 8, 2021
276c820
fix tests
nikolagigic Mar 8, 2021
8430ef3
fix tests
nikolagigic Mar 9, 2021
0189072
fix tests
nikolagigic Mar 9, 2021
bfdc994
fix tests
nikolagigic Mar 10, 2021
8e1d813
Merge branch 'master' into postgres_type_conversion
nikolagigic Mar 10, 2021
f089e9a
fix tests
nikolagigic Mar 10, 2021
7292e37
fix tests
nikolagigic Mar 10, 2021
7e0d4d1
fix tests
nikolagigic Mar 10, 2021
b5f6244
fix tests
nikolagigic Mar 11, 2021
1e06266
fix tests
nikolagigic Mar 11, 2021
4336ae5
fix tests
nikolagigic Mar 12, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 15 additions & 13 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
from superset.sql_parse import ParsedQuery
from superset.typing import Metric, QueryObjectDict
from superset.utils import core as utils
from superset.utils.core import GenericDataType

config = app.config
metadata = Model.metadata # pylint: disable=no-member
Expand Down Expand Up @@ -187,20 +188,20 @@ def is_numeric(self) -> bool:
"""
Check if the column has a numeric datatype.
"""
db_engine_spec = self.table.database.db_engine_spec
return db_engine_spec.is_db_column_type_match(
self.type, utils.GenericDataType.NUMERIC
)
column_spec = self.table.database.db_engine_spec.get_column_spec(self.type)
if column_spec is None:
return False
return column_spec.generic_type == GenericDataType.NUMERIC

@property
def is_string(self) -> bool:
"""
Check if the column has a string datatype.
"""
db_engine_spec = self.table.database.db_engine_spec
return db_engine_spec.is_db_column_type_match(
self.type, utils.GenericDataType.STRING
)
column_spec = self.table.database.db_engine_spec.get_column_spec(self.type)
if column_spec is None:
return False
return column_spec.generic_type == GenericDataType.STRING

@property
def is_temporal(self) -> bool:
Expand All @@ -212,18 +213,19 @@ def is_temporal(self) -> bool:
"""
if self.is_dttm is not None:
return self.is_dttm
db_engine_spec = self.table.database.db_engine_spec
return db_engine_spec.is_db_column_type_match(
self.type, utils.GenericDataType.TEMPORAL
)
column_spec = self.table.database.db_engine_spec.get_column_spec(self.type)
if column_spec is None:
return False
return column_spec.is_dttm

def get_sqla_col(self, label: Optional[str] = None) -> Column:
label = label or self.column_name
if self.expression:
col = literal_column(self.expression)
else:
db_engine_spec = self.table.database.db_engine_spec
type_ = db_engine_spec.get_sqla_column_type(self.type)
column_spec = db_engine_spec.get_column_spec(self.type)
type_ = column_spec.sqla_type if column_spec else None
col = column(self.column_name, type_=type_)
col = self.table.make_sqla_column_compatible(col, label)
return col
Expand Down
162 changes: 143 additions & 19 deletions superset/db_engine_specs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import sqlparse
from flask import g
from flask_babel import lazy_gettext as _
from sqlalchemy import column, DateTime, select
from sqlalchemy import column, DateTime, select, types
from sqlalchemy.engine.base import Engine
from sqlalchemy.engine.interfaces import Compiled, Dialect
from sqlalchemy.engine.reflection import Inspector
Expand All @@ -57,6 +57,7 @@
from superset.models.sql_lab import Query
from superset.sql_parse import ParsedQuery, Table
from superset.utils import core as utils
from superset.utils.core import ColumnSpec, GenericDataType

if TYPE_CHECKING:
# prevent circular imports
Expand Down Expand Up @@ -145,8 +146,84 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
_date_trunc_functions: Dict[str, str] = {}
_time_grain_expressions: Dict[Optional[str], str] = {}
column_type_mappings: Tuple[
Tuple[Pattern[str], Union[TypeEngine, Callable[[Match[str]], TypeEngine]]], ...,
] = ()
Tuple[
Pattern[str],
Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
GenericDataType,
],
...,
] = (
(
re.compile(r"^smallint", re.IGNORECASE),
types.SmallInteger(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^integer", re.IGNORECASE),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To make sure we match the MySQL INT type, we could just change this to ^INT to match both INT and INTEGER, unless there are any known incompatible types that could cause a collision.

Copy link
Contributor Author

@nikolagigic nikolagigic Mar 12, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is an INTEGER type in the MySQL dialect.

types.Integer(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^bigint", re.IGNORECASE),
types.BigInteger(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^decimal", re.IGNORECASE),
types.Numeric(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^numeric", re.IGNORECASE),
types.Numeric(),
GenericDataType.NUMERIC,
),
(re.compile(r"^real", re.IGNORECASE), types.REAL, GenericDataType.NUMERIC,),
(
re.compile(r"^smallserial", re.IGNORECASE),
types.SmallInteger(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^serial", re.IGNORECASE),
types.Integer(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^bigserial", re.IGNORECASE),
types.BigInteger(),
GenericDataType.NUMERIC,
),
(
re.compile(r"^varchar", re.IGNORECASE),
types.VARCHAR(),
GenericDataType.STRING,
),
(re.compile(r"^char", re.IGNORECASE), types.CHAR(), GenericDataType.STRING),
(re.compile(r"^text", re.IGNORECASE), types.Text(), GenericDataType.STRING),
(re.compile(r"^date", re.IGNORECASE), types.Date(), GenericDataType.TEMPORAL,),
(
re.compile(r"^timestamp", re.IGNORECASE),
types.TIMESTAMP(),
GenericDataType.TEMPORAL,
),
(
re.compile(r"^timestamptz", re.IGNORECASE),
types.TIMESTAMP(timezone=True),
GenericDataType.TEMPORAL,
),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^timestamptz would never be caught as it's already matching ^timestamp above. I suggest removing timestamptz support for now (timezones aren't really properly supported yet).

(
re.compile(r"^interval", re.IGNORECASE),
types.Interval(),
GenericDataType.TEMPORAL,
),
(re.compile(r"^time", re.IGNORECASE), types.Time(), GenericDataType.TEMPORAL,),
(
re.compile(r"^boolean", re.IGNORECASE),
types.Boolean(),
GenericDataType.BOOLEAN,
),
)
time_groupby_inline = False
limit_method = LimitMethod.FORCE_LIMIT
time_secondary_columns = False
Expand All @@ -162,21 +239,17 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods

# default matching patterns to convert database specific column types to
# more generic types
db_column_types: Dict[utils.GenericDataType, Tuple[Pattern[str], ...]] = {
utils.GenericDataType.NUMERIC: (
db_column_types: Dict[GenericDataType, Tuple[Pattern[str], ...]] = {
GenericDataType.NUMERIC: (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this map anymore? I believe this can be achieved with get_column_spec

re.compile(r"BIT", re.IGNORECASE),
re.compile(
r".*(DOUBLE|FLOAT|INT|NUMBER|REAL|NUMERIC|DECIMAL|MONEY).*",
re.IGNORECASE,
),
re.compile(r".*LONG$", re.IGNORECASE),
),
utils.GenericDataType.STRING: (
re.compile(r".*(CHAR|STRING|TEXT).*", re.IGNORECASE),
),
utils.GenericDataType.TEMPORAL: (
re.compile(r".*(DATE|TIME).*", re.IGNORECASE),
),
GenericDataType.STRING: (re.compile(r".*(CHAR|STRING|TEXT).*", re.IGNORECASE),),
GenericDataType.TEMPORAL: (re.compile(r".*(DATE|TIME).*", re.IGNORECASE),),
}

@classmethod
Expand Down Expand Up @@ -210,7 +283,7 @@ def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception:

@classmethod
def is_db_column_type_match(
cls, db_column_type: Optional[str], target_column_type: utils.GenericDataType
cls, db_column_type: Optional[str], target_column_type: GenericDataType
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we can remove this method, as I don't see it being used anymore

) -> bool:
"""
Check if a column type satisfies a pattern in a collection of regexes found in
Expand Down Expand Up @@ -967,24 +1040,35 @@ def make_label_compatible(cls, label: str) -> Union[str, quoted_name]:
return label_mutated

@classmethod
def get_sqla_column_type(cls, type_: Optional[str]) -> Optional[TypeEngine]:
def get_sqla_column_type(
cls,
column_type: Optional[str],
column_type_mappings: Tuple[
Tuple[
Pattern[str],
Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
GenericDataType,
],
...,
] = column_type_mappings,
Comment on lines +1015 to +1022
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of passing the mapping to the method, we can probably just call the cls.column_type_mappings property in the method call.

) -> Union[Tuple[TypeEngine, GenericDataType], None]:
"""
Return a sqlalchemy native column type that corresponds to the column type
defined in the data source (return None to use default type inferred by
SQLAlchemy). Override `column_type_mappings` for specific needs
(see MSSQL for example of NCHAR/NVARCHAR handling).

:param type_: Column type returned by inspector
:param column_type: Column type returned by inspector
:return: SqlAlchemy column type
"""
if not type_:
if not column_type:
return None
for regex, sqla_type in cls.column_type_mappings:
match = regex.match(type_)
for regex, sqla_type, generic_type in column_type_mappings:
match = regex.match(column_type)
if match:
if callable(sqla_type):
return sqla_type(match)
return sqla_type
return sqla_type(match), generic_type
villebro marked this conversation as resolved.
Show resolved Hide resolved
return sqla_type, generic_type
return None

@staticmethod
Expand Down Expand Up @@ -1101,3 +1185,43 @@ def is_readonly_query(cls, parsed_query: ParsedQuery) -> bool:
or parsed_query.is_explain()
or parsed_query.is_show()
)

@classmethod
def get_column_spec(
    cls,
    native_type: Optional[str],
    source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
    column_type_mappings: Tuple[
        Tuple[
            Pattern[str],
            Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
            GenericDataType,
        ],
        ...,
    ] = column_type_mappings,
) -> Union[ColumnSpec, None]:
    """
    Convert a native database type to a column specification.

    :param native_type: Native database type
    :param source: Type coming from the database table or cursor description
        (accepted for interface compatibility; not consulted here)
    :param column_type_mappings: Mapping entries of (pattern, SQLAlchemy type
        or factory, generic type) that are searched for ``native_type``
    :return: ColumnSpec object, or None when no mapping entry matches
    """
    # Resolve the type once; the lookup scans every mapping pattern, so
    # calling it a second time just to unpack the result is wasted work.
    resolved = cls.get_sqla_column_type(
        native_type, column_type_mappings=column_type_mappings
    )
    if resolved is None:
        return None

    column_type, generic_type = resolved
    if not column_type:
        return None

    return ColumnSpec(
        sqla_type=column_type,
        generic_type=generic_type,
        is_dttm=generic_type == GenericDataType.TEMPORAL,
    )
12 changes: 10 additions & 2 deletions superset/db_engine_specs/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,16 @@ def fetch_data(
return cls.pyodbc_rows_to_tuples(data)

column_type_mappings = (
(re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), UnicodeText()),
(re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), String()),
(
re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE),
UnicodeText(),
utils.GenericDataType.STRING,
),
(
re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE),
String(),
utils.GenericDataType.STRING,
),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would probably design this so that an engine spec can extend the base mapping. In the case of MSSQL, I believe the base mapping is a good fallback. Also, we might consider incorporating these types into the base spec, as I assume fairly many engines support N-prefixed character types, and some of those engines might also benefit from the UnicodeText SQLA type over the regular String one.

)

@classmethod
Expand Down
54 changes: 53 additions & 1 deletion superset/db_engine_specs/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,28 @@
import logging
import re
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
from typing import (
Any,
Callable,
Dict,
List,
Match,
Optional,
Pattern,
Tuple,
TYPE_CHECKING,
Union,
)

from pytz import _FixedOffset # type: ignore
from sqlalchemy.dialects.postgresql import ARRAY, DOUBLE_PRECISION, ENUM, JSON
from sqlalchemy.dialects.postgresql.base import PGInspector
from sqlalchemy.types import String, TypeEngine

from superset.db_engine_specs.base import BaseEngineSpec
from superset.exceptions import SupersetException
from superset.utils import core as utils
from superset.utils.core import ColumnSpec, GenericDataType

if TYPE_CHECKING:
from superset.models.core import Database # pragma: no cover
Expand Down Expand Up @@ -77,6 +91,21 @@ class PostgresEngineSpec(PostgresBaseEngineSpec):
max_column_name_length = 63
try_remove_schema_from_table_name = False

# Postgres-specific (pattern, SQLAlchemy type, generic type) entries.
column_type_mappings = (
    (
        re.compile(r"^double precision", re.IGNORECASE),
        DOUBLE_PRECISION(),
        GenericDataType.NUMERIC,
    ),
    # The pattern has no capture groups, so a factory that indexes
    # ``match[2]`` raises IndexError for every array column. Map array
    # types straight to String, which is the fallback the factory intended.
    (re.compile(r"^array.*", re.IGNORECASE), String(), GenericDataType.STRING),
    (re.compile(r"^json.*", re.IGNORECASE), JSON(), GenericDataType.STRING),
    (re.compile(r"^enum.*", re.IGNORECASE), ENUM(), GenericDataType.STRING),
)

@classmethod
def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
return True
Expand Down Expand Up @@ -144,3 +173,26 @@ def get_extra_params(database: "Database") -> Dict[str, Any]:
engine_params["connect_args"] = connect_args
extra["engine_params"] = engine_params
return extra

@classmethod
def get_column_spec(  # type: ignore
    cls,
    native_type: Optional[str],
    source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
    column_type_mappings: Tuple[
        Tuple[
            Pattern[str],
            Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
            GenericDataType,
        ],
        ...,
    ] = column_type_mappings,
) -> Union[ColumnSpec, None]:
    """
    Convert a native database type to a column specification.

    The parent implementation is tried first with its own default mappings;
    only when that yields nothing is it retried with ``column_type_mappings``.

    :param native_type: Native database type
    :param source: Type coming from the database table or cursor description
    :param column_type_mappings: Mapping entries used for the second lookup
    :return: ColumnSpec object, or None when neither lookup matches
    """
    # Forward ``source`` so a caller-supplied value is not silently dropped.
    column_spec = super().get_column_spec(native_type, source=source)
    if column_spec:
        return column_spec

    return super().get_column_spec(
        native_type, source=source, column_type_mappings=column_type_mappings
    )
Loading