apache · villebro · Mar 12, 2021 · Feb 23, 2021 · Feb 23, 2021 · Feb 23, 2021
diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py
@@ -70,6 +70,7 @@
 from superset.sql_parse import ParsedQuery
 from superset.typing import Metric, QueryObjectDict
 from superset.utils import core as utils
+from superset.utils.core import GenericDataType
 
 config = app.config
 metadata = Model.metadata  # pylint: disable=no-member
@@ -187,20 +188,20 @@ def is_numeric(self) -> bool:
         """
         Check if the column has a numeric datatype.
         """
-        db_engine_spec = self.table.database.db_engine_spec
-        return db_engine_spec.is_db_column_type_match(
-            self.type, utils.GenericDataType.NUMERIC
-        )
+        column_spec = self.table.database.db_engine_spec.get_column_spec(self.type)
+        if column_spec is None:
+            return False
+        return column_spec.generic_type == GenericDataType.NUMERIC
 
     @property
     def is_string(self) -> bool:
         """
         Check if the column has a string datatype.
         """
-        db_engine_spec = self.table.database.db_engine_spec
-        return db_engine_spec.is_db_column_type_match(
-            self.type, utils.GenericDataType.STRING
-        )
+        column_spec = self.table.database.db_engine_spec.get_column_spec(self.type)
+        if column_spec is None:  # no column spec available for this type
+            return False
+        return column_spec.generic_type == GenericDataType.STRING
 
     @property
     def is_temporal(self) -> bool:
@@ -212,18 +213,19 @@ def is_temporal(self) -> bool:
         """
         if self.is_dttm is not None:
             return self.is_dttm
-        db_engine_spec = self.table.database.db_engine_spec
-        return db_engine_spec.is_db_column_type_match(
-            self.type, utils.GenericDataType.TEMPORAL
-        )
+        column_spec = self.table.database.db_engine_spec.get_column_spec(self.type)
+        if column_spec is None:
+            return False
+        return column_spec.is_dttm
 
     def get_sqla_col(self, label: Optional[str] = None) -> Column:
         label = label or self.column_name
         if self.expression:
             col = literal_column(self.expression)
         else:
             db_engine_spec = self.table.database.db_engine_spec
-            type_ = db_engine_spec.get_sqla_column_type(self.type)
+            column_spec = db_engine_spec.get_column_spec(self.type)  # may be None
+            type_ = column_spec.sqla_type if column_spec else None
             col = column(self.column_name, type_=type_)
         col = self.table.make_sqla_column_compatible(col, label)
         return col

diff --git a/superset/db_engine_specs/base.py b/superset/db_engine_specs/base.py
@@ -41,7 +41,7 @@
 import sqlparse
 from flask import g
 from flask_babel import lazy_gettext as _
-from sqlalchemy import column, DateTime, select
+from sqlalchemy import column, DateTime, select, types
 from sqlalchemy.engine.base import Engine
 from sqlalchemy.engine.interfaces import Compiled, Dialect
 from sqlalchemy.engine.reflection import Inspector
@@ -57,6 +57,7 @@
 from superset.models.sql_lab import Query
 from superset.sql_parse import ParsedQuery, Table
 from superset.utils import core as utils
+from superset.utils.core import ColumnSpec, GenericDataType
 
 if TYPE_CHECKING:
     # prevent circular imports
@@ -145,8 +146,84 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
     _date_trunc_functions: Dict[str, str] = {}
     _time_grain_expressions: Dict[Optional[str], str] = {}
     column_type_mappings: Tuple[
-        Tuple[Pattern[str], Union[TypeEngine, Callable[[Match[str]], TypeEngine]]], ...,
-    ] = ()
+        Tuple[
+            Pattern[str],
+            Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
+            GenericDataType,
+        ],
+        ...,
+    ] = (  # ordered: the first pattern that matches wins
+        (
+            re.compile(r"^smallint", re.IGNORECASE),
+            types.SmallInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^integer", re.IGNORECASE),
+            types.Integer(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigint", re.IGNORECASE),
+            types.BigInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^decimal", re.IGNORECASE),
+            types.Numeric(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^numeric", re.IGNORECASE),
+            types.Numeric(),
+            GenericDataType.NUMERIC,
+        ),
+        (re.compile(r"^real", re.IGNORECASE), types.REAL(), GenericDataType.NUMERIC,),
+        (
+            re.compile(r"^smallserial", re.IGNORECASE),
+            types.SmallInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^serial", re.IGNORECASE),
+            types.Integer(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^bigserial", re.IGNORECASE),
+            types.BigInteger(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^varchar", re.IGNORECASE),
+            types.VARCHAR(),
+            GenericDataType.STRING,
+        ),
+        (re.compile(r"^char", re.IGNORECASE), types.CHAR(), GenericDataType.STRING),
+        (re.compile(r"^text", re.IGNORECASE), types.Text(), GenericDataType.STRING),
+        (re.compile(r"^date", re.IGNORECASE), types.Date(), GenericDataType.TEMPORAL,),
+        (  # must come before "^timestamp", which also matches "timestamptz"
+            re.compile(r"^timestamptz", re.IGNORECASE),
+            types.TIMESTAMP(timezone=True),
+            GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^timestamp", re.IGNORECASE),
+            types.TIMESTAMP(),
+            GenericDataType.TEMPORAL,
+        ),
+        (
+            re.compile(r"^interval", re.IGNORECASE),
+            types.Interval(),
+            GenericDataType.TEMPORAL,
+        ),
+        (re.compile(r"^time", re.IGNORECASE), types.Time(), GenericDataType.TEMPORAL,),
+        (
+            re.compile(r"^boolean", re.IGNORECASE),
+            types.Boolean(),
+            GenericDataType.BOOLEAN,
+        ),
+    )
     time_groupby_inline = False
     limit_method = LimitMethod.FORCE_LIMIT
     time_secondary_columns = False
@@ -162,21 +239,17 @@ class BaseEngineSpec:  # pylint: disable=too-many-public-methods
 
     # default matching patterns to convert database specific column types to
     # more generic types
-    db_column_types: Dict[utils.GenericDataType, Tuple[Pattern[str], ...]] = {
-        utils.GenericDataType.NUMERIC: (
+    db_column_types: Dict[GenericDataType, Tuple[Pattern[str], ...]] = {
+        GenericDataType.NUMERIC: (
             re.compile(r"BIT", re.IGNORECASE),
             re.compile(
                 r".*(DOUBLE|FLOAT|INT|NUMBER|REAL|NUMERIC|DECIMAL|MONEY).*",
                 re.IGNORECASE,
             ),
             re.compile(r".*LONG$", re.IGNORECASE),
         ),
-        utils.GenericDataType.STRING: (
-            re.compile(r".*(CHAR|STRING|TEXT).*", re.IGNORECASE),
-        ),
-        utils.GenericDataType.TEMPORAL: (
-            re.compile(r".*(DATE|TIME).*", re.IGNORECASE),
-        ),
+        GenericDataType.STRING: (re.compile(r".*(CHAR|STRING|TEXT).*", re.IGNORECASE),),
+        GenericDataType.TEMPORAL: (re.compile(r".*(DATE|TIME).*", re.IGNORECASE),),
     }
 
     @classmethod
@@ -210,7 +283,7 @@ def get_dbapi_mapped_exception(cls, exception: Exception) -> Exception:
 
     @classmethod
     def is_db_column_type_match(
-        cls, db_column_type: Optional[str], target_column_type: utils.GenericDataType
+        cls, db_column_type: Optional[str], target_column_type: GenericDataType
     ) -> bool:
         """
         Check if a column type satisfies a pattern in a collection of regexes found in
@@ -967,24 +1040,35 @@ def make_label_compatible(cls, label: str) -> Union[str, quoted_name]:
         return label_mutated
 
     @classmethod
-    def get_sqla_column_type(cls, type_: Optional[str]) -> Optional[TypeEngine]:
+    def get_sqla_column_type(
+        cls,
+        column_type: Optional[str],
+        column_type_mappings: Tuple[
+            Tuple[
+                Pattern[str],
+                Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
+                GenericDataType,
+            ],
+            ...,
+        ] = column_type_mappings,  # default is bound at def time, not read from cls
+    ) -> Union[Tuple[TypeEngine, GenericDataType], None]:
         """
         Return a sqlalchemy native column type that corresponds to the column type
         defined in the data source (return None to use default type inferred by
         SQLAlchemy). Override `column_type_mappings` for specific needs
         (see MSSQL for example of NCHAR/NVARCHAR handling).
 
-        :param type_: Column type returned by inspector
+        :param column_type: Column type returned by inspector
         :return: SqlAlchemy column type
         """
-        if not type_:
+        if not column_type:
             return None
-        for regex, sqla_type in cls.column_type_mappings:
-            match = regex.match(type_)
+        for regex, sqla_type, generic_type in column_type_mappings:  # first match wins
+            match = regex.match(column_type)
             if match:
                 if callable(sqla_type):
-                    return sqla_type(match)
-                return sqla_type
+                    return sqla_type(match), generic_type  # entry is a factory
+                return sqla_type, generic_type
         return None
 
     @staticmethod
@@ -1101,3 +1185,43 @@ def is_readonly_query(cls, parsed_query: ParsedQuery) -> bool:
             or parsed_query.is_explain()
             or parsed_query.is_show()
         )
+
+    @classmethod
+    def get_column_spec(
+        cls,
+        native_type: Optional[str],
+        source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
+        column_type_mappings: Tuple[
+            Tuple[
+                Pattern[str],
+                Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
+                GenericDataType,
+            ],
+            ...,
+        ] = column_type_mappings,
+    ) -> Union[ColumnSpec, None]:
+        """
+        Convert a native database type to a ColumnSpec.
+
+        :param native_type: Native database type
+        :param source: Type coming from the database table or cursor description
+            (not consulted by this implementation)
+        :param column_type_mappings: Ordered (pattern, SQLAlchemy type or factory,
+            generic type) entries to search; the default is bound at def time
+        :return: ColumnSpec object, or None when native_type is empty or unmatched
+        """
+        # Look the type up once so a factory entry is not invoked twice.
+        type_match = cls.get_sqla_column_type(
+            native_type, column_type_mappings=column_type_mappings
+        )
+        if type_match is None:
+            return None
+
+        column_type, generic_type = type_match
+        if not column_type:
+            return None
+        return ColumnSpec(
+            sqla_type=column_type,
+            generic_type=generic_type,
+            is_dttm=generic_type == GenericDataType.TEMPORAL,
+        )
diff --git a/superset/db_engine_specs/mssql.py b/superset/db_engine_specs/mssql.py
@@ -78,8 +78,16 @@ def fetch_data(
         return cls.pyodbc_rows_to_tuples(data)
 
     column_type_mappings = (
-        (re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE), UnicodeText()),
-        (re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE), String()),
+        (
+            re.compile(r"^N((VAR)?CHAR|TEXT)", re.IGNORECASE),
+            UnicodeText(),
+            utils.GenericDataType.STRING,
+        ),
+        (
+            re.compile(r"^((VAR)?CHAR|TEXT|STRING)", re.IGNORECASE),
+            String(),
+            utils.GenericDataType.STRING,
+        ),
     )
 
     @classmethod

diff --git a/superset/db_engine_specs/postgres.py b/superset/db_engine_specs/postgres.py
@@ -18,14 +18,28 @@
 import logging
 import re
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Match,
+    Optional,
+    Pattern,
+    Tuple,
+    TYPE_CHECKING,
+    Union,
+)
 
 from pytz import _FixedOffset  # type: ignore
+from sqlalchemy.dialects.postgresql import ARRAY, DOUBLE_PRECISION, ENUM, JSON
 from sqlalchemy.dialects.postgresql.base import PGInspector
+from sqlalchemy.types import String, TypeEngine
 
 from superset.db_engine_specs.base import BaseEngineSpec
 from superset.exceptions import SupersetException
 from superset.utils import core as utils
+from superset.utils.core import ColumnSpec, GenericDataType
 
 if TYPE_CHECKING:
     from superset.models.core import Database  # pragma: no cover
@@ -77,6 +91,21 @@ class PostgresEngineSpec(PostgresBaseEngineSpec):
     max_column_name_length = 63
     try_remove_schema_from_table_name = False
 
+    column_type_mappings = (
+        (
+            re.compile(r"^double precision", re.IGNORECASE),
+            DOUBLE_PRECISION(),
+            GenericDataType.NUMERIC,
+        ),
+        (
+            re.compile(r"^array.*", re.IGNORECASE),
+            String(),  # pattern has no capture groups, so there is no size to read
+            GenericDataType.STRING,
+        ),
+        (re.compile(r"^json.*", re.IGNORECASE), JSON(), GenericDataType.STRING,),
+        (re.compile(r"^enum.*", re.IGNORECASE), ENUM(), GenericDataType.STRING,),
+    )
+
     @classmethod
     def get_allow_cost_estimate(cls, extra: Dict[str, Any]) -> bool:
         return True
@@ -144,3 +173,26 @@ def get_extra_params(database: "Database") -> Dict[str, Any]:
             engine_params["connect_args"] = connect_args
             extra["engine_params"] = engine_params
         return extra
+
+    @classmethod
+    def get_column_spec(  # type: ignore
+        cls,
+        native_type: Optional[str],
+        source: utils.ColumnTypeSource = utils.ColumnTypeSource.GET_TABLE,
+        column_type_mappings: Tuple[
+            Tuple[
+                Pattern[str],
+                Union[TypeEngine, Callable[[Match[str]], TypeEngine]],
+                GenericDataType,
+            ],
+            ...,
+        ] = column_type_mappings,
+    ) -> Union[ColumnSpec, None]:
+        """Try the parent's default mappings first, then `column_type_mappings`."""
+        column_spec = super().get_column_spec(native_type, source=source)
+        if column_spec:
+            return column_spec
+
+        return super().get_column_spec(
+            native_type, source=source, column_type_mappings=column_type_mappings
+        )