diff --git a/superset-frontend/package-lock.json b/superset-frontend/package-lock.json index d17af4794a8f8..1307f994b5f40 100644 --- a/superset-frontend/package-lock.json +++ b/superset-frontend/package-lock.json @@ -19064,22 +19064,21 @@ } }, "@superset-ui/plugin-chart-echarts": { - "version": "0.17.5", - "resolved": "https://registry.npmjs.org/@superset-ui/plugin-chart-echarts/-/plugin-chart-echarts-0.17.5.tgz", - "integrity": "sha512-7HpREiMqUUR1wiYxZVOlR/25jMeWRsShBHulanGYGSR6DHuvjL1S/lq1WiK6qbUxqZ/iDhzoJpSsFzZxkhbwdA==", + "version": "0.17.6", + "resolved": "https://registry.npmjs.org/@superset-ui/plugin-chart-echarts/-/plugin-chart-echarts-0.17.6.tgz", + "integrity": "sha512-C0OXO7yrkVbHhILVb8fvxbM8O/+iB6eA8NCwwOiYNJZ/HfToZGr+ebbA6uCTf/A/WWdl8Qh/lIX8vz+GJe8d/A==", "requires": { "@superset-ui/chart-controls": "0.17.5", "@superset-ui/core": "0.17.5", - "@types/echarts": "^4.9.3", "@types/mathjs": "^6.0.7", - "echarts": "^5.0.0", + "echarts": "^5.0.1", "mathjs": "^8.0.1" } }, "@superset-ui/plugin-chart-table": { - "version": "0.17.5", - "resolved": "https://registry.npmjs.org/@superset-ui/plugin-chart-table/-/plugin-chart-table-0.17.5.tgz", - "integrity": "sha512-OJmQJkCtNZORjl2sLhkyJPQaCOV7igHJRDnVbRBDa5rTVoYETK5lb3j6fO/Oxq1PXufqwsf58SJeVzl3NCogHA==", + "version": "0.17.6", + "resolved": "https://registry.npmjs.org/@superset-ui/plugin-chart-table/-/plugin-chart-table-0.17.6.tgz", + "integrity": "sha512-s5pfX1/AxKuiTlBOovBjI8fmMjkn2gcjxWsrhjrvo9sfeCtZ64PObTobNiKHlKrc43YLf2ZKiNY9MiWBlsT3ZA==", "requires": { "@emotion/core": "^10.0.28", "@superset-ui/chart-controls": "0.17.5", @@ -20817,14 +20816,6 @@ "@types/node": "*" } }, - "@types/echarts": { - "version": "4.9.3", - "resolved": "https://registry.npmjs.org/@types/echarts/-/echarts-4.9.3.tgz", - "integrity": "sha512-CbgZUYdLy1G2BhCI6maBwVXmrqIx/D8KwUccMXQ9W2uyXNMjBvpIRXSs+UaBtvUihPV2f0g7LGj/yua1iY0VbQ==", - "requires": { - "@types/zrender": "*" - } - }, "@types/enzyme": { "version": "3.10.5", "resolved": "https://registry.npmjs.org/@types/enzyme/-/enzyme-3.10.5.tgz", @@ -21520,11 +21511,6 @@ "integrity": "sha512-FA/BWv8t8ZWJ+gEOnLLd8ygxH/2UFbAvgEonyfN6yWGLKc7zVjbpl2Y4CTjid9h2RfgPP6SEt6uHwEOply00yw==", "dev": true }, - "@types/zrender": { - "version": "4.0.0", - "resolved": "https://registry.npmjs.org/@types/zrender/-/zrender-4.0.0.tgz", - "integrity": "sha512-s89GOIeKFiod2KSqHkfd2rzx+T2DVu7ihZCBEBnhFrzvQPUmzvDSBot9Fi1DfMQm9Odg+rTqoMGC38RvrwJK2w==" - }, "@typescript-eslint/eslint-plugin": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-4.1.0.tgz", diff --git a/superset-frontend/package.json b/superset-frontend/package.json index 1ab6654a27c26..76721f67e4241 100644 --- a/superset-frontend/package.json +++ b/superset-frontend/package.json @@ -89,8 +89,8 @@ "@superset-ui/legacy-preset-chart-big-number": "^0.17.5", "@superset-ui/legacy-preset-chart-deckgl": "^0.4.1", "@superset-ui/legacy-preset-chart-nvd3": "^0.17.5", - "@superset-ui/plugin-chart-echarts": "^0.17.5", - "@superset-ui/plugin-chart-table": "^0.17.5", + "@superset-ui/plugin-chart-echarts": "^0.17.6", + "@superset-ui/plugin-chart-table": "^0.17.6", "@superset-ui/plugin-chart-word-cloud": "^0.17.5", "@superset-ui/preset-chart-xy": "^0.17.5", "@vx/responsive": "^0.0.195", diff --git a/superset/common/query_context.py b/superset/common/query_context.py index 6e3fc9f5cb598..01bc17056bbdf 100644 --- a/superset/common/query_context.py +++ b/superset/common/query_context.py @@ -181,7 +181,7 @@ def get_single_payload( status = payload["status"] if status != utils.QueryStatus.FAILED: payload["colnames"] = list(df.columns) - payload["coltypes"] = utils.serialize_pandas_dtypes(df.dtypes) + payload["coltypes"] = utils.extract_dataframe_dtypes(df) payload["data"] = self.get_data(df) del payload["df"] diff --git a/superset/utils/core.py b/superset/utils/core.py index 8c6da5c5e3f83..ceec948a393f9 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -74,6 +74,7 @@ from flask_appbuilder.security.sqla.models import Role, User from flask_babel import gettext as __ from flask_babel.speaklater import LazyString +from pandas.api.types import infer_dtype from sqlalchemy import event, exc, select, Text from sqlalchemy.dialects.mysql import MEDIUMTEXT from sqlalchemy.engine import Connection, Engine @@ -1401,19 +1402,29 @@ def get_column_names_from_metrics(metrics: List[Metric]) -> List[str]: return columns -def serialize_pandas_dtypes(dtypes: List[np.dtype]) -> List[GenericDataType]: - """Serialize pandas/numpy dtypes to JavaScript types""" - mapping = { - "object": GenericDataType.STRING, - "category": GenericDataType.STRING, - "datetime64[ns]": GenericDataType.TEMPORAL, - "int64": GenericDataType.NUMERIC, - "in32": GenericDataType.NUMERIC, - "float64": GenericDataType.NUMERIC, - "float32": GenericDataType.NUMERIC, - "bool": GenericDataType.BOOLEAN, +def extract_dataframe_dtypes(df: pd.DataFrame) -> List[GenericDataType]: + """Serialize pandas/numpy dtypes to generic types""" + + # omitting string types as those will be the default type + inferred_type_map: Dict[str, GenericDataType] = { + "floating": GenericDataType.NUMERIC, + "integer": GenericDataType.NUMERIC, + "mixed-integer-float": GenericDataType.NUMERIC, + "decimal": GenericDataType.NUMERIC, + "boolean": GenericDataType.BOOLEAN, + "datetime64": GenericDataType.TEMPORAL, + "datetime": GenericDataType.TEMPORAL, + "date": GenericDataType.TEMPORAL, } - return [mapping.get(str(x), GenericDataType.STRING) for x in dtypes] + + generic_types: List[GenericDataType] = [] + for column in df.columns: + series = df[column] + inferred_type = infer_dtype(series) + generic_type = inferred_type_map.get(inferred_type, GenericDataType.STRING) + generic_types.append(generic_type) + + return generic_types def indexed( diff --git a/tests/utils_tests.py b/tests/utils_tests.py index fbdf13115289b..ab2f6ea33d9f6 100644 --- a/tests/utils_tests.py +++ b/tests/utils_tests.py @@ -23,10 +23,12 @@ import json import os import re +from typing import Any, Tuple, List from unittest.mock import Mock, patch from tests.fixtures.birth_names_dashboard import load_birth_names_dashboard_with_slices -import numpy +import numpy as np +import pandas as pd import pytest from flask import Flask, g import marshmallow @@ -44,6 +46,7 @@ convert_legacy_filters_into_adhoc, create_ssl_cert_file, format_timedelta, + GenericDataType, get_form_data_token, get_iterable, get_email_address_list, @@ -57,6 +60,7 @@ merge_request_params, parse_ssl_cert, parse_js_uri_path_item, + extract_dataframe_dtypes, split, TimeRangeEndpoint, validate_json, @@ -113,9 +117,9 @@ def test_json_iso_dttm_ser(self): json_iso_dttm_ser("this is not a date") def test_base_json_conv(self): - assert isinstance(base_json_conv(numpy.bool_(1)), bool) is True - assert isinstance(base_json_conv(numpy.int64(1)), int) is True - assert isinstance(base_json_conv(numpy.array([1, 2, 3])), list) is True + assert isinstance(base_json_conv(np.bool_(1)), bool) is True + assert isinstance(base_json_conv(np.int64(1)), int) is True + assert isinstance(base_json_conv(np.array([1, 2, 3])), list) is True assert isinstance(base_json_conv(set([1])), list) is True assert isinstance(base_json_conv(Decimal("1.0")), float) is True assert isinstance(base_json_conv(uuid.uuid4()), str) is True @@ -1066,3 +1070,34 @@ def test_get_form_data_token(self): assert get_form_data_token({"token": "token_abcdefg1"}) == "token_abcdefg1" generated_token = get_form_data_token({}) assert re.match(r"^token_[a-z0-9]{8}$", generated_token) is not None + + def test_extract_dataframe_dtypes(self): + cols: Tuple[Tuple[str, GenericDataType, List[Any]], ...] = ( + ("dt", GenericDataType.TEMPORAL, [date(2021, 2, 4), date(2021, 2, 4)]), + ( + "dttm", + GenericDataType.TEMPORAL, + [datetime(2021, 2, 4, 1, 1, 1), datetime(2021, 2, 4, 1, 1, 1)], + ), + ("str", GenericDataType.STRING, ["foo", "foo"]), + ("int", GenericDataType.NUMERIC, [1, 1]), + ("float", GenericDataType.NUMERIC, [0.5, 0.5]), + ("mixed-int-float", GenericDataType.NUMERIC, [0.5, 1.0]), + ("bool", GenericDataType.BOOLEAN, [True, False]), + ("mixed-str-int", GenericDataType.STRING, ["abc", 1.0]), + ("obj", GenericDataType.STRING, [{"a": 1}, {"a": 1}]), + ("dt_null", GenericDataType.TEMPORAL, [None, date(2021, 2, 4)]), + ( + "dttm_null", + GenericDataType.TEMPORAL, + [None, datetime(2021, 2, 4, 1, 1, 1)], + ), + ("str_null", GenericDataType.STRING, [None, "foo"]), + ("int_null", GenericDataType.NUMERIC, [None, 1]), + ("float_null", GenericDataType.NUMERIC, [None, 0.5]), + ("bool_null", GenericDataType.BOOLEAN, [None, False]), + ("obj_null", GenericDataType.STRING, [None, {"a": 1}]), + ) + + df = pd.DataFrame(data={col[0]: col[2] for col in cols}) + assert extract_dataframe_dtypes(df) == [col[1] for col in cols]