From 19d1dd5570e88c5e7b9bd97b9ba01ad38e6b5815 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 11 Aug 2023 00:25:33 -0700 Subject: [PATCH] fix: timezone issue in Pandas 2 (#24955) --- superset/result_set.py | 9 ++++----- tests/unit_tests/result_set_test.py | 26 +++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/superset/result_set.py b/superset/result_set.py index 4ca39cba2b627..82832eb8ea4ac 100644 --- a/superset/result_set.py +++ b/superset/result_set.py @@ -167,12 +167,11 @@ def __init__( # pylint: disable=too-many-locals try: if sample.tzinfo: tz = sample.tzinfo - series = pd.Series( - array[column], dtype="datetime64[ns]" - ) - series = pd.to_datetime(series).dt.tz_localize(tz) + series = pd.Series(array[column]) + series = pd.to_datetime(series) pa_data[i] = pa.Array.from_pandas( - series, type=pa.timestamp("ns", tz=tz) + series, + type=pa.timestamp("ns", tz=tz), ) except Exception as ex: # pylint: disable=broad-except logger.exception(ex) diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py index 331810bb1ed62..a629c2e2ec767 100644 --- a/tests/unit_tests/result_set_test.py +++ b/tests/unit_tests/result_set_test.py @@ -17,12 +17,15 @@ # pylint: disable=import-outside-toplevel, unused-argument +from datetime import datetime, timezone import numpy as np import pandas as pd from numpy.core.multiarray import array +from pytest_mock import MockerFixture -from superset.result_set import stringify_values +from superset.db_engine_specs.base import BaseEngineSpec +from superset.result_set import stringify_values, SupersetResultSet def test_column_names_as_bytes() -> None: @@ -140,3 +143,24 @@ def test_stringify_with_null_timestamps(): ) assert np.array_equal(result_set, expected) + + +def test_timezone_series(mocker: MockerFixture) -> None: + """ + Test that we can handle timezone-aware datetimes correctly. + + This covers a regression that happened when upgrading from Pandas 1.5.3 to 2.0.3. + """ + logger = mocker.patch("superset.result_set.logger") + + data = [[datetime(2023, 1, 1, tzinfo=timezone.utc)]] + description = [(b"__time", "datetime", None, None, None, None, False)] + result_set = SupersetResultSet( + data, + description, # type: ignore + BaseEngineSpec, + ) + assert result_set.to_pandas_df().values.tolist() == [ + [pd.Timestamp("2023-01-01 00:00:00+0000", tz="UTC")] + ] + logger.exception.assert_not_called()