diff --git a/superset/result_set.py b/superset/result_set.py
index a6f39f76bf4df..373aaf1b723f3 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -135,6 +135,7 @@ def __init__(  # pylint: disable=too-many-locals
                     pa.lib.ArrowInvalid,
                     pa.lib.ArrowTypeError,
                     pa.lib.ArrowNotImplementedError,
+                    ValueError,
                     TypeError,  # this is super hackey,
                     # https://issues.apache.org/jira/browse/ARROW-7855
                 ):
diff --git a/tests/unit_tests/dataframe_test.py b/tests/unit_tests/dataframe_test.py
index 016d2f4d9bae4..3d8bd15aeff33 100644
--- a/tests/unit_tests/dataframe_test.py
+++ b/tests/unit_tests/dataframe_test.py
@@ -19,6 +19,6 @@
 
 import pytest
-from pandas import Timestamp
+from pandas import NaT, Timestamp
 
 from superset.dataframe import df_to_records
 from superset.superset_typing import DbapiDescription
@@ -41,6 +41,27 @@ def test_df_to_records() -> None:
     ]
 
 
+def test_df_to_records_NaT_type() -> None:
+    """
+    Test that a ``NaT`` stored next to a tz-aware timestamp is returned as
+    ``None``, while the timestamp itself comes back as a JSON-encoded string.
+    """
+    from superset.db_engine_specs import BaseEngineSpec
+    from superset.result_set import SupersetResultSet
+
+    data = [(NaT,), (Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC"),)]
+    cursor_descr: DbapiDescription = [
+        ("date", "timestamp with time zone", None, None, None, None, False)
+    ]
+    results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
+    df = results.to_pandas_df()
+
+    assert df_to_records(df) == [
+        {"date": None},
+        {"date": '"2023-01-06T20:50:31.749000+00:00"'},
+    ]
+
+
 def test_js_max_int() -> None:
     from superset.db_engine_specs import BaseEngineSpec
     from superset.result_set import SupersetResultSet
diff --git a/tests/unit_tests/result_set_test.py b/tests/unit_tests/result_set_test.py
index e7371f5c0fa3d..0a78e0a5edd0a 100644
--- a/tests/unit_tests/result_set_test.py
+++ b/tests/unit_tests/result_set_test.py
@@ -106,3 +106,38 @@ def test_stringify_with_null_integers():
     )
 
     assert np.array_equal(result_set, expected)
+
+
+def test_stringify_with_null_timestamps():
+    """
+    Test that we can safely handle type errors when a timestamp column has a
+    null value: every ``NaT`` must be stringified to ``None``.
+    """
+
+    data = [
+        ("foo", "bar", pd.NaT, None),
+        ("foo", "bar", pd.NaT, True),
+        ("foo", "bar", pd.NaT, None),
+    ]
+    numpy_dtype = [
+        ("id", "object"),
+        ("value", "object"),
+        ("num", "object"),
+        ("bool", "object"),
+    ]
+
+    array2 = np.array(data, dtype=numpy_dtype)
+    column_names = ["id", "value", "num", "bool"]
+
+    result_set = np.array([stringify_values(array2[column]) for column in column_names])
+
+    expected = np.array(
+        [
+            array(['"foo"', '"foo"', '"foo"'], dtype=object),
+            array(['"bar"', '"bar"', '"bar"'], dtype=object),
+            array([None, None, None], dtype=object),
+            array([None, "true", None], dtype=object),
+        ]
+    )
+
+    assert np.array_equal(result_set, expected)