Skip to content

Commit

Permalink
fix: stringify ValueErrors for NaT types (apache#22628)
Browse files Browse the repository at this point in the history
  • Loading branch information
eschutho authored Jan 6, 2023
1 parent 7591acb commit 804e89d
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
1 change: 1 addition & 0 deletions superset/result_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def __init__( # pylint: disable=too-many-locals
pa.lib.ArrowInvalid,
pa.lib.ArrowTypeError,
pa.lib.ArrowNotImplementedError,
ValueError,
TypeError, # this is super hackey,
# https://issues.apache.org/jira/browse/ARROW-7855
):
Expand Down
18 changes: 18 additions & 0 deletions tests/unit_tests/dataframe_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import pytest
from pandas import Timestamp
from pandas._libs.tslibs import NaT

from superset.dataframe import df_to_records
from superset.superset_typing import DbapiDescription
Expand All @@ -41,6 +42,23 @@ def test_df_to_records() -> None:
]


def test_df_to_records_NaT_type() -> None:
    """
    Check how ``df_to_records`` serializes a timestamp column containing ``NaT``.

    A two-row result set (one ``NaT``, one UTC timestamp) is wrapped in a
    ``SupersetResultSet``, converted to a pandas DataFrame and passed to
    ``df_to_records``. The ``NaT`` row is expected to come back as ``None``
    and the real timestamp as a quoted ISO-8601 string.
    """
    # Imported lazily, mirroring the other tests in this module.
    from superset.db_engine_specs import BaseEngineSpec
    from superset.result_set import SupersetResultSet

    valid_ts = Timestamp("2023-01-06 20:50:31.749000+0000", tz="UTC")
    rows = [(NaT,), (valid_ts,)]
    description: DbapiDescription = [
        ("date", "timestamp with time zone", None, None, None, None, False)
    ]

    frame = SupersetResultSet(rows, description, BaseEngineSpec).to_pandas_df()
    records = df_to_records(frame)

    expected_records = [
        {"date": None},
        {"date": '"2023-01-06T20:50:31.749000+00:00"'},
    ]
    assert records == expected_records


def test_js_max_int() -> None:
from superset.db_engine_specs import BaseEngineSpec
from superset.result_set import SupersetResultSet
Expand Down
34 changes: 34 additions & 0 deletions tests/unit_tests/result_set_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,37 @@ def test_stringify_with_null_integers():
)

assert np.array_equal(result_set, expected)


def test_stringify_with_null_timestamps():
    """
    Test that ``stringify_values`` copes with an object column made up
    entirely of ``pd.NaT`` values, yielding ``None`` for each null entry.
    """
    field_types = [
        ("id", "object"),
        ("value", "object"),
        ("num", "object"),
        ("bool", "object"),
    ]
    rows = [
        ("foo", "bar", pd.NaT, None),
        ("foo", "bar", pd.NaT, True),
        ("foo", "bar", pd.NaT, None),
    ]
    # Rows must stay tuples so numpy builds a structured (record) array.
    records = np.array(rows, dtype=field_types)

    # Stringify each field on its own, in declaration order.
    stringified = []
    for field in ("id", "value", "num", "bool"):
        stringified.append(stringify_values(records[field]))
    result_set = np.array(stringified)

    expected = np.array(
        [
            array(['"foo"', '"foo"', '"foo"'], dtype=object),
            array(['"bar"', '"bar"', '"bar"'], dtype=object),
            array([None, None, None], dtype=object),
            array([None, "true", None], dtype=object),
        ]
    )

    assert np.array_equal(result_set, expected)

0 comments on commit 804e89d

Please sign in to comment.