Skip to content

Commit

Permalink
BUG: prevent coercion to datetime64[ns] when a Series is initialized …
Browse files Browse the repository at this point in the history
…with both tz-naive and tz-aware (pandas-dev#18361)
  • Loading branch information
jamestran201-alt authored and jreback committed Nov 23, 2017
1 parent 4e98a7b commit 41004d9
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 4 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.22.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,5 +195,5 @@ Other
^^^^^

- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
-
- Fixed a bug where creating a ``Series`` from an array containing both tz-naive and tz-aware values produced a ``Series`` with a tz-aware dtype instead of ``object`` dtype (:issue:`16406`)
-
20 changes: 17 additions & 3 deletions pandas/_libs/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,8 @@ cpdef object infer_datetimelike_array(object arr):
- timedelta: we have *only* timedeltas and maybe strings, nulls
- nat: we do not have *any* date, datetimes or timedeltas, but do have
at least a NaT
- mixed: other objects (strings or actual objects)
- mixed: other objects (strings, a mix of tz-aware and tz-naive, or
actual objects)
Parameters
----------
Expand All @@ -479,6 +480,7 @@ cpdef object infer_datetimelike_array(object arr):
cdef:
Py_ssize_t i, n = len(arr)
bint seen_timedelta = 0, seen_date = 0, seen_datetime = 0
bint seen_tz_aware = 0, seen_tz_naive = 0
bint seen_nat = 0
list objs = []
object v
Expand All @@ -496,8 +498,20 @@ cpdef object infer_datetimelike_array(object arr):
pass
elif v is NaT:
seen_nat = 1
elif is_datetime(v) or util.is_datetime64_object(v):
# datetime, or np.datetime64
elif is_datetime(v):
# datetime
seen_datetime = 1

# disambiguate between tz-naive and tz-aware
if v.tzinfo is None:
seen_tz_naive = 1
else:
seen_tz_aware = 1

if seen_tz_naive and seen_tz_aware:
return 'mixed'
elif util.is_datetime64_object(v):
# np.datetime64
seen_datetime = 1
elif is_date(v):
seen_date = 1
Expand Down
70 changes: 70 additions & 0 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,10 @@ def test_mixed_dtypes_remain_object_array(self):

class TestTypeInference(object):

# Placeholder type used where a test needs an arbitrary Python object
class Dummy():
    """Empty class whose instances carry no attributes of their own."""

def test_length_zero(self):
result = lib.infer_dtype(np.array([], dtype='i4'))
assert result == 'integer'
Expand Down Expand Up @@ -655,6 +659,72 @@ def test_infer_dtype_period(self):
dtype=object)
assert lib.infer_dtype(arr) == 'mixed'

@pytest.mark.parametrize(
    "data",
    [
        # stdlib datetimes without tzinfo
        [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)],
        # Timestamps, both tz-naive
        [Timestamp("20170612"), Timestamp("20170311")],
        # Timestamps, both tz-aware
        [Timestamp("20170612", tz='US/Eastern'),
         Timestamp("20170311", tz='US/Eastern')],
        # a plain date next to a tz-aware Timestamp
        [date(2017, 6, 12),
         Timestamp("20170311", tz='US/Eastern')],
        # numpy datetime64 scalars, alone and alongside a stdlib datetime
        [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")],
        [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)]
    ]
)
def test_infer_datetimelike_array_datetime(self, data):
    """Each parametrized input must be inferred as "datetime"."""
    inferred = lib.infer_datetimelike_array(data)
    assert inferred == "datetime"

@pytest.mark.parametrize(
    "data",
    [
        # stdlib timedeltas only
        [timedelta(2017, 6, 12), timedelta(2017, 3, 11)],
        # a timedelta next to a plain date
        [timedelta(2017, 6, 12), date(2017, 3, 11)],
        # numpy timedelta64 scalars, alone and alongside a stdlib timedelta
        [np.timedelta64(2017, "D"), np.timedelta64(6, "s")],
        [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)]
    ]
)
def test_infer_datetimelike_array_timedelta(self, data):
    """Each parametrized input must be inferred as "timedelta"."""
    inferred = lib.infer_datetimelike_array(data)
    assert inferred == "timedelta"

def test_infer_datetimelike_array_date(self):
    """A list holding only ``datetime.date`` objects is inferred as "date"."""
    dates = [date(2017, 6, 12), date(2017, 3, 11)]
    inferred = lib.infer_datetimelike_array(dates)
    assert inferred == "date"

@pytest.mark.parametrize(
    "data",
    [
        # values that are not date-like objects at all
        ["2017-06-12", "2017-03-11"],
        [20170612, 20170311],
        [20170612.5, 20170311.8],
        [Dummy(), Dummy()],
        # tz-naive and tz-aware Timestamps together
        [Timestamp("20170612"), Timestamp("20170311", tz='US/Eastern')],
        # a Timestamp next to a plain integer
        [Timestamp("20170612"), 20170311],
        # a timedelta next to a Timestamp
        [timedelta(2017, 6, 12), Timestamp("20170311", tz='US/Eastern')]
    ]
)
def test_infer_datetimelike_array_mixed(self, data):
    """Each parametrized input must be inferred as "mixed"."""
    inferred = lib.infer_datetimelike_array(data)
    assert inferred == "mixed"

@pytest.mark.parametrize(
    "first, expected",
    [
        [[None], "mixed"],
        [[np.nan], "mixed"],
        [[pd.NaT], "nat"],
        [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"],
        [[np.datetime64("2017-06-12"), pd.NaT], "datetime"],
        [[date(2017, 6, 12), pd.NaT], "date"],
        [[timedelta(2017, 6, 12), pd.NaT], "timedelta"],
        [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"]
    ]
)
@pytest.mark.parametrize("second", [None, np.nan])
def test_infer_datetimelike_array_nan_nat_like(self, first, second,
                                               expected):
    """Check inference when a trailing null (None or NaN) is added.

    ``first`` holds the leading values, ``second`` is the null appended
    after them, and ``expected`` is the kind that
    ``lib.infer_datetimelike_array`` must report for the combined list.
    """
    # Build a fresh list instead of ``first.append(second)``: pytest hands
    # the very same ``first`` object to every ``second`` case, so mutating
    # it in place would leak the appended value into the next case.
    values = first + [second]
    assert lib.infer_datetimelike_array(values) == expected

def test_infer_dtype_all_nan_nat_like(self):
arr = np.array([np.nan, np.nan])
assert lib.infer_dtype(arr) == 'floating'
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,15 @@ def f():
s = Series([pd.NaT, np.nan, '1 Day'])
assert s.dtype == 'timedelta64[ns]'

# GH 16406
def test_constructor_mixed_tz(self):
s = Series([Timestamp('20130101'),
Timestamp('20130101', tz='US/Eastern')])
expected = Series([Timestamp('20130101'),
Timestamp('20130101', tz='US/Eastern')],
dtype='object')
assert_series_equal(s, expected)

def test_NaT_scalar(self):
series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]')

Expand Down

0 comments on commit 41004d9

Please sign in to comment.