Skip to content

Commit

Permalink
FIX preserve dtype with datetime columns of different resolution when…
Browse files Browse the repository at this point in the history
… merging (#53213)
  • Loading branch information
glemaitre authored May 14, 2023
1 parent 3cfd868 commit 935244a
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Bug fixes
- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`)
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame` objects of incorrect sizes when called on slices (:issue:`52824`)
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
- Bug in :func:`merge` when merging on datetime columns with different resolutions (:issue:`53200`)
- Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`)
- Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`)
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` when ``dtype_backend="pyarrow"`` (:issue:`52872`)
Expand Down
8 changes: 7 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1395,6 +1395,12 @@ def _maybe_coerce_merge_keys(self) -> None:
rk.dtype, DatetimeTZDtype
):
raise ValueError(msg)
elif (
isinstance(lk.dtype, DatetimeTZDtype)
and isinstance(rk.dtype, DatetimeTZDtype)
) or (lk.dtype.kind == "M" and rk.dtype.kind == "M"):
# allows datetime with different resolutions
continue

elif lk_is_object and rk_is_object:
continue
Expand Down Expand Up @@ -2352,7 +2358,7 @@ def _factorize_keys(
if isinstance(lk.dtype, DatetimeTZDtype) and isinstance(rk.dtype, DatetimeTZDtype):
# Extract the ndarray (UTC-localized) values
# Note: we don't need the dtypes to match, as these can still be compared
# TODO(non-nano): need to make sure resolutions match
lk, rk = cast("DatetimeArray", lk)._ensure_matching_resos(rk)
lk = cast("DatetimeArray", lk)._ndarray
rk = cast("DatetimeArray", rk)._ndarray

Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import numpy as np
import pytest
import pytz

from pandas.core.dtypes.common import is_object_dtype
from pandas.core.dtypes.dtypes import CategoricalDtype
Expand Down Expand Up @@ -2773,3 +2774,26 @@ def test_merge_arrow_and_numpy_dtypes(dtype):
result = df2.merge(df)
expected = df2.copy()
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("tzinfo", [None, pytz.timezone("America/Chicago")])
def test_merge_datetime_different_resolution(tzinfo):
    """Merge two frames on a datetime key stored at different resolutions.

    The left frame's key comes from a ``unit="ns"`` timestamp; the right
    frame is a copy whose key column is converted with ``as_unit("s")``.
    The case runs with ``tzinfo=None`` and with the pytz
    ``America/Chicago`` timezone.
    """
    # https://github.com/pandas-dev/pandas/issues/53200
    key = pd.Timestamp(2023, 5, 12, tzinfo=tzinfo, unit="ns")
    left = DataFrame({"t": [key], "a": [1]})

    # Same data on the right-hand side, but with the key at second resolution.
    right = left.copy()
    right["t"] = right["t"].dt.as_unit("s")

    merged = left.merge(right, on="t")

    # The non-key column "a" exists on both sides, so it shows up twice with
    # the default "_x" / "_y" suffixes.
    expected_key = pd.Timestamp(2023, 5, 12, tzinfo=tzinfo)
    expected = DataFrame({"t": [expected_key], "a_x": [1], "a_y": [1]})
    tm.assert_frame_equal(merged, expected)

0 comments on commit 935244a

Please sign in to comment.