Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix to_datetime not respecting dayfirst #58876

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.is_year_start` and :meth:`DatetimeIndex.is_quarter_start` returning ``False`` on double-digit frequencies (:issue:`58523`)
- Bug in :meth:`DatetimeIndex.union` when ``unit`` was non-nanosecond (:issue:`59036`)
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
- Bug in :func:`to_datetime` not respecting ``dayfirst`` if an uncommon date string was passed, e.g. an unseparated string such as ``"20201012"`` (:issue:`58859`)
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

Timedelta
Expand Down
65 changes: 35 additions & 30 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -606,37 +606,42 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz,
# equiv: datetime.today().replace(tzinfo=tz)
return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=NPY_FR_us)
else:
string_to_dts_failed = string_to_dts(
ts, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
obj.creso = reso
ival = npy_datetimestruct_to_datetime(reso, &dts)

if out_local == 1:
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
obj.value = tz_localize_to_utc_single(
ival, obj.tzinfo, ambiguous="raise", nonexistent=None, creso=reso
)
if tz is None:
check_overflows(obj, reso)
return obj
_adjust_tsobject_tz_using_offset(obj, tz)
return obj
else:
if tz is not None:
# shift for _localize_tso
ival = tz_localize_to_utc_single(
ival, tz, ambiguous="raise", nonexistent=None, creso=reso
if not dayfirst: # GH 58859
string_to_dts_failed = string_to_dts(
ts, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
reso = get_supported_reso(out_bestunit)
check_dts_bounds(&dts, reso)
obj = _TSObject()
obj.dts = dts
obj.creso = reso
ival = npy_datetimestruct_to_datetime(reso, &dts)

if out_local == 1:
obj.tzinfo = timezone(timedelta(minutes=out_tzoffset))
obj.value = tz_localize_to_utc_single(
ival,
obj.tzinfo,
ambiguous="raise",
nonexistent=None,
creso=reso,
)
obj.value = ival
maybe_localize_tso(obj, tz, obj.creso)
return obj
if tz is None:
check_overflows(obj, reso)
return obj
_adjust_tsobject_tz_using_offset(obj, tz)
return obj
else:
if tz is not None:
# shift for _localize_tso
ival = tz_localize_to_utc_single(
ival, tz, ambiguous="raise", nonexistent=None, creso=reso
)
obj.value = ival
maybe_localize_tso(obj, tz, obj.creso)
return obj

dt = parse_datetime_string(
ts,
Expand Down
49 changes: 25 additions & 24 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -377,32 +377,33 @@ def parse_datetime_string_with_reso(
raise ValueError(f'Given date string "{date_string}" not likely a datetime')

# Try iso8601 first, as it handles nanoseconds
string_to_dts_failed = string_to_dts(
date_string, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
# The new resolution will just be nano
# GH#50417
if out_bestunit in _timestamp_units:
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns

if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
# TODO: avoid circular import
from pandas import Timestamp
parsed = Timestamp(date_string)
else:
if out_local:
tz = timezone(timedelta(minutes=out_tzoffset))
if not dayfirst: # GH 58859
string_to_dts_failed = string_to_dts(
date_string, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
# Match Timestamp and drop picoseconds, femtoseconds, attoseconds
# The new resolution will just be nano
# GH#50417
if out_bestunit in _timestamp_units:
out_bestunit = NPY_DATETIMEUNIT.NPY_FR_ns

if out_bestunit == NPY_DATETIMEUNIT.NPY_FR_ns:
# TODO: avoid circular import
from pandas import Timestamp
parsed = Timestamp(date_string)
else:
tz = None
parsed = datetime_new(
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
)
if out_local:
tz = timezone(timedelta(minutes=out_tzoffset))
else:
tz = None
parsed = datetime_new(
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz
)

reso = npy_unit_to_attrname[out_bestunit]
return parsed, reso
reso = npy_unit_to_attrname[out_bestunit]
return parsed, reso

parsed = _parse_delimited_date(date_string, dayfirst, &out_bestunit)
if parsed is not None:
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2988,6 +2988,8 @@ def test_parsers_nat(self):
("20/12/21", True, False, datetime(2021, 12, 20)),
("20/12/21", False, True, datetime(2020, 12, 21)),
("20/12/21", True, True, datetime(2020, 12, 21)),
# GH 58859
("20201012", True, False, datetime(2020, 12, 10)),
],
)
def test_parsers_dayfirst_yearfirst(
Expand Down