From 009ffa8d2c019ffb757fb0a4b53cc7a9a948afdd Mon Sep 17 00:00:00 2001 From: Michael Marino Date: Sat, 17 Oct 2020 02:07:06 +0200 Subject: [PATCH] BUG: Fix parsing of ISO8601 durations (#37159) --- doc/source/whatsnew/v1.2.0.rst | 3 +- pandas/_libs/tslibs/timedeltas.pyx | 28 +++++++++---------- .../scalar/timedelta/test_constructors.py | 9 +++++- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c9a1dbd0ae90d..dfbbb456f50b6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -348,8 +348,7 @@ Datetimelike Timedelta ^^^^^^^^^ - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`) -- -- +- Bug in parsing of ISO 8601 durations in :class:`Timedelta` and :func:`to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index ee32ed53a908b..c6b47d09cf0bd 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -604,7 +604,7 @@ cdef inline int64_t parse_iso_format_string(str ts) except? 
-1: if not len(unit): number.append(c) else: - # if in days, pop trailing T - if unit[-1] == 'T': - unit.pop() - elif 'H' in unit or 'M' in unit: - if len(number) > 2: - raise ValueError(err_msg) r = timedelta_from_spec(number, '0', unit) result += timedelta_as_neg(r, neg) neg = 0 unit, number = [], [c] else: - if c == 'P': - pass # ignore leading character + if c == 'P' or c == 'T': + pass # ignore marking characters P and T elif c == '-': if neg or have_value: raise ValueError(err_msg) else: neg = 1 - elif c in ['D', 'T', 'H', 'M']: + elif c in ['W', 'D', 'H', 'M']: unit.append(c) + if c in ['H', 'M'] and len(number) > 2: + raise ValueError(err_msg) + r = timedelta_from_spec(number, '0', unit) + result += timedelta_as_neg(r, neg) + + neg = 0 + unit, number = [], [] elif c == '.': # append any seconds if len(number): @@ -661,11 +662,8 @@ cdef inline int64_t parse_iso_format_string(str ts) except? -1: r = timedelta_from_spec(number, '0', dec_unit) result += timedelta_as_neg(r, neg) else: # seconds - if len(number) <= 2: - r = timedelta_from_spec(number, '0', 'S') - result += timedelta_as_neg(r, neg) - else: - raise ValueError(err_msg) + r = timedelta_from_spec(number, '0', 'S') + result += timedelta_as_neg(r, neg) else: raise ValueError(err_msg) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 23fb25b838da6..06bdb8a6cf0a2 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -228,6 +228,14 @@ def test_overflow_on_construction(): ("P0DT0H0M0.001S", Timedelta(milliseconds=1)), ("P0DT0H1M0S", Timedelta(minutes=1)), ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)), + ("PT1S", Timedelta(seconds=1)), + ("PT0S", Timedelta(seconds=0)), + ("P1WT0S", Timedelta(days=7, seconds=0)), + ("P1D", Timedelta(days=1)), + ("P1DT1H", Timedelta(days=1, hours=1)), + ("P1W", Timedelta(days=7)), + ("PT300S", 
Timedelta(seconds=300)), + ("P1DT0H0M00000000000S", Timedelta(days=1)), ], ) def test_iso_constructor(fmt, exp): @@ -241,7 +249,6 @@ def test_iso_constructor(fmt, exp): "PDTHMS", "P0DT999H999M999S", "P1DT0H0M0.0000000000000S", - "P1DT0H0M00000000000S", "P1DT0H0M0.S", ], )