Skip to content

Commit

Permalink
ENH: add fold support to Timestamp constructor (pandas-dev#31563)
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexKirko authored and roberthdevries committed Mar 2, 2020
1 parent 1b55123 commit cbc9d41
Show file tree
Hide file tree
Showing 11 changed files with 354 additions and 37 deletions.
29 changes: 29 additions & 0 deletions doc/source/user_guide/timeseries.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2297,6 +2297,35 @@ To remove time zone information, use ``tz_localize(None)`` or ``tz_convert(None)
# tz_convert(None) is identical to tz_convert('UTC').tz_localize(None)
didx.tz_convert('UTC').tz_localize(None)
.. _timeseries.fold:

Fold
~~~~

.. versionadded:: 1.1.0

For ambiguous times, pandas supports explicitly specifying the keyword-only fold argument.
Due to daylight saving time, one wall clock time can occur twice when shifting
from summer to winter time; fold describes whether the datetime-like corresponds
to the first (0) or the second time (1) the wall clock hits the ambiguous time.
Fold is supported only when constructing from a naive ``datetime.datetime``
(see `datetime documentation <https://docs.python.org/3/library/datetime.html>`__ for details), from a :class:`Timestamp`,
or from components (see below). Only ``dateutil`` timezones are supported
(see `dateutil documentation <https://dateutil.readthedocs.io/en/stable/tz.html#dateutil.tz.enfold>`__
for ``dateutil`` methods that deal with ambiguous datetimes) as ``pytz``
timezones do not support fold (see `pytz documentation <http://pytz.sourceforge.net/index.html>`__
for details on how ``pytz`` deals with ambiguous datetimes). To localize an ambiguous datetime
with ``pytz``, please use :meth:`Timestamp.tz_localize`. In general, we recommend relying
on :meth:`Timestamp.tz_localize` when localizing ambiguous datetimes if you need direct
control over how they are handled.

.. ipython:: python
pd.Timestamp(datetime.datetime(2019, 10, 27, 1, 30, 0, 0),
tz='dateutil/Europe/London', fold=0)
pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
tz='dateutil/Europe/London', fold=1)
.. _timeseries.timezone_ambiguous:

Ambiguous times when localizing
Expand Down
22 changes: 22 additions & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,28 @@ For example:
ser["2014"]
ser.loc["May 2015"]
.. _whatsnew_110.timestamp_fold_support:

Fold argument support in Timestamp constructor
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

:class:`Timestamp` now supports the keyword-only ``fold`` argument according to `PEP 495 <https://www.python.org/dev/peps/pep-0495/#the-fold-attribute>`_, similar to the parent ``datetime.datetime`` class. It supports both accepting fold as an initialization argument and inferring fold from other constructor arguments (:issue:`25057`, :issue:`31338`). Support is limited to ``dateutil`` timezones as ``pytz`` doesn't support fold.

For example:

.. ipython:: python
ts = pd.Timestamp("2019-10-27 01:30:00+00:00")
ts.fold
.. ipython:: python
ts = pd.Timestamp(year=2019, month=10, day=27, hour=1, minute=30,
tz="dateutil/Europe/London", fold=1)
ts
For more on working with fold, see the :ref:`Fold subsection <timeseries.fold>` in the user guide.

.. _whatsnew_110.enhancements.other:

Other enhancements
Expand Down
32 changes: 20 additions & 12 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -49,30 +49,31 @@ from pandas._libs.tslibs.tzconversion cimport (

cdef inline object create_datetime_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
object tz, object freq, bint fold):
""" convenience routine to construct a datetime.datetime from its parts """
return datetime(dts.year, dts.month, dts.day, dts.hour,
dts.min, dts.sec, dts.us, tz)
dts.min, dts.sec, dts.us, tz, fold=fold)


cdef inline object create_date_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
object tz, object freq, bint fold):
""" convenience routine to construct a datetime.date from its parts """
# GH 25057 add fold argument to match other func_create signatures
return date(dts.year, dts.month, dts.day)


cdef inline object create_time_from_ts(
int64_t value, npy_datetimestruct dts,
object tz, object freq):
object tz, object freq, bint fold):
""" convenience routine to construct a datetime.time from its parts """
return time(dts.hour, dts.min, dts.sec, dts.us, tz)
return time(dts.hour, dts.min, dts.sec, dts.us, tz, fold=fold)


@cython.wraparound(False)
@cython.boundscheck(False)
def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
str box="datetime"):
bint fold=0, str box="datetime"):
"""
Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp
Expand All @@ -83,6 +84,13 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
convert to this timezone
freq : str/Offset, default None
freq to convert
fold : bint, default is 0
Due to daylight saving time, one wall clock time can occur twice
when shifting from summer to winter time; fold describes whether the
datetime-like corresponds to the first (0) or the second time (1)
the wall clock hits the ambiguous time
.. versionadded:: 1.1.0
box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
If datetime, convert to datetime.datetime
If date, convert to datetime.date
Expand All @@ -104,7 +112,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
str typ
int64_t value, delta, local_value
ndarray[object] result = np.empty(n, dtype=object)
object (*func_create)(int64_t, npy_datetimestruct, object, object)
object (*func_create)(int64_t, npy_datetimestruct, object, object, bint)

if box == "date":
assert (tz is None), "tz should be None when converting to date"
Expand All @@ -129,7 +137,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
result[i] = <object>NaT
else:
dt64_to_dtstruct(value, &dts)
result[i] = func_create(value, dts, tz, freq)
result[i] = func_create(value, dts, tz, freq, fold)
elif is_tzlocal(tz):
for i in range(n):
value = arr[i]
Expand All @@ -141,7 +149,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
# using the i8 representation.
local_value = tz_convert_utc_to_tzlocal(value, tz)
dt64_to_dtstruct(local_value, &dts)
result[i] = func_create(value, dts, tz, freq)
result[i] = func_create(value, dts, tz, freq, fold)
else:
trans, deltas, typ = get_dst_info(tz)

Expand All @@ -155,7 +163,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
else:
# Adjust datetime64 timestamp, recompute datetimestruct
dt64_to_dtstruct(value + delta, &dts)
result[i] = func_create(value, dts, tz, freq)
result[i] = func_create(value, dts, tz, freq, fold)

elif typ == 'dateutil':
# no zone-name change for dateutil tzs - dst etc
Expand All @@ -168,7 +176,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
# Adjust datetime64 timestamp, recompute datetimestruct
pos = trans.searchsorted(value, side='right') - 1
dt64_to_dtstruct(value + deltas[pos], &dts)
result[i] = func_create(value, dts, tz, freq)
result[i] = func_create(value, dts, tz, freq, fold)
else:
# pytz
for i in range(n):
Expand All @@ -182,7 +190,7 @@ def ints_to_pydatetime(const int64_t[:] arr, object tz=None, object freq=None,
new_tz = tz._tzinfos[tz._transition_info[pos]]

dt64_to_dtstruct(value + deltas[pos], &dts)
result[i] = func_create(value, dts, new_tz, freq)
result[i] = func_create(value, dts, new_tz, freq, fold)

return result

Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/conversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cdef class _TSObject:
npy_datetimestruct dts # npy_datetimestruct
int64_t value # numpy dt64
object tzinfo
bint fold


cdef convert_to_tsobject(object ts, object tz, object unit,
Expand Down
69 changes: 66 additions & 3 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ from pandas._libs.tslibs.nattype cimport (

from pandas._libs.tslibs.tzconversion import (
tz_localize_to_utc, tz_convert_single)
from pandas._libs.tslibs.tzconversion cimport _tz_convert_tzlocal_utc
from pandas._libs.tslibs.tzconversion cimport (
_tz_convert_tzlocal_utc, _tz_convert_tzlocal_fromutc)

# ----------------------------------------------------------------------
# Constants
Expand Down Expand Up @@ -215,6 +216,11 @@ cdef class _TSObject:
# npy_datetimestruct dts # npy_datetimestruct
# int64_t value # numpy dt64
# object tzinfo
# bint fold

def __cinit__(self):
# GH 25057. As per PEP 495, set fold to 0 by default, i.e. treat the value
# as the first occurrence of an ambiguous wall-clock time unless a
# conversion routine later assigns a different fold.
self.fold = 0

@property
def value(self):
Expand Down Expand Up @@ -322,6 +328,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
cdef:
_TSObject obj = _TSObject()

obj.fold = ts.fold
if tz is not None:
tz = maybe_get_tz(tz)

Expand Down Expand Up @@ -380,6 +387,8 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts,
_TSObject obj = _TSObject()
int64_t value # numpy dt64
datetime dt
ndarray[int64_t] trans
int64_t[:] deltas

value = dtstruct_to_dt64(&dts)
obj.dts = dts
Expand All @@ -389,10 +398,23 @@ cdef _TSObject create_tsobject_tz_using_offset(npy_datetimestruct dts,
check_overflows(obj)
return obj

# Infer fold from offset-adjusted obj.value
# see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
if is_utc(tz):
pass
elif is_tzlocal(tz):
_tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold)
else:
trans, deltas, typ = get_dst_info(tz)

if typ == 'dateutil':
pos = trans.searchsorted(obj.value, side='right') - 1
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)

# Keep the converter same as PyDateTime's
dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day,
obj.dts.hour, obj.dts.min, obj.dts.sec,
obj.dts.us, obj.tzinfo)
obj.dts.us, obj.tzinfo, fold=obj.fold)
obj = convert_datetime_to_tsobject(
dt, tz, nanos=obj.dts.ps // 1000)
return obj
Expand Down Expand Up @@ -543,7 +565,7 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
elif obj.value == NPY_NAT:
pass
elif is_tzlocal(tz):
local_val = _tz_convert_tzlocal_utc(obj.value, tz, to_utc=False)
local_val = _tz_convert_tzlocal_fromutc(obj.value, tz, &obj.fold)
dt64_to_dtstruct(local_val, &obj.dts)
else:
# Adjust datetime64 timestamp, recompute datetimestruct
Expand All @@ -562,6 +584,8 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
# i.e. treat_tz_as_dateutil(tz)
pos = trans.searchsorted(obj.value, side='right') - 1
dt64_to_dtstruct(obj.value + deltas[pos], &obj.dts)
# dateutil supports fold, so we infer fold from value
obj.fold = _infer_tsobject_fold(obj, trans, deltas, pos)
else:
# Note: as of 2018-07-17 all tzinfo objects that are _not_
# either pytz or dateutil have is_fixed_offset(tz) == True,
Expand All @@ -571,6 +595,45 @@ cdef inline void localize_tso(_TSObject obj, tzinfo tz):
obj.tzinfo = tz


cdef inline bint _infer_tsobject_fold(_TSObject obj, ndarray[int64_t] trans,
                                      int64_t[:] deltas, int32_t pos):
    """
    Infer _TSObject fold property from value by assuming 0 and then setting
    to 1 if necessary.

    Parameters
    ----------
    obj : _TSObject
        Object whose ``value`` is compared against the transition point.
    trans : ndarray[int64_t]
        ndarray of offset transition points in nanoseconds since epoch.
    deltas : int64_t[:]
        array of offsets corresponding to transition points in trans.
    pos : int32_t
        Position of the last transition point before taking fold into account.

    Returns
    -------
    bint
        Due to daylight saving time, one wall clock time can occur twice
        when shifting from summer to winter time; fold describes whether the
        datetime-like corresponds to the first (0) or the second time (1)
        the wall clock hits the ambiguous time

    References
    ----------
    .. [1] "PEP 495 - Local Time Disambiguation"
           https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
    """
    cdef:
        bint fold = 0
        # Typed explicitly so the subtraction and comparison below stay in C
        # integer arithmetic instead of going through a Python object.
        int64_t fold_delta

    # With pos == 0 there is no earlier offset to compare against, so fold
    # keeps its default of 0.
    if pos > 0:
        # Offset in effect before the transition minus the offset after it.
        fold_delta = deltas[pos - 1] - deltas[pos]
        # fold is 1 when obj.value is less than fold_delta past trans[pos].
        if obj.value - fold_delta < trans[pos]:
            fold = 1

    return fold

cdef inline datetime _localize_pydatetime(datetime dt, tzinfo tz):
"""
Take a datetime/Timestamp in UTC and localizes to timezone tz.
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/tslibs/timestamps.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ from pandas._libs.tslibs.np_datetime cimport npy_datetimestruct

cdef object create_timestamp_from_ts(int64_t value,
npy_datetimestruct dts,
object tz, object freq)
object tz, object freq, bint fold)
Loading

0 comments on commit cbc9d41

Please sign in to comment.