Skip to content

Commit

Permalink
CLN: tzconversion (#46926)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored May 2, 2022
1 parent d37643a commit 60ac973
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 64 deletions.
12 changes: 0 additions & 12 deletions pandas/_libs/tslibs/tzconversion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,13 @@ from numpy cimport (
)


cdef int64_t localize_tzinfo_api(
int64_t utc_val, tzinfo tz, bint* fold=*
) except? -1
cdef int64_t tz_convert_from_utc_single(
int64_t utc_val, tzinfo tz, bint* fold=?, Py_ssize_t* outpos=?
) except? -1
cdef int64_t tz_localize_to_utc_single(
int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=*
) except? -1

cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n)

cdef bint infer_dateutil_fold(
int64_t value,
const int64_t[::1] trans,
const int64_t[::1] deltas,
Py_ssize_t pos,
)


cdef class Localizer:
cdef:
Expand Down
119 changes: 67 additions & 52 deletions pandas/_libs/tslibs/tzconversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64)


@cython.freelist(16)
#@cython.internal
@cython.final
cdef class Localizer:
# cdef:
Expand Down Expand Up @@ -102,13 +101,15 @@ cdef class Localizer:
if self.use_utc:
return utc_val
elif self.use_tzlocal:
return utc_val + localize_tzinfo_api(utc_val, self.tz, fold)
return utc_val + _tz_localize_using_tzinfo_api(
utc_val, self.tz, to_utc=False, fold=fold
)
elif self.use_fixed:
return utc_val + self.delta
else:
pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1
if fold is not NULL:
fold[0] = infer_dateutil_fold(
fold[0] = _infer_dateutil_fold(
utc_val, self.trans, self.deltas, pos[0]
)

Expand Down Expand Up @@ -184,10 +185,10 @@ timedelta-like}
cdef:
const int64_t[::1] deltas
ndarray[uint8_t, cast=True] ambiguous_array
Py_ssize_t i, isl, isr, idx, pos, ntrans, n = vals.shape[0]
Py_ssize_t i, idx, pos, ntrans, n = vals.shape[0]
Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right
int64_t *tdata
int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins
int64_t v, left, right, val, new_local, remaining_mins
int64_t first_delta, delta
int64_t shift_delta = 0
ndarray[int64_t] trans, result_a, result_b, dst_hours
Expand All @@ -202,7 +203,7 @@ timedelta-like}
if is_utc(tz) or tz is None:
return vals.copy()

result = np.empty(n, dtype=np.int64)
result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)

if is_tzlocal(tz) or is_zoneinfo(tz):
for i in range(n):
Expand Down Expand Up @@ -265,40 +266,7 @@ timedelta-like}

# Determine whether each date lies left of the DST transition (store in
# result_a) or right of the DST transition (store in result_b)
result_a = np.empty(n, dtype=np.int64)
result_b = np.empty(n, dtype=np.int64)

for i in range(n):
# This loop resembles the "Find the two best possibilities" block
# in pytz's DstTZInfo.localize method.
result_a[i] = NPY_NAT
result_b[i] = NPY_NAT

val = vals[i]
if val == NPY_NAT:
continue

# TODO: be careful of overflow in val-DAY_NANOS
isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1
if isl < 0:
isl = 0

v_left = val - deltas[isl]
pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
# timestamp falls to the left side of the DST transition
if v_left + deltas[pos_left] == val:
result_a[i] = v_left

# TODO: be careful of overflow in val+DAY_NANOS
isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1
if isr < 0:
isr = 0

v_right = val - deltas[isr]
pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
# timestamp falls to the right side of the DST transition
if v_right + deltas[pos_right] == val:
result_b[i] = v_right
result_a, result_b =_get_utc_bounds(vals, tdata, ntrans, deltas)

# silence false-positive compiler warning
dst_hours = np.empty(0, dtype=np.int64)
Expand Down Expand Up @@ -417,6 +385,59 @@ cdef inline str _render_tstamp(int64_t val):
return str(Timestamp(val))


cdef _get_utc_bounds(
    ndarray vals,
    int64_t* tdata,
    Py_ssize_t ntrans,
    const int64_t[::1] deltas,
):
    """
    Compute up to two candidate results for each entry of ``vals``.

    For every entry ``val`` that is not NPY_NAT, two offsets are tried: the
    element of ``deltas`` selected by searching ``tdata`` for
    ``val - DAY_NANOS`` ("left") and the one selected by searching for
    ``val + DAY_NANOS`` ("right").  A candidate ``val - delta`` is stored
    only if it round-trips, i.e. adding the delta found by searching
    ``tdata`` for the candidate itself gives back ``val``.

    Parameters
    ----------
    vals : ndarray
        Values to process; entries equal to NPY_NAT are skipped.
        NOTE(review): presumably wall-clock i8 timestamps -- confirm against
        the caller.
    tdata : int64_t*
        Data pointer handed to bisect_right_i8 together with ``ntrans``.
        NOTE(review): presumably the sorted transition times -- confirm.
    ntrans : Py_ssize_t
        Length argument handed to bisect_right_i8 for ``tdata``.
    deltas : const int64_t[::1]
        Offsets, indexed by the position found in ``tdata``.

    Returns
    -------
    tuple
        ``(result_a, result_b)``: int64 arrays allocated with the ndim and
        shape of ``vals``.  An entry is NPY_NAT where ``vals`` is NPY_NAT or
        where the corresponding candidate did not round-trip.
    """
    # Determine whether each date lies left of the DST transition (store in
    # result_a) or right of the DST transition (store in result_b)

    cdef:
        ndarray result_a, result_b
        Py_ssize_t i, n = vals.size
        int64_t val, v_left, v_right
        Py_ssize_t isl, isr, pos_left, pos_right

    result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
    result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)

    # NOTE(review): the loop runs over vals.size but indexes with a single
    # integer, which looks like it assumes 1-D input -- confirm.
    for i in range(n):
        # This loop resembles the "Find the two best possibilities" block
        # in pytz's DstTZInfo.localize method.
        # Default both candidates to NaT; they are overwritten below only
        # when the round-trip check succeeds.
        result_a[i] = NPY_NAT
        result_b[i] = NPY_NAT

        val = vals[i]
        if val == NPY_NAT:
            continue

        # TODO: be careful of overflow in val-DAY_NANOS
        isl = bisect_right_i8(tdata, val - DAY_NANOS, ntrans) - 1
        # Clamp a negative position to 0 before indexing deltas.
        if isl < 0:
            isl = 0

        v_left = val - deltas[isl]
        # NOTE(review): pos_left is not clamped the way isl is -- confirm it
        # cannot be negative here.
        pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
        # timestamp falls to the left side of the DST transition
        if v_left + deltas[pos_left] == val:
            result_a[i] = v_left

        # TODO: be careful of overflow in val+DAY_NANOS
        isr = bisect_right_i8(tdata, val + DAY_NANOS, ntrans) - 1
        # Clamp a negative position to 0 before indexing deltas.
        if isr < 0:
            isr = 0

        v_right = val - deltas[isr]
        # NOTE(review): pos_right is not clamped the way isr is -- confirm it
        # cannot be negative here.
        pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
        # timestamp falls to the right side of the DST transition
        if v_right + deltas[pos_right] == val:
            result_b[i] = v_right

    return result_a, result_b


@cython.boundscheck(False)
cdef ndarray[int64_t] _get_dst_hours(
# vals only needed here to potentially render an exception message
Expand All @@ -433,10 +454,10 @@ cdef ndarray[int64_t] _get_dst_hours(
intp_t switch_idx
int64_t left, right

dst_hours = np.empty(n, dtype=np.int64)
dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0)
dst_hours[:] = NPY_NAT

mismatch = np.zeros(n, dtype=bool)
mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0)

for i in range(n):
left = result_a[i]
Expand All @@ -450,6 +471,7 @@ cdef ndarray[int64_t] _get_dst_hours(
trans_idx = mismatch.nonzero()[0]

if trans_idx.size == 1:
# TODO: not reached in tests 2022-05-02; possible?
stamp = _render_tstamp(vals[trans_idx[0]])
raise pytz.AmbiguousTimeError(
f"Cannot infer dst time from {stamp} as there "
Expand All @@ -471,13 +493,15 @@ cdef ndarray[int64_t] _get_dst_hours(

delta = np.diff(result_a[grp])
if grp.size == 1 or np.all(delta > 0):
# TODO: not reached in tests 2022-05-02; possible?
stamp = _render_tstamp(vals[grp[0]])
raise pytz.AmbiguousTimeError(stamp)

# Find the index for the switch and pull from a for dst and b
# for standard
switch_idxs = (delta <= 0).nonzero()[0]
if switch_idxs.size > 1:
# TODO: not reached in tests 2022-05-02; possible?
raise pytz.AmbiguousTimeError(
f"There are {switch_idxs.size} dst switches when "
"there should only be 1."
Expand All @@ -495,15 +519,6 @@ cdef ndarray[int64_t] _get_dst_hours(
# ----------------------------------------------------------------------
# Timezone Conversion

cdef int64_t localize_tzinfo_api(
    int64_t utc_val, tzinfo tz, bint* fold=NULL
) except? -1:
    """
    Forward to _tz_localize_using_tzinfo_api with ``to_utc=False``.

    ``utc_val``, ``tz`` and ``fold`` are passed through unchanged; see
    _tz_localize_using_tzinfo_api.__doc__ for the full contract.
    """
    cdef:
        int64_t result

    result = _tz_localize_using_tzinfo_api(
        utc_val, tz, to_utc=False, fold=fold
    )
    return result


def py_tz_convert_from_utc_single(int64_t utc_val, tzinfo tz):
# The 'bint* fold=NULL' in tz_convert_from_utc_single means we cannot
# make it cdef, so this is version exposed for testing from python.
Expand Down Expand Up @@ -608,7 +623,7 @@ cdef int64_t _tz_localize_using_tzinfo_api(
# NB: relies on dateutil internals, subject to change.
@cython.boundscheck(False)
@cython.wraparound(False)
cdef bint infer_dateutil_fold(
cdef bint _infer_dateutil_fold(
int64_t value,
const int64_t[::1] trans,
const int64_t[::1] deltas,
Expand Down

0 comments on commit 60ac973

Please sign in to comment.