Merge remote-tracking branch 'upstream/main' into refactor_series_constructor
aureliobarbosa committed Mar 21, 2024
2 parents 7e01783 + 41383cf commit e5e4426
Showing 18 changed files with 169 additions and 145 deletions.
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/timeseries.py
@@ -183,7 +183,7 @@ def setup(self):
self.dt_ts = Series(5, rng3, dtype="datetime64[ns]")

def time_resample(self):
self.dt_ts.resample("1S").last()
self.dt_ts.resample("1s").last()


class AsOf:
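The benchmark change only swaps the deprecated uppercase resample alias "1S" for the lowercase "1s" spelling. A minimal sketch of the same call pattern, using made-up data rather than the ASV fixture and assuming a pandas build that includes these changes:

import numpy as np
import pandas as pd

# Hypothetical data standing in for the ASV fixture: one value per second for an hour.
rng = pd.date_range("2024-01-01", periods=3600, freq="s")
dt_ts = pd.Series(np.arange(3600), index=rng)

# Lowercase "1s" is the supported spelling; the uppercase "1S" alias is being phased out.
last_per_second = dt_ts.resample("1s").last()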
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -208,6 +208,8 @@ Removal of prior version deprecations/changes
- Enforced deprecation of string ``A`` denoting frequency in :class:`YearEnd` and strings ``A-DEC``, ``A-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57699`)
- Enforced deprecation of string ``BAS`` denoting frequency in :class:`BYearBegin` and strings ``BAS-DEC``, ``BAS-JAN``, etc. denoting annual frequencies with various fiscal year starts (:issue:`57793`)
- Enforced deprecation of string ``BA`` denoting frequency in :class:`BYearEnd` and strings ``BA-DEC``, ``BA-JAN``, etc. denoting annual frequencies with various fiscal year ends (:issue:`57793`)
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`)
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
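The two new whatsnew bullets mean the enforced aliases now raise instead of warning. A short sketch of the expected behaviour, assuming a pandas build that includes these changes (the exact error wording may differ):

import pandas as pd

# The lowercase aliases remain the supported spelling ...
pd.date_range("2000-01-01", periods=3, freq="2min")
pd.Timedelta(1, unit="ms")

# ... while the enforced single-letter aliases now raise instead of emitting a FutureWarning.
for bad_freq in ["2T", "2L", "1U", "2N"]:
    try:
        pd.date_range("2000-01-01", periods=3, freq=bad_freq)
    except ValueError as err:
        print(err)  # e.g. "Invalid frequency: 2T"

try:
    pd.Timedelta(1, unit="T")
except ValueError as err:
    print(err)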
2 changes: 1 addition & 1 deletion pandas/_libs/hashtable.pxd
@@ -183,7 +183,7 @@ cdef class Int64Vector(Vector):
cdef Int64VectorData data
cdef ndarray ao

cdef resize(self)
cdef resize(self, Py_ssize_t new_size)
cpdef ndarray to_array(self)
cdef void append(self, int64_t x) noexcept
cdef extend(self, int64_t[:] x)
1 change: 1 addition & 0 deletions pandas/_libs/hashtable.pyx
@@ -7,6 +7,7 @@ from libc.stdlib cimport (
free,
malloc,
)
from libc.string cimport memcpy

import numpy as np

50 changes: 31 additions & 19 deletions pandas/_libs/hashtable_class_helper.pxi.in
@@ -163,8 +163,9 @@ ctypedef fused vector_data:
Complex64VectorData
StringVectorData

cdef bint needs_resize(vector_data *data) noexcept nogil:
return data.size == data.capacity

cdef bint needs_resize(Py_ssize_t nelems, Py_ssize_t capacity) noexcept nogil:
return nelems >= capacity

# ----------------------------------------------------------------------
# Vector
@@ -214,8 +215,8 @@ cdef class {{name}}Vector(Vector):
self.ao = np.empty(self.data.capacity, dtype=np.{{dtype}})
self.data.data = <{{c_type}}*>self.ao.data

cdef resize(self):
self.data.capacity = max(self.data.capacity * 4, _INIT_VEC_CAP)
cdef resize(self, Py_ssize_t new_size):
self.data.capacity = max(new_size, _INIT_VEC_CAP)
self.ao.resize(self.data.capacity, refcheck=False)
self.data.data = <{{c_type}}*>self.ao.data

@@ -234,17 +235,28 @@ cdef class {{name}}Vector(Vector):

cdef void append(self, {{c_type}} x) noexcept:

if needs_resize(&self.data):
if needs_resize(self.data.size, self.data.capacity):
if self.external_view_exists:
raise ValueError("external reference but "
"Vector.resize() needed")
self.resize()
self.resize(self.data.capacity * 4)

append_data_{{dtype}}(&self.data, x)

cdef extend(self, const {{c_type}}[:] x):
for i in range(len(x)):
self.append(x[i])
cdef Py_ssize_t x_size = len(x)
if x_size == 0:
return

cdef Py_ssize_t needed_size = self.data.size + x_size
if needs_resize(needed_size, self.data.capacity):
if self.external_view_exists:
raise ValueError("external reference but "
"Vector.resize() needed")
self.resize(needed_size)

memcpy(self.data.data + self.data.size, &x[0], x_size * sizeof({{c_type}}))
self.data.size = needed_size

{{endfor}}

@@ -260,7 +272,7 @@ cdef class StringVector(Vector):
if self.data.data is NULL:
raise MemoryError()

cdef resize(self):
cdef resize(self, Py_ssize_t new_size):
cdef:
char **orig_data
Py_ssize_t i, orig_capacity
@@ -297,8 +309,8 @@

cdef void append(self, char *x) noexcept:

if needs_resize(&self.data):
self.resize()
if needs_resize(self.data.size, self.data.capacity):
self.resize(self.data.capacity * 4)

append_data_string(&self.data, x)

@@ -684,18 +696,18 @@ cdef class {{name}}HashTable(HashTable):
continue

seen_na = True
if needs_resize(ud):
if needs_resize(ud.size, ud.capacity):
with gil:
if uniques.external_view_exists:
raise ValueError("external reference to "
"uniques held, but "
"Vector.resize() needed")
uniques.resize()
uniques.resize(uniques.data.capacity * 4)
if result_mask.external_view_exists:
raise ValueError("external reference to "
"result_mask held, but "
"Vector.resize() needed")
result_mask.resize()
result_mask.resize(result_mask.data.capacity * 4)
append_data_{{dtype}}(ud, val)
append_data_uint8(rmd, 1)
continue
@@ -706,19 +718,19 @@ cdef class {{name}}HashTable(HashTable):
# k hasn't been seen yet
k = kh_put_{{dtype}}(self.table, val, &ret)

if needs_resize(ud):
if needs_resize(ud.size, ud.capacity):
with gil:
if uniques.external_view_exists:
raise ValueError("external reference to "
"uniques held, but "
"Vector.resize() needed")
uniques.resize()
uniques.resize(uniques.data.capacity * 4)
if use_result_mask:
if result_mask.external_view_exists:
raise ValueError("external reference to "
"result_mask held, but "
"Vector.resize() needed")
result_mask.resize()
result_mask.resize(result_mask.data.capacity * 4)
append_data_{{dtype}}(ud, val)
if use_result_mask:
append_data_uint8(rmd, 0)
@@ -849,9 +861,9 @@ cdef class {{name}}HashTable(HashTable):
k = kh_put_{{dtype}}(self.table, val, &ret)
self.table.vals[k] = count

if needs_resize(ud):
if needs_resize(ud.size, ud.capacity):
with gil:
uniques.resize()
uniques.resize(uniques.data.capacity * 4)
append_data_{{dtype}}(ud, val)
labels[i] = count
count += 1
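Taken together, the hunks above give needs_resize an explicit (nelems, capacity) pair, let resize grow to a caller-chosen size, and let extend do a single resize followed by one memcpy instead of appending element by element. A rough pure-Python/NumPy analogue of that growth strategy, for illustration only (the real code is the templated Cython above):

import numpy as np


class IntVector:
    """Rough Python analogue of the Cython vector's growth strategy (illustration only)."""

    _INIT_CAP = 32  # stands in for _INIT_VEC_CAP

    def __init__(self):
        self._buf = np.empty(self._INIT_CAP, dtype=np.int64)
        self.size = 0

    def _needs_resize(self, nelems):
        # Mirrors needs_resize(nelems, capacity): compare a prospective size against capacity.
        return nelems >= self._buf.size

    def _resize(self, new_size):
        # Mirrors resize(new_size): grow to at least new_size, never below the initial capacity.
        self._buf = np.resize(self._buf, max(new_size, self._INIT_CAP))

    def append(self, value):
        if self._needs_resize(self.size):
            self._resize(self._buf.size * 4)  # quadruple on overflow, as append does above
        self._buf[self.size] = value
        self.size += 1

    def extend(self, values):
        values = np.asarray(values, dtype=np.int64)
        if values.size == 0:
            return
        needed = self.size + values.size
        if self._needs_resize(needed):
            self._resize(needed)              # one resize up front ...
        self._buf[self.size:needed] = values  # ... then one bulk copy (memcpy in the Cython code)
        self.size = needed


vec = IntVector()
vec.extend(np.arange(100))
vec.append(100)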
4 changes: 2 additions & 2 deletions pandas/_libs/hashtable_func_helper.pxi.in
@@ -480,9 +480,9 @@ def _unique_label_indices_{{dtype}}(const {{c_type}}[:] labels) -> ndarray:
for i in range(n):
kh_put_{{ttype}}(table, labels[i], &ret)
if ret != 0:
if needs_resize(ud):
if needs_resize(ud.size, ud.capacity):
with gil:
idx.resize()
idx.resize(idx.data.capacity * 4)
append_data_{{ttype}}(ud, i)

kh_destroy_{{ttype}}(table)
4 changes: 4 additions & 0 deletions pandas/_libs/lib.pyi
@@ -231,3 +231,7 @@ def is_range_indexer(
left: np.ndarray,
n: int, # np.ndarray[np.int64, ndim=1]
) -> bool: ...
def is_sequence_range(
sequence: np.ndarray,
step: int, # np.ndarray[np.int64, ndim=1]
) -> bool: ...
22 changes: 22 additions & 0 deletions pandas/_libs/lib.pyx
@@ -678,6 +678,28 @@ def is_range_indexer(ndarray[int6432_t, ndim=1] left, Py_ssize_t n) -> bool:
return True


@cython.wraparound(False)
@cython.boundscheck(False)
def is_sequence_range(ndarray[int6432_t, ndim=1] sequence, int64_t step) -> bool:
"""
Check if sequence is equivalent to a range with the specified step.
"""
cdef:
Py_ssize_t i, n = len(sequence)
int6432_t first_element

if step == 0:
return False
if n == 0:
return True

first_element = sequence[0]
for i in range(1, n):
if sequence[i] != first_element + i * step:
return False
return True


ctypedef fused ndarr_object:
ndarray[object, ndim=1]
ndarray[object, ndim=2]
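The new lib.is_sequence_range helper answers one question: is the integer array exactly sequence[0] + i * step for every position i? A NumPy sketch of an equivalent check (not the Cython implementation itself):

import numpy as np

def is_sequence_range_sketch(sequence: np.ndarray, step: int) -> bool:
    # Equivalent check in NumPy: every element must equal sequence[0] + i * step.
    if step == 0:
        return False
    if len(sequence) == 0:
        return True
    expected = sequence[0] + step * np.arange(len(sequence), dtype=np.int64)
    return bool((sequence == expected).all())

is_sequence_range_sketch(np.array([2, 4, 6, 8]), 2)  # True
is_sequence_range_sketch(np.array([2, 4, 7, 8]), 2)  # False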
14 changes: 5 additions & 9 deletions pandas/_libs/tslibs/dtypes.pyx
@@ -313,15 +313,7 @@ cdef dict c_DEPR_ABBREVS = {
"H": "h",
"BH": "bh",
"CBH": "cbh",
"T": "min",
"t": "min",
"S": "s",
"L": "ms",
"l": "ms",
"U": "us",
"u": "us",
"N": "ns",
"n": "ns",
}


@@ -415,13 +407,17 @@ class Resolution(Enum):
"""
cdef:
str abbrev
if freq in {"T", "t", "L", "l", "U", "u", "N", "n"}:
raise ValueError(
f"Frequency \'{freq}\' is no longer supported."
)
try:
if freq in c_DEPR_ABBREVS:
abbrev = c_DEPR_ABBREVS[freq]
warnings.warn(
f"\'{freq}\' is deprecated and will be removed in a future "
f"version. Please use \'{abbrev}\' "
"instead of \'{freq}\'.",
f"instead of \'{freq}\'.",
FutureWarning,
stacklevel=find_stack_level(),
)
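After this hunk, the Resolution lookup raises outright for the removed aliases, while entries still present in c_DEPR_ABBREVS (such as "H") keep warning and mapping to their lowercase forms. A small user-level sketch of that split, assuming a build at this point in the branch (messages approximate):

import warnings
import pandas as pd

# An alias still listed in c_DEPR_ABBREVS ("H") warns but keeps working ...
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    pd.date_range("2000-01-01", periods=2, freq="2H")
print(caught[0].category)  # FutureWarning suggesting "h"

# ... while the removed aliases raise immediately.
try:
    pd.date_range("2000-01-01", periods=2, freq="2T")
except ValueError as err:
    print(err)  # e.g. "Invalid frequency: 2T"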
10 changes: 2 additions & 8 deletions pandas/core/indexes/base.py
@@ -7169,7 +7169,7 @@ def maybe_sequence_to_range(sequence) -> Any | range:
-------
Any : input or range
"""
if isinstance(sequence, (ABCSeries, Index)):
if isinstance(sequence, (ABCSeries, Index, range)):
return sequence
np_sequence = np.asarray(sequence)
if np_sequence.dtype.kind != "i" or len(np_sequence) == 1:
@@ -7179,13 +7179,7 @@ def maybe_sequence_to_range(sequence) -> Any | range:
diff = np_sequence[1] - np_sequence[0]
if diff == 0:
return sequence
elif len(np_sequence) == 2:
return range(np_sequence[0], np_sequence[1] + diff, diff)
maybe_range_indexer, remainder = np.divmod(np_sequence - np_sequence[0], diff)
if (
lib.is_range_indexer(maybe_range_indexer, len(maybe_range_indexer))
and not remainder.any()
):
elif len(np_sequence) == 2 or lib.is_sequence_range(np_sequence, diff):
return range(np_sequence[0], np_sequence[-1] + diff, diff)
else:
return sequence
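With is_sequence_range in place, maybe_sequence_to_range boils down to: pass through inputs that are already range-like or non-integer, then convert an evenly spaced integer sequence into a range. A rough pure-Python approximation of the simplified branch (the real helper also special-cases Index, Series and range inputs):

import numpy as np

def maybe_sequence_to_range_sketch(sequence):
    # Approximation of the simplified logic; illustration only.
    np_sequence = np.asarray(sequence)
    if np_sequence.dtype.kind != "i" or len(np_sequence) == 1:
        return sequence
    if len(np_sequence) == 0:
        return range(0)
    diff = int(np_sequence[1] - np_sequence[0])
    if diff == 0:
        return sequence
    expected = np_sequence[0] + diff * np.arange(len(np_sequence))
    if len(np_sequence) == 2 or bool((np_sequence == expected).all()):
        return range(int(np_sequence[0]), int(np_sequence[-1]) + diff, diff)
    return sequence

maybe_sequence_to_range_sketch([2, 4, 6, 8])  # range(2, 10, 2)
maybe_sequence_to_range_sketch([2, 4, 7, 8])  # returned unchanged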
13 changes: 6 additions & 7 deletions pandas/core/tools/timedeltas.py
@@ -112,18 +112,17 @@ def to_timedelta(
* 'W'
* 'D' / 'days' / 'day'
* 'hours' / 'hour' / 'hr' / 'h' / 'H'
* 'm' / 'minute' / 'min' / 'minutes' / 'T'
* 'm' / 'minute' / 'min' / 'minutes'
* 's' / 'seconds' / 'sec' / 'second' / 'S'
* 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
* 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
* 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'
* 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis'
* 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros'
* 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond'

Must not be specified when `arg` contains strings and ``errors="raise"``.

.. deprecated:: 2.2.0
Units 'H', 'T', 'S', 'L', 'U' and 'N' are deprecated and will be removed
in a future version. Please use 'h', 'min', 's', 'ms', 'us', and 'ns'
instead of 'H', 'T', 'S', 'L', 'U' and 'N'.
Units 'H' and 'S' are deprecated and will be removed
in a future version. Please use 'h' and 's'.

errors : {'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception.
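The docstring now lists only the surviving unit spellings. A quick sketch of what to_timedelta accepts and rejects after the cleanup, assuming a build with these changes (error wording approximate):

import pandas as pd

# Surviving unit spellings.
pd.to_timedelta(90, unit="s")
pd.to_timedelta(5, unit="min")
pd.to_timedelta([1, 2, 3], unit="ms")

# The enforced single-letter units are rejected outright.
for bad_unit in ["T", "L", "U", "N"]:
    try:
        pd.to_timedelta(5, unit=bad_unit)
    except ValueError as err:
        print(err)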
29 changes: 5 additions & 24 deletions pandas/tests/indexes/datetimes/test_date_range.py
@@ -772,30 +772,11 @@ def test_freq_dateoffset_with_relateivedelta_nanos(self):
)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
"freq,freq_depr",
[
("h", "H"),
("2min", "2T"),
("1s", "1S"),
("2ms", "2L"),
("1us", "1U"),
("2ns", "2N"),
],
)
def test_frequencies_H_T_S_L_U_N_deprecated(self, freq, freq_depr):
# GH#52536
freq_msg = re.split("[0-9]*", freq, maxsplit=1)[1]
freq_depr_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
msg = (
f"'{freq_depr_msg}' is deprecated and will be removed in a future version, "
)
f"please use '{freq_msg}' instead"

expected = date_range("1/1/2000", periods=2, freq=freq)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = date_range("1/1/2000", periods=2, freq=freq_depr)
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("freq", ["2T", "2L", "1l", "1U", "2N", "2n"])
def test_frequency_H_T_S_L_U_N_raises(self, freq):
msg = f"Invalid frequency: {freq}"
with pytest.raises(ValueError, match=msg):
date_range("1/1/2000", periods=2, freq=freq)

@pytest.mark.parametrize(
"freq,freq_depr",
10 changes: 10 additions & 0 deletions pandas/tests/indexes/period/test_constructors.py
@@ -60,6 +60,16 @@ def test_period_index_from_datetime_index_invalid_freq(self, freq):
with pytest.raises(ValueError, match=msg):
rng.to_period()

@pytest.mark.parametrize("freq_depr", ["2T", "1l", "2U", "n"])
def test_period_index_T_L_U_N_raises(self, freq_depr):
# GH#9586
msg = f"Invalid frequency: {freq_depr}"

with pytest.raises(ValueError, match=msg):
period_range("2020-01", "2020-05", freq=freq_depr)
with pytest.raises(ValueError, match=msg):
PeriodIndex(["2020-01", "2020-05"], freq=freq_depr)


class TestPeriodIndex:
def test_from_ordinals(self):