From dad8d355577e8f1b51968b7e9e98799e55bb9499 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 25 Feb 2023 18:40:10 -0500 Subject: [PATCH 01/11] COMPAT: Update code for latest Cython 3 beta --- pandas/_libs/algos.pyx | 2 +- pandas/_libs/lib.pyx | 8 ++++---- pandas/_libs/tslibs/offsets.pyx | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 8 +++++--- pandas/_libs/window/aggregations.pyx | 4 +++- 5 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 81fd5792ee9e5..1f701a871abe9 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -166,7 +166,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): @cython.wraparound(False) @cython.boundscheck(False) -def is_lexsorted(list_of_arrays: list) -> bint: +def is_lexsorted(list_of_arrays: list) -> bool: cdef: Py_ssize_t i Py_ssize_t n, nlevels diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index d20f87ec91d1b..0c22f9bec6fde 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -150,11 +150,11 @@ def memory_usage_of_objects(arr: object[:]) -> int64_t: Does not include the actual bytes of the pointers """ - i: Py_ssize_t - n: Py_ssize_t - size: int64_t + cdef: + Py_ssize_t i + Py_ssize_t n + int64_t size = 0 - size = 0 n = len(arr) for i in range(n): size += arr[i].__sizeof__() diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e8e0ddbb404de..7d678c60a2737 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4563,7 +4563,7 @@ def roll_qtrday(other: datetime, n: int, month: int, cdef int _roll_qtrday(npy_datetimestruct* dts, int n, int months_since, - str day_opt) nogil except? -1: + str day_opt) except? -1 nogil: """ See roll_qtrday.__doc__ """ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index d68fea7d08b2a..3eba5c9e3a01f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -614,12 +614,14 @@ cdef int64_t parse_timedelta_string(str ts) except? -1: and current_unit is None): raise ValueError("no units specified") + # Need absolute value here, since Cython can't figure + # out exponent can't be negative by itself if len(frac) > 0 and len(frac) <= 3: - m = 10**(3 -len(frac)) * 1000 * 1000 + m = 10**(abs(3 -len(frac))) * 1000 * 1000 elif len(frac) > 3 and len(frac) <= 6: - m = 10**(6 -len(frac)) * 1000 + m = 10**(abs(6 -len(frac))) * 1000 elif len(frac) > 6 and len(frac) <= 9: - m = 10**(9 -len(frac)) + m = 10**(abs(9 -len(frac))) else: m = 1 frac = frac[:9] diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 3055b8ff48cc9..511df25c3a87f 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -21,6 +21,8 @@ from numpy cimport ( cnp.import_array() +import cython + from pandas._libs.algos import is_monotonic @@ -1733,7 +1735,7 @@ def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights, # ---------------------------------------------------------------------- # Exponentially weighted moving - +@cython.cpow(True) def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end, int minp, float64_t com, bint adjust, bint ignore_na, const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray: From 44d5a34fc9cc8be18a7fc274336d4042edebbf6e Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 25 Feb 2023 20:09:05 -0500 Subject: [PATCH 02/11] Update ubuntu.yml --- .github/workflows/ubuntu.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 7b31c5392849b..a3f53dd0520a2 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -76,6 +76,9 @@ jobs: env_file: actions-310-numpydev.yaml pattern: "not slow and not network and not single_cpu" test_args: "-W error::DeprecationWarning -W error::FutureWarning" + # TODO(cython3): Re-enable once next-beta(after beta 1) comes out + # There are some warnings failing the build with -werror + PANDAS_CI: 0 exclude: - env_file: actions-38.yaml pyarrow_version: "8" From cade082aade411a308f8f19090b6f5cfac19600c Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 25 Feb 2023 20:28:45 -0500 Subject: [PATCH 03/11] Update ubuntu.yml --- .github/workflows/ubuntu.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index a3f53dd0520a2..4e5e3bacc6c18 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -14,9 +14,6 @@ on: paths-ignore: - "doc/**" -env: - PANDAS_CI: 1 - permissions: contents: read @@ -32,6 +29,7 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] pattern: ["not single_cpu", "single_cpu"] pyarrow_version: ["8", "9", "10"] + PANDAS_CI: 1 include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml From 64e5a95508431acd2fa4e140ea854ccf1f0fb4f3 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 25 Feb 2023 20:31:33 -0500 Subject: [PATCH 04/11] Update ubuntu.yml --- .github/workflows/ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 4e5e3bacc6c18..ad27134e4dbca 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -29,7 +29,7 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] pattern: ["not single_cpu", "single_cpu"] pyarrow_version: ["8", "9", "10"] - PANDAS_CI: 1 + PANDAS_CI: [1] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml From f19edf1a8c2bf610f80e003cf714ec6cd78baab2 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 25 Feb 2023 21:33:18 -0500 Subject: [PATCH 05/11] correctly propagate PANDAS_CI --- .github/workflows/ubuntu.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index ad27134e4dbca..2ca22e6356b11 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -29,7 +29,7 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] pattern: ["not single_cpu", "single_cpu"] pyarrow_version: ["8", "9", "10"] - PANDAS_CI: [1] + pandas_ci: [1] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml @@ -76,7 +76,7 @@ jobs: test_args: "-W error::DeprecationWarning -W error::FutureWarning" # TODO(cython3): Re-enable once next-beta(after beta 1) comes out # There are some warnings failing the build with -werror - PANDAS_CI: 0 + pandas_ci: 0 exclude: - env_file: actions-38.yaml pyarrow_version: "8" @@ -100,6 +100,7 @@ jobs: LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }} PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} + PANDAS_CI: ${{ matrix.pandas_ci }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} From 205c42db9df4d27442d4a15818b89bfceb24826b Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Feb 2023 11:42:25 -0500 Subject: [PATCH 06/11] Fix more pow --- pandas/_libs/sparse_op_helper.pxi.in | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index ac0c705fafbcd..774a8c579f6ce 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -128,7 +128,9 @@ def get_dispatch(dtypes): {{for opname, dtype, rdtype in get_dispatch(dtypes)}} - +{{if opname == "pow"}} +@cython.cpow(True) # Cython 3 matches Python pow, which isn't what we want here +{{endif}} @cython.wraparound(False) @cython.boundscheck(False) cdef tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, @@ -229,7 +231,9 @@ cdef tuple block_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, return out, out_index, {{(opname, 'xfill', 'yfill', dtype) | get_op}} - +{{if opname == "pow"}} +@cython.cpow(True) # Cython 3 matches Python pow, which isn't what we want here +{{endif}} @cython.wraparound(False) @cython.boundscheck(False) cdef tuple int_op_{{opname}}_{{dtype}}({{dtype}}_t[:] x_, From 2e744fe442d722be75c97ecefff48addaf3b69bf Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Feb 2023 12:30:26 -0500 Subject: [PATCH 07/11] hack to force Cython to realize correct specialization in join --- pandas/_libs/join.pyx | 14 ++++++++++---- pandas/core/reshape/merge.py | 8 +++++++- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 667eda1b1f1da..5300572f42163 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -834,8 +834,8 @@ def asof_join_forward_on_X_by_Y(numeric_t[:] left_values, return left_indexer, right_indexer -def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values, - numeric_t[:] right_values, +def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values, + ndarray[numeric_t] right_values, by_t[:] left_by_values, by_t[:] right_by_values, bint allow_exact_matches=True, @@ -850,7 +850,13 @@ def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values, numeric_t bdiff, fdiff # search both forward and backward - bli, bri = asof_join_backward_on_X_by_Y( + # TODO(cython3): + # Bug in beta1 preventing Cython from choosing + # right specialization when one fused memview is None + # Doesn't matter what type we choose + # (nothing happens anyways since it is None) + # GH 51640 + bli, bri = asof_join_backward_on_X_by_Y[f"{left_values.dtype}_t", object]( left_values, right_values, left_by_values, @@ -859,7 +865,7 @@ def asof_join_nearest_on_X_by_Y(numeric_t[:] left_values, tolerance, use_hashtable ) - fli, fri = asof_join_forward_on_X_by_Y( + fli, fri = asof_join_forward_on_X_by_Y[f"{left_values.dtype}_t", object]( left_values, right_values, left_by_values, diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 86bf9bf95e1cc..2752efd850933 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2160,7 +2160,13 @@ def injection(obj): else: # choose appropriate function by type func = _asof_by_function(self.direction) - return func( + # TODO(cython3): + # Bug in beta1 preventing Cython from choosing + # right specialization when one fused memview is None + # Doesn't matter what type we choose + # (nothing happens anyways since it is None) + # GH 51640 + return func[f"{left_values.dtype}_t", object]( left_values, right_values, None, From 7a3e4fb01734d428c9711205fefb12fd79fd9182 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Feb 2023 12:38:00 -0500 Subject: [PATCH 08/11] use cpow in timedeltas.pyx --- pandas/_libs/tslibs/timedeltas.pyx | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3eba5c9e3a01f..8f8be37b9f23f 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -489,6 +489,7 @@ cdef int64_t _item_to_timedelta64( raise +@cython.cpow(True) cdef int64_t parse_timedelta_string(str ts) except? -1: """ Parse a regular format timedelta string. Return an int64_t (in ns) @@ -614,14 +615,12 @@ cdef int64_t parse_timedelta_string(str ts) except? -1: and current_unit is None): raise ValueError("no units specified") - # Need absolute value here, since Cython can't figure - # out exponent can't be negative by itself if len(frac) > 0 and len(frac) <= 3: - m = 10**(abs(3 -len(frac))) * 1000 * 1000 + m = 10**(3 -len(frac)) * 1000 * 1000 elif len(frac) > 3 and len(frac) <= 6: - m = 10**(abs(6 -len(frac))) * 1000 + m = 10**(6 -len(frac)) * 1000 elif len(frac) > 6 and len(frac) <= 9: - m = 10**(abs(9 -len(frac))) + m = 10**(9 -len(frac)) else: m = 1 frac = frac[:9] From d09ee0d1eeb28ea3b891341b71a387328aa23f78 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Feb 2023 13:07:47 -0500 Subject: [PATCH 09/11] bump cython --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index e748d20d6d6f0..aed83f8f434ba 100644 --- a/environment.yml +++ b/environment.yml @@ -8,7 +8,7 @@ dependencies: # build dependencies - versioneer[toml] - - cython=0.29.32 + - cython=0.29.33 # test dependencies - pytest>=7.0.0 From a46ffb9eaabfba66cc0d0335a269a2a6479e5a34 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 26 Feb 2023 15:11:26 -0500 Subject: [PATCH 10/11] bump cython here as well --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 0329588de17fd..878d6f49f3358 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,7 +3,7 @@ pip versioneer[toml] -cython==0.29.32 +cython==0.29.33 pytest>=7.0.0 pytest-cov pytest-xdist>=2.2.0 From 4f2bb81b92f1d1b282694a54cc96000ae8325f3c Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Mon, 27 Feb 2023 11:45:34 -0500 Subject: [PATCH 11/11] fixes --- pandas/_libs/join.pyx | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 5300572f42163..2b3b147470cef 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -836,8 +836,8 @@ def asof_join_forward_on_X_by_Y(numeric_t[:] left_values, def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values, ndarray[numeric_t] right_values, - by_t[:] left_by_values, - by_t[:] right_by_values, + ndarray[by_t] left_by_values, + ndarray[by_t] right_by_values, bint allow_exact_matches=True, tolerance=None, bint use_hashtable=True): @@ -856,7 +856,11 @@ def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values, # Doesn't matter what type we choose # (nothing happens anyways since it is None) # GH 51640 - bli, bri = asof_join_backward_on_X_by_Y[f"{left_values.dtype}_t", object]( + if left_by_values is not None and left_by_values.dtype != object: + by_dtype = f"{left_by_values.dtype}_t" + else: + by_dtype = object + bli, bri = asof_join_backward_on_X_by_Y[f"{left_values.dtype}_t", by_dtype]( left_values, right_values, left_by_values, @@ -865,7 +869,7 @@ def asof_join_nearest_on_X_by_Y(ndarray[numeric_t] left_values, tolerance, use_hashtable ) - fli, fri = asof_join_forward_on_X_by_Y[f"{left_values.dtype}_t", object]( + fli, fri = asof_join_forward_on_X_by_Y[f"{left_values.dtype}_t", by_dtype]( left_values, right_values, left_by_values,