From 3b57972ceb49c11198eb0bec6be0006260d48085 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 1 Feb 2024 09:04:05 +0000 Subject: [PATCH 01/50] CoW: Enable CoW by default and remove warning build (#56633) --- .github/workflows/unit-tests.yml | 12 -------- asv_bench/benchmarks/algos/isin.py | 3 +- asv_bench/benchmarks/strings.py | 3 +- pandas/_config/__init__.py | 6 ++-- pandas/conftest.py | 4 +-- pandas/core/generic.py | 37 +++++++----------------- pandas/core/indexes/multi.py | 2 ++ pandas/tests/copy_view/test_internals.py | 30 ------------------- pandas/tests/extension/conftest.py | 7 ++--- pandas/tests/frame/methods/test_copy.py | 22 -------------- pandas/tests/series/test_ufunc.py | 5 +--- pandas/tests/test_downstream.py | 4 +-- pandas/util/_test_decorators.py | 14 --------- 13 files changed, 26 insertions(+), 123 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a3cffb4b03b93..2b09aa9343b79 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -73,18 +73,6 @@ jobs: env_file: actions-312.yaml pattern: "not slow and not network and not single_cpu" pandas_copy_on_write: "1" - - name: "Copy-on-Write 3.11 (warnings)" - env_file: actions-311.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "warn" - - name: "Copy-on-Write 3.10 (warnings)" - env_file: actions-310.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "warn" - - name: "Copy-on-Write 3.9 (warnings)" - env_file: actions-39.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "warn" - name: "Pypy" env_file: actions-pypy-39.yaml pattern: "not slow and not network and not single_cpu" diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py index f9ea3d5684751..a17732c70c2c7 100644 --- a/asv_bench/benchmarks/algos/isin.py +++ 
b/asv_bench/benchmarks/algos/isin.py @@ -59,7 +59,8 @@ def setup(self, dtype): elif dtype in ["str", "string[python]", "string[pyarrow]"]: try: self.series = Series( - Index([f"i-{i}" for i in range(N)], dtype=object), dtype=dtype + Index([f"i-{i}" for i in range(N)], dtype=object)._values, + dtype=dtype, ) except ImportError as err: raise NotImplementedError from err diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index e6842fbc13f46..467fab857d306 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -19,7 +19,8 @@ class Dtypes: def setup(self, dtype): try: self.s = Series( - Index([f"i-{i}" for i in range(10000)], dtype=object), dtype=dtype + Index([f"i-{i}" for i in range(10000)], dtype=object)._values, + dtype=dtype, ) except ImportError as err: raise NotImplementedError from err diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 5b2bac2e8d747..0594d1c190a72 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -32,13 +32,11 @@ def using_copy_on_write() -> bool: - _mode_options = _global_config["mode"] - return _mode_options["copy_on_write"] is True + return True def warn_copy_on_write() -> bool: - _mode_options = _global_config["mode"] - return _mode_options["copy_on_write"] == "warn" + return False def using_nullable_dtypes() -> bool: diff --git a/pandas/conftest.py b/pandas/conftest.py index 26e03ca30d4fb..db251a07aeb5d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1963,7 +1963,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return pd.options.mode.copy_on_write is True + return True @pytest.fixture @@ -1971,7 +1971,7 @@ def warn_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is in warning mode. 
""" - return pd.options.mode.copy_on_write == "warn" + return False @pytest.fixture diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 037f809eaabca..73f9481e53dea 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6704,8 +6704,7 @@ def copy(self, deep: bool_t | None = True) -> Self: :ref:`gotchas ` when copying in a threading environment. - When ``copy_on_write`` in pandas config is set to ``True``, the - ``copy_on_write`` config takes effect even when ``deep=False``. + Copy-on-Write protects shallow copies against accidental modifications. This means that any changes to the copied data would make a new copy of the data upon write (and vice versa). Changes made to either the original or copied variable would not be reflected in the counterpart. @@ -6731,12 +6730,15 @@ def copy(self, deep: bool_t | None = True) -> Self: >>> deep = s.copy() >>> shallow = s.copy(deep=False) - Shallow copy shares data and index with original. + Shallow copy shares index with original, the data is a + view of the original. >>> s is shallow False - >>> s.values is shallow.values and s.index is shallow.index - True + >>> s.values is shallow.values + False + >>> s.index is shallow.index + False Deep copy has own copy of data and index. @@ -6745,18 +6747,17 @@ def copy(self, deep: bool_t | None = True) -> Self: >>> s.values is deep.values or s.index is deep.index False - Updates to the data shared by shallow copy and original is reflected - in both (NOTE: this will no longer be true for pandas >= 3.0); - deep copy remains unchanged. + The shallow copy is protected against updating the original object + as well. Thus, updates will only reflect in one of both objects. 
>>> s.iloc[0] = 3 >>> shallow.iloc[1] = 4 >>> s a 3 - b 4 + b 2 dtype: int64 >>> shallow - a 3 + a 1 b 4 dtype: int64 >>> deep @@ -6779,22 +6780,6 @@ def copy(self, deep: bool_t | None = True) -> Self: 0 [10, 2] 1 [3, 4] dtype: object - - **Copy-on-Write is set to true**, the shallow copy is not modified - when the original data is changed: - - >>> with pd.option_context("mode.copy_on_write", True): - ... s = pd.Series([1, 2], index=["a", "b"]) - ... copy = s.copy(deep=False) - ... s.iloc[0] = 100 - ... s - a 100 - b 2 - dtype: int64 - >>> copy - a 1 - b 2 - dtype: int64 """ data = self._mgr.copy(deep=deep) self._clear_item_cache() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a80ee421a1b8a..0495f23508c09 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3492,6 +3492,8 @@ def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]: "cannot index with a boolean indexer that " "is not the same length as the index" ) + if isinstance(k, (ABCSeries, Index)): + k = k._values lvl_indexer = np.asarray(k) if indexer is None: lvl_indexer = lvl_indexer.copy() diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index 400fb8e03c18c..f1a4decce623f 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -1,7 +1,6 @@ import numpy as np import pytest -import pandas as pd from pandas import DataFrame import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -42,35 +41,6 @@ def test_consolidate(using_copy_on_write): assert df.loc[0, "b"] == 0.1 -@pytest.mark.single_cpu -def test_switch_options(): - # ensure we can switch the value of the option within one session - # (assuming data is constructed after switching) - - # using the option_context to ensure we set back to global option value - # after running the test - with pd.option_context("mode.copy_on_write", False): - df = DataFrame({"a": [1, 2, 3], "b": 
[0.1, 0.2, 0.3]}) - subset = df[:] - subset.iloc[0, 0] = 0 - # df updated with CoW disabled - assert df.iloc[0, 0] == 0 - - pd.options.mode.copy_on_write = True - df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - subset = df[:] - subset.iloc[0, 0] = 0 - # df not updated with CoW enabled - assert df.iloc[0, 0] == 1 - - pd.options.mode.copy_on_write = False - df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) - subset = df[:] - subset.iloc[0, 0] = 0 - # df updated with CoW disabled - assert df.iloc[0, 0] == 0 - - @pytest.mark.parametrize("dtype", [np.intp, np.int8]) @pytest.mark.parametrize( "locs, arr", diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 77f1dd2a8e49c..3a3844d5a8b7a 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -2,10 +2,7 @@ import pytest -from pandas import ( - Series, - options, -) +from pandas import Series @pytest.fixture @@ -222,4 +219,4 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. 
""" - return options.mode.copy_on_write is True + return True diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py index 6208d0256a655..5b72a84320c52 100644 --- a/pandas/tests/frame/methods/test_copy.py +++ b/pandas/tests/frame/methods/test_copy.py @@ -1,10 +1,7 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - from pandas import DataFrame -import pandas._testing as tm class TestCopy: @@ -18,25 +15,6 @@ def test_copy_index_name_checking(self, float_frame, attr): getattr(cp, attr).name = "foo" assert getattr(float_frame, attr).name is None - @td.skip_copy_on_write_invalid_test - def test_copy_cache(self): - # GH#31784 _item_cache not cleared on copy causes incorrect reads after updates - df = DataFrame({"a": [1]}) - - df["x"] = [0] - df["a"] - - df.copy() - - df["a"].values[0] = -1 - - tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0]})) - - df["y"] = [0] - - assert df["a"].values[0] == -1 - tm.assert_frame_equal(df, DataFrame({"a": [-1], "x": [0], "y": [0]})) - def test_copy(self, float_frame, float_string_frame): cop = float_frame.copy() cop["E"] = cop["A"] diff --git a/pandas/tests/series/test_ufunc.py b/pandas/tests/series/test_ufunc.py index 3ef319174313d..94a6910509e2d 100644 --- a/pandas/tests/series/test_ufunc.py +++ b/pandas/tests/series/test_ufunc.py @@ -5,8 +5,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.arrays import SparseArray @@ -456,8 +454,7 @@ def add3(x, y, z): ufunc(ser, ser, df) -# TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082 -@td.skip_copy_on_write_not_yet_implemented +@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082") def test_np_fix(): # np.fix is not a ufunc but is composed of several ufunc calls under the hood # with `out` and `where` keywords diff --git a/pandas/tests/test_downstream.py 
b/pandas/tests/test_downstream.py index 10776fe5d050f..feba0e86c6b32 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -58,8 +58,8 @@ def test_dask_ufunc(): s = Series([1.5, 2.3, 3.7, 4.0]) ds = dd.from_pandas(s, npartitions=2) - result = da.fix(ds).compute() - expected = np.fix(s) + result = da.log(ds).compute() + expected = np.log(s) tm.assert_series_equal(result, expected) finally: pd.set_option("compute.use_numexpr", olduse) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 37908c9ac255b..78626781289c4 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -33,12 +33,9 @@ def test_foo(): import pytest -from pandas._config import get_option - if TYPE_CHECKING: from pandas._typing import F - from pandas.compat import ( IS64, is_platform_windows, @@ -144,14 +141,3 @@ def documented_fixture(fixture): return fixture return documented_fixture - - -skip_copy_on_write_not_yet_implemented = pytest.mark.xfail( - get_option("mode.copy_on_write") is True, - reason="Not yet implemented/adapted for Copy-on-Write mode", -) - -skip_copy_on_write_invalid_test = pytest.mark.skipif( - get_option("mode.copy_on_write") is True, - reason="Test not valid for Copy-on-Write mode", -) From e982297ffa6d814994a5880f1c12d83af814ede0 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 2 Feb 2024 00:14:55 +0700 Subject: [PATCH 02/50] CI: Pinning doc previewer action to current release (#57190) --- .github/workflows/comment-commands.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/comment-commands.yml b/.github/workflows/comment-commands.yml index b24a689078d4e..da9f6ac8bff78 100644 --- a/.github/workflows/comment-commands.yml +++ b/.github/workflows/comment-commands.yml @@ -24,7 +24,7 @@ jobs: concurrency: group: ${{ github.actor }}-preview-docs steps: - - uses: pandas-dev/github-doc-previewer@master + - uses: pandas-dev/github-doc-previewer@v0.3.1 
with: previewer-server: "https://pandas.pydata.org/preview" artifact-job: "Doc Build and Upload" From 965a65b72b3e14014b8de817b450571bc4f2f6f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C3=ABl=20Defferrard?= Date: Thu, 1 Feb 2024 18:32:41 +0100 Subject: [PATCH 03/50] fix typo (#57194) --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 67885fcaec852..e4502c031b1fb 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -952,7 +952,7 @@ class Window(BaseWindow): If ``'left'``, the last point in the window is excluded from calculations. - If ``'both'``, the no points in the window are excluded from calculations. + If ``'both'``, no point in the window is excluded from calculations. If ``'neither'``, the first and last points in the window are excluded from calculations. From 24f7db72a3c93a4d0cfa3763724c01ac65d412c6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 1 Feb 2024 12:39:51 -0500 Subject: [PATCH 04/50] DEPR: Enforce deprecation of groupby(..., axis=1) (#57186) * DEPR: Enforce deprecation of groupby(..., axis=1) * More removals and cleanups * whatsnew --- doc/source/user_guide/groupby.rst | 2 +- doc/source/user_guide/window.rst | 6 +- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 21 -- pandas/core/generic.py | 101 +-------- pandas/core/groupby/generic.py | 102 +++------ pandas/core/groupby/groupby.py | 213 +++++------------- pandas/core/groupby/grouper.py | 52 ++--- pandas/core/groupby/ops.py | 52 ++--- pandas/core/resample.py | 33 +-- pandas/core/reshape/merge.py | 2 +- pandas/core/reshape/pivot.py | 2 +- pandas/core/series.py | 3 - pandas/core/shared_docs.py | 9 - pandas/core/window/ewm.py | 24 +- pandas/core/window/expanding.py | 12 +- pandas/core/window/numba_.py | 3 +- pandas/core/window/online.py | 7 +- pandas/core/window/rolling.py | 47 
+--- pandas/plotting/_matplotlib/boxplot.py | 7 +- pandas/tests/apply/test_str.py | 30 +-- .../tests/groupby/aggregate/test_aggregate.py | 94 -------- pandas/tests/groupby/aggregate/test_cython.py | 15 -- pandas/tests/groupby/methods/test_describe.py | 12 - pandas/tests/groupby/methods/test_nth.py | 73 ------ pandas/tests/groupby/methods/test_quantile.py | 26 --- pandas/tests/groupby/methods/test_size.py | 31 --- .../groupby/methods/test_value_counts.py | 8 - pandas/tests/groupby/test_apply.py | 59 ----- pandas/tests/groupby/test_apply_mutate.py | 63 ------ pandas/tests/groupby/test_categorical.py | 16 -- pandas/tests/groupby/test_filters.py | 13 -- pandas/tests/groupby/test_groupby.py | 162 +------------ pandas/tests/groupby/test_grouping.py | 60 +---- pandas/tests/groupby/test_indexing.py | 14 -- pandas/tests/groupby/test_numba.py | 7 - pandas/tests/groupby/test_reductions.py | 24 +- pandas/tests/groupby/test_timegrouper.py | 2 +- .../tests/groupby/transform/test_transform.py | 112 +-------- pandas/tests/plotting/test_boxplot_method.py | 19 -- pandas/tests/resample/test_datetime_index.py | 35 --- pandas/tests/resample/test_resample_api.py | 55 +---- pandas/tests/resample/test_time_grouper.py | 4 +- pandas/tests/test_multilevel.py | 31 --- pandas/tests/window/test_api.py | 39 ---- pandas/tests/window/test_apply.py | 10 - pandas/tests/window/test_ewm.py | 50 ---- pandas/tests/window/test_expanding.py | 42 +--- pandas/tests/window/test_numba.py | 39 ++-- pandas/tests/window/test_rolling.py | 163 +------------- pandas/tests/window/test_timeseries_window.py | 14 +- pandas/tests/window/test_win_type.py | 17 -- 52 files changed, 213 insertions(+), 1825 deletions(-) diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index a25e2ed179b80..2a4d7791322e5 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -247,7 +247,7 @@ GroupBy object attributes ~~~~~~~~~~~~~~~~~~~~~~~~~ The ``groups`` attribute is a 
dictionary whose keys are the computed unique groups -and corresponding values are the axis labels belonging to each group. In the +and corresponding values are the index labels belonging to each group. In the above example we have: .. ipython:: python diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index d997aa119b359..e25c4c2441920 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -79,9 +79,9 @@ which will first group the data by the specified keys and then perform a windowi .. versionadded:: 1.3.0 Some windowing operations also support the ``method='table'`` option in the constructor which -performs the windowing operation over an entire :class:`DataFrame` instead of a single column or row at a time. -This can provide a useful performance benefit for a :class:`DataFrame` with many columns or rows -(with the corresponding ``axis`` argument) or the ability to utilize other columns during the windowing +performs the windowing operation over an entire :class:`DataFrame` instead of a single column at a time. +This can provide a useful performance benefit for a :class:`DataFrame` with many columns +or the ability to utilize other columns during the windowing operation. The ``method='table'`` option can only be used if ``engine='numba'`` is specified in the corresponding method call. 
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f316f6b44c1b4..c5ac2a800223b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,6 +102,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) +- Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed the ``ArrayManager`` (:issue:`55043`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b3e18d6ceaddd..ca488190a8704 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9050,7 +9050,6 @@ def update( def groupby( self, by=None, - axis: Axis | lib.NoDefault = lib.no_default, level: IndexLabel | None = None, as_index: bool = True, sort: bool = True, @@ -9058,25 +9057,6 @@ def groupby( observed: bool | lib.NoDefault = lib.no_default, dropna: bool = True, ) -> DataFrameGroupBy: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - if axis == 1: - warnings.warn( - "DataFrame.groupby with axis=1 is deprecated. 
Do " - "`frame.T.groupby(...)` without axis instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - "The 'axis' keyword in DataFrame.groupby is deprecated and " - "will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - from pandas.core.groupby.generic import DataFrameGroupBy if level is None and by is None: @@ -9085,7 +9065,6 @@ def groupby( return DataFrameGroupBy( obj=self, keys=by, - axis=axis, level=level, as_index=as_index, sort=sort, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 73f9481e53dea..0afc1b607a8dc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9351,7 +9351,6 @@ def between_time( def resample( self, rule, - axis: Axis | lib.NoDefault = lib.no_default, closed: Literal["right", "left"] | None = None, label: Literal["right", "left"] | None = None, convention: Literal["start", "end", "s", "e"] | lib.NoDefault = lib.no_default, @@ -9374,13 +9373,6 @@ def resample( ---------- rule : DateOffset, Timedelta or str The offset string or object representing target conversion. - axis : {{0 or 'index', 1 or 'columns'}}, default 0 - Which axis to use for up- or down-sampling. For `Series` this parameter - is unused and defaults to 0. Must be - `DatetimeIndex`, `TimedeltaIndex` or `PeriodIndex`. - - .. deprecated:: 2.0.0 - Use frame.T.resample(...) instead. closed : {{'right', 'left'}}, default None Which side of bin interval is closed. The default is 'left' for all frequency offsets except for 'ME', 'YE', 'QE', 'BME', @@ -9692,25 +9684,6 @@ def resample( """ from pandas.core.resample import get_resampler - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - if axis == 1: - warnings.warn( - "DataFrame.resample with axis=1 is deprecated. 
Do " - "`frame.T.resample(...)` without axis instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.resample is " - "deprecated and will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - if kind is not lib.no_default: # GH#55895 warnings.warn( @@ -9740,7 +9713,6 @@ def resample( freq=rule, label=label, closed=closed, - axis=axis, kind=kind, convention=convention, key=on, @@ -12511,33 +12483,10 @@ def rolling( center: bool_t = False, win_type: str | None = None, on: str | None = None, - axis: Axis | lib.NoDefault = lib.no_default, closed: IntervalClosedType | None = None, step: int | None = None, method: str = "single", ) -> Window | Rolling: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "rolling" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. 
" - "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - if win_type is not None: return Window( self, @@ -12546,7 +12495,6 @@ def rolling( center=center, win_type=win_type, on=on, - axis=axis, closed=closed, step=step, method=method, @@ -12559,7 +12507,6 @@ def rolling( center=center, win_type=win_type, on=on, - axis=axis, closed=closed, step=step, method=method, @@ -12570,31 +12517,9 @@ def rolling( def expanding( self, min_periods: int = 1, - axis: Axis | lib.NoDefault = lib.no_default, method: Literal["single", "table"] = "single", ) -> Expanding: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "expanding" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - return Expanding(self, min_periods=min_periods, axis=axis, method=method) + return Expanding(self, min_periods=min_periods, method=method) @final @doc(ExponentialMovingWindow) @@ -12607,32 +12532,9 @@ def ewm( min_periods: int | None = 0, adjust: bool_t = True, ignore_na: bool_t = False, - axis: Axis | lib.NoDefault = lib.no_default, times: np.ndarray | DataFrame | Series | None = None, method: Literal["single", "table"] = "single", ) -> ExponentialMovingWindow: - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "ewm" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - f"Use obj.T.{name}(...) 
instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - "deprecated and will be removed in a future version. " - "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - return ExponentialMovingWindow( self, com=com, @@ -12642,7 +12544,6 @@ def ewm( min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, - axis=axis, times=times, method=method, ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1a23d237dca46..e0811c914864b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -95,7 +95,6 @@ from pandas._typing import ( ArrayLike, - AxisInt, BlockManager, CorrelationMethod, IndexLabel, @@ -446,9 +445,7 @@ def _aggregate_named(self, func, *args, **kwargs): result = {} initialized = False - for name, group in self._grouper.get_iterator( - self._obj_with_exclusions, axis=self.axis - ): + for name, group in self._grouper.get_iterator(self._obj_with_exclusions): # needed for pandas/tests/groupby/test_groupby.py::test_basic_aggregations object.__setattr__(group, "name", name) @@ -512,16 +509,12 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) - def _cython_transform( - self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs - ): - assert axis == 0 # handled by caller - + def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs): obj = self._obj_with_exclusions try: result = self._grouper._cython_operation( - "transform", obj._values, how, axis, **kwargs + "transform", obj._values, how, 0, **kwargs ) except NotImplementedError as err: # e.g. 
test_groupby_raises_string @@ -544,7 +537,7 @@ def _transform_general( results = [] for name, group in self._grouper.get_iterator( - self._obj_with_exclusions, axis=self.axis + self._obj_with_exclusions, ): # this setattr is needed for test_transform_lambda_with_datetimetz object.__setattr__(group, "name", name) @@ -615,9 +608,7 @@ def true_and_notna(x) -> bool: try: indices = [ self._get_index(name) - for name, group in self._grouper.get_iterator( - self._obj_with_exclusions, axis=self.axis - ) + for name, group in self._grouper.get_iterator(self._obj_with_exclusions) if true_and_notna(group) ] except (ValueError, TypeError) as err: @@ -928,7 +919,7 @@ def take( 0 rabbit mammal 15.0 >>> gb = df["name"].groupby([1, 1, 2, 2, 2]) - Take elements at positions 0 and 1 along the axis 0 in each group (default). + Take elements at rows 0 and 1 in each group. >>> gb.take([0, 1]) 1 4 falcon @@ -947,7 +938,7 @@ def take( 1 monkey Name: name, dtype: object """ - result = self._op_via_apply("take", indices=indices, axis=0, **kwargs) + result = self._op_via_apply("take", indices=indices, **kwargs) return result def skew( @@ -1334,12 +1325,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) # can't return early result = self._aggregate_frame(func, *args, **kwargs) - elif self.axis == 1: - # _aggregate_multiple_funcs does not allow self.axis == 1 - # Note: axis == 1 precludes 'not self.as_index', see __init__ - result = self._aggregate_frame(func) - return result - else: # try to treat as if we are passing a list gba = GroupByApply(self, [func], args=(), kwargs={}) @@ -1385,8 +1370,6 @@ def _python_agg_general(self, func, *args, **kwargs): return self._python_apply_general(f, self._selected_obj, is_agg=True) obj = self._obj_with_exclusions - if self.axis == 1: - obj = obj.T if not len(obj.columns): # e.g. 
test_margins_no_values_no_cols @@ -1408,15 +1391,13 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: obj = self._obj_with_exclusions result: dict[Hashable, NDFrame | np.ndarray] = {} - for name, grp_df in self._grouper.get_iterator(obj, self.axis): + for name, grp_df in self._grouper.get_iterator(obj): fres = func(grp_df, *args, **kwargs) result[name] = fres result_index = self._grouper.result_index - other_ax = obj.axes[1 - self.axis] - out = self.obj._constructor(result, index=other_ax, columns=result_index) - if self.axis == 0: - out = out.T + out = self.obj._constructor(result, index=obj.columns, columns=result_index) + out = out.T return out @@ -1516,18 +1497,13 @@ def _wrap_applied_output_series( # vstack+constructor is faster than concat and handles MI-columns stacked_values = np.vstack([np.asarray(v) for v in values]) - if self.axis == 0: - index = key_index - columns = first_not_none.index.copy() - if columns.name is None: - # GH6124 - propagate name of Series when it's consistent - names = {v.name for v in values} - if len(names) == 1: - columns.name = next(iter(names)) - else: - index = first_not_none.index - columns = key_index - stacked_values = stacked_values.T + index = key_index + columns = first_not_none.index.copy() + if columns.name is None: + # GH6124 - propagate name of Series when it's consistent + names = {v.name for v in values} + if len(names) == 1: + columns.name = next(iter(names)) if stacked_values.dtype == object: # We'll have the DataFrame constructor do inference @@ -1543,16 +1519,11 @@ def _cython_transform( self, how: str, numeric_only: bool = False, - axis: AxisInt = 0, **kwargs, ) -> DataFrame: - assert axis == 0 # handled by caller - - # With self.axis == 0, we have multi-block tests + # We have multi-block tests # e.g. test_rank_min_int, test_cython_transform_frame # test_transform_numeric_ret - # With self.axis == 1, _get_data_to_aggregate does a transpose - # so we always have a single block. 
mgr: BlockManager = self._get_data_to_aggregate( numeric_only=numeric_only, name=how ) @@ -1565,7 +1536,6 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: res_mgr = mgr.apply(arr_func) res_df = self.obj._constructor_from_mgr(res_mgr, axes=res_mgr.axes) - res_df = self._maybe_transpose_result(res_df) return res_df def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): @@ -1577,7 +1547,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): applied = [] obj = self._obj_with_exclusions - gen = self._grouper.get_iterator(obj, axis=self.axis) + gen = self._grouper.get_iterator(obj) fast_path, slow_path = self._define_paths(func, *args, **kwargs) # Determine whether to use slow or fast path by evaluating on the first group. @@ -1611,10 +1581,9 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): res = _wrap_transform_general_frame(self.obj, group, res) applied.append(res) - concat_index = obj.columns if self.axis == 0 else obj.index - other_axis = 1 if self.axis == 0 else 0 # switches between 0 & 1 - concatenated = concat(applied, axis=self.axis, verify_integrity=False) - concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) + concat_index = obj.columns + concatenated = concat(applied, axis=0, verify_integrity=False) + concatenated = concatenated.reindex(concat_index, axis=1, copy=False) return self._set_result_index_ordered(concatenated) __examples_dataframe_doc = dedent( @@ -1682,12 +1651,12 @@ def _define_paths(self, func, *args, **kwargs): if isinstance(func, str): fast_path = lambda group: getattr(group, func)(*args, **kwargs) slow_path = lambda group: group.apply( - lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis + lambda x: getattr(x, func)(*args, **kwargs), axis=0 ) else: fast_path = lambda group: func(group, *args, **kwargs) slow_path = lambda group: group.apply( - lambda x: func(x, *args, **kwargs), axis=self.axis + lambda x: func(x, *args, **kwargs), 
axis=0 ) return fast_path, slow_path @@ -1771,7 +1740,7 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): indices = [] obj = self._selected_obj - gen = self._grouper.get_iterator(obj, axis=self.axis) + gen = self._grouper.get_iterator(obj) for name, group in gen: # 2023-02-27 no tests are broken this pinning, but it is documented in the @@ -1799,9 +1768,6 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): return self._apply_filter(indices, dropna) def __getitem__(self, key) -> DataFrameGroupBy | SeriesGroupBy: - if self.axis == 1: - # GH 37725 - raise ValueError("Cannot subset columns when using axis=1") # per GH 23566 if isinstance(key, tuple) and len(key) > 1: # if len == 1, then it becomes a SeriesGroupBy and this is actually @@ -1831,7 +1797,6 @@ def _gotitem(self, key, ndim: int, subset=None): return DataFrameGroupBy( subset, self.keys, - axis=self.axis, level=self.level, grouper=self._grouper, exclusions=self.exclusions, @@ -1865,11 +1830,7 @@ def _get_data_to_aggregate( self, *, numeric_only: bool = False, name: str | None = None ) -> BlockManager: obj = self._obj_with_exclusions - if self.axis == 1: - mgr = obj.T._mgr - else: - mgr = obj._mgr - + mgr = obj._mgr if numeric_only: mgr = mgr.get_numeric_data() return mgr @@ -1949,13 +1910,6 @@ def nunique(self, dropna: bool = True) -> DataFrame: 4 ham 5 x 5 ham 5 y """ - - if self.axis != 0: - # see test_groupby_crash_on_nunique - return self._python_apply_general( - lambda sgb: sgb.nunique(dropna), self._obj_with_exclusions, is_agg=True - ) - return self._apply_to_column_groupbys(lambda sgb: sgb.nunique(dropna)) def idxmax( @@ -2250,7 +2204,7 @@ def take( 0 rabbit mammal 15.0 >>> gb = df.groupby([1, 1, 2, 2, 2]) - Take elements at positions 0 and 1 along the axis 0 (default). + Take elements at rows 0 and 1. Note how the indices selected in the result do not correspond to our input indices 0 and 1. 
That's because we are selecting the 0th @@ -2283,7 +2237,7 @@ def take( 2 0 rabbit mammal 15.0 1 monkey mammal NaN """ - result = self._op_via_apply("take", indices=indices, axis=0, **kwargs) + result = self._op_via_apply("take", indices=indices, **kwargs) return result def skew( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1f0e0567446c6..c4ae47348a64c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -19,7 +19,6 @@ class providing the base-class of operations. partial, wraps, ) -import inspect from textwrap import dedent from typing import ( TYPE_CHECKING, @@ -47,8 +46,6 @@ class providing the base-class of operations. from pandas._typing import ( AnyArrayLike, ArrayLike, - Axis, - AxisInt, DtypeObj, FillnaOptions, IndexLabel, @@ -533,8 +530,7 @@ class providing the base-class of operations. -------- %(klass)s.groupby.apply : Apply function ``func`` group-wise and combine the results together. -%(klass)s.groupby.aggregate : Aggregate using one or more - operations over the specified axis. +%(klass)s.groupby.aggregate : Aggregate using one or more operations. %(klass)s.transform : Call ``func`` on self producing a %(klass)s with the same axis shape as self. @@ -576,7 +572,7 @@ class providing the base-class of operations. %(example)s""" _agg_template_series = """ -Aggregate using one or more operations over the specified axis. +Aggregate using one or more operations. Parameters ---------- @@ -638,8 +634,7 @@ class providing the base-class of operations. and combine the results together. {klass}.groupby.transform : Transforms the Series on each group based on the given function. -{klass}.aggregate : Aggregate using one or more - operations over the specified axis. +{klass}.aggregate : Aggregate using one or more operations. Notes ----- @@ -658,7 +653,7 @@ class providing the base-class of operations. 
{examples}""" _agg_template_frame = """ -Aggregate using one or more operations over the specified axis. +Aggregate using one or more operations. Parameters ---------- @@ -671,7 +666,7 @@ class providing the base-class of operations. - function - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. + - dict of index labels -> functions, function names or list of such. - None, in which case ``**kwargs`` are used with Named Aggregation. Here the output has one column for each element in ``**kwargs``. The name of the column is keyword, whereas the value determines the aggregation used to compute @@ -717,8 +712,7 @@ class providing the base-class of operations. and combine the results together. {klass}.groupby.transform : Transforms the Series on each group based on the given function. -{klass}.aggregate : Aggregate using one or more - operations over the specified axis. +{klass}.aggregate : Aggregate using one or more operations. 
Notes ----- @@ -775,7 +769,6 @@ def f(self): class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): _hidden_attrs = PandasObject._hidden_attrs | { "as_index", - "axis", "dropna", "exclusions", "grouper", @@ -787,7 +780,6 @@ class BaseGroupBy(PandasObject, SelectionMixin[NDFrameT], GroupByIndexingMixin): "sort", } - axis: AxisInt _grouper: ops.BaseGrouper keys: _KeysArgType | None = None level: IndexLabel | None = None @@ -1132,9 +1124,7 @@ def get_group(self, name) -> DataFrame | Series: inds = self._get_index(name) if not len(inds): raise KeyError(name) - - indexer = inds if self.axis == 0 else (slice(None), inds) - return self._selected_obj.iloc[indexer] + return self._selected_obj.iloc[inds] @final def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: @@ -1210,7 +1200,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: """ keys = self.keys level = self.level - result = self._grouper.get_iterator(self._selected_obj, axis=self.axis) + result = self._grouper.get_iterator(self._selected_obj) # error: Argument 1 to "len" has incompatible type "Hashable"; expected "Sized" if is_list_like(level) and len(level) == 1: # type: ignore[arg-type] # GH 51583 @@ -1247,7 +1237,6 @@ class GroupBy(BaseGroupBy[NDFrameT]): Parameters ---------- obj : pandas object - axis : int, default 0 level : int, default None Level of MultiIndex groupings : list of Grouping objects @@ -1276,7 +1265,7 @@ class GroupBy(BaseGroupBy[NDFrameT]): :: - grouped = obj.groupby(keys, axis=axis) + grouped = obj.groupby(keys) for key, group in grouped: # do something with the data @@ -1308,7 +1297,6 @@ def __init__( self, obj: NDFrameT, keys: _KeysArgType | None = None, - axis: Axis = 0, level: IndexLabel | None = None, grouper: ops.BaseGrouper | None = None, exclusions: frozenset[Hashable] | None = None, @@ -1324,11 +1312,6 @@ def __init__( assert isinstance(obj, NDFrame), type(obj) self.level = level - - if not as_index: - if axis != 0: - raise 
ValueError("as_index=False only valid for axis=0") - self.as_index = as_index self.keys = keys self.sort = sort @@ -1339,7 +1322,6 @@ def __init__( grouper, exclusions, obj = get_grouper( obj, keys, - axis=axis, level=level, sort=sort, observed=False if observed is lib.no_default else observed, @@ -1360,7 +1342,6 @@ def __init__( self.observed = observed self.obj = obj - self.axis = obj._get_axis_number(axis) self._grouper = grouper self.exclusions = frozenset(exclusions) if exclusions else frozenset() @@ -1374,35 +1355,10 @@ def __getattr__(self, attr: str): f"'{type(self).__name__}' object has no attribute '{attr}'" ) - @final - def _deprecate_axis(self, axis: int, name: str) -> None: - if axis == 1: - warnings.warn( - f"{type(self).__name__}.{name} with axis=1 is deprecated and " - "will be removed in a future version. Operate on the un-grouped " - "DataFrame instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is deprecated " - "and will be removed in a future version. 
" - "Call without passing 'axis' instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - @final def _op_via_apply(self, name: str, *args, **kwargs): """Compute the result of an operation by using GroupBy's apply.""" f = getattr(type(self._obj_with_exclusions), name) - sig = inspect.signature(f) - - # a little trickery for aggregation functions that need an axis - # argument - if "axis" in sig.parameters: - kwargs["axis"] = self.axis def curried(x): return f(x, *args, **kwargs) @@ -1451,7 +1407,7 @@ def _concat_objects( result = concat( values, - axis=self.axis, + axis=0, keys=group_keys, levels=group_levels, names=group_names, @@ -1461,12 +1417,12 @@ def _concat_objects( # GH5610, returns a MI, with the first level being a # range index keys = list(range(len(values))) - result = concat(values, axis=self.axis, keys=keys) + result = concat(values, axis=0, keys=keys) elif not not_indexed_same: - result = concat(values, axis=self.axis) + result = concat(values, axis=0) - ax = self._selected_obj._get_axis(self.axis) + ax = self._selected_obj.index if self.dropna: labels = self._grouper.group_info[0] mask = labels != -1 @@ -1478,16 +1434,16 @@ def _concat_objects( # so we resort to this # GH 14776, 30667 # TODO: can we reuse e.g. _reindex_non_unique? - if ax.has_duplicates and not result.axes[self.axis].equals(ax): + if ax.has_duplicates and not result.axes[0].equals(ax): # e.g. 
test_category_order_transformer target = algorithms.unique1d(ax._values) indexer, _ = result.index.get_indexer_non_unique(target) - result = result.take(indexer, axis=self.axis) + result = result.take(indexer, axis=0) else: - result = result.reindex(ax, axis=self.axis, copy=False) + result = result.reindex(ax, axis=0, copy=False) else: - result = concat(values, axis=self.axis) + result = concat(values, axis=0) if self.obj.ndim == 1: name = self.obj.name @@ -1508,22 +1464,22 @@ def _set_result_index_ordered( # set the result index on the passed values object and # return the new object, xref 8046 - obj_axis = self.obj._get_axis(self.axis) + index = self.obj.index if self._grouper.is_monotonic and not self._grouper.has_dropped_na: # shortcut if we have an already ordered grouper - result = result.set_axis(obj_axis, axis=self.axis, copy=False) + result = result.set_axis(index, axis=0, copy=False) return result # row order is scrambled => sort the rows by position in original index original_positions = Index(self._grouper.result_ilocs()) - result = result.set_axis(original_positions, axis=self.axis, copy=False) - result = result.sort_index(axis=self.axis) + result = result.set_axis(original_positions, axis=0, copy=False) + result = result.sort_index(axis=0) if self._grouper.has_dropped_na: # Add back in any missing rows due to dropna - index here is integral # with values referring to the row of the input so can use RangeIndex - result = result.reindex(RangeIndex(len(obj_axis)), axis=self.axis) - result = result.set_axis(obj_axis, axis=self.axis, copy=False) + result = result.reindex(RangeIndex(len(index)), axis=0) + result = result.set_axis(index, axis=0, copy=False) return result @@ -1559,17 +1515,6 @@ def _insert_inaxis_grouper(self, result: Series | DataFrame) -> DataFrame: return result - @final - def _maybe_transpose_result(self, result: NDFrameT) -> NDFrameT: - if self.axis == 1: - # Only relevant for DataFrameGroupBy, no-op for SeriesGroupBy - result = result.T 
- if result.index.equals(self.obj.index): - # Retain e.g. DatetimeIndex/TimedeltaIndex freq - # e.g. test_groupby_crash_on_nunique - result.index = self.obj.index.copy() - return result - @final def _wrap_aggregated_output( self, @@ -1607,10 +1552,7 @@ def _wrap_aggregated_output( result.index = index - # error: Argument 1 to "_maybe_transpose_result" of "GroupBy" has - # incompatible type "Union[Series, DataFrame]"; expected "NDFrameT" - res = self._maybe_transpose_result(result) # type: ignore[arg-type] - return self._reindex_output(res, qs=qs) + return self._reindex_output(result, qs=qs) def _wrap_applied_output( self, @@ -1630,7 +1572,7 @@ def _numba_prep(self, data: DataFrame): sorted_index = self._grouper._sort_idx sorted_ids = self._grouper._sorted_ids - sorted_data = data.take(sorted_index, axis=self.axis).to_numpy() + sorted_data = data.take(sorted_index, axis=0).to_numpy() # GH 46867 index_data = data.index if isinstance(index_data, MultiIndex): @@ -1666,8 +1608,6 @@ def _numba_agg_general( raise NotImplementedError( "as_index=False is not supported. Use .reset_index() instead." 
) - if self.axis == 1: - raise NotImplementedError("axis=1 is not supported.") data = self._obj_with_exclusions df = data if data.ndim == 2 else data.to_frame() @@ -1877,7 +1817,7 @@ def _python_apply_general( Series or DataFrame data after applying f """ - values, mutated = self._grouper.apply_groupwise(f, data, self.axis) + values, mutated = self._grouper.apply_groupwise(f, data) if not_indexed_same is None: not_indexed_same = mutated @@ -1995,13 +1935,9 @@ def array_func(values: ArrayLike) -> ArrayLike: if how in ["idxmin", "idxmax"]: res = self._wrap_idxmax_idxmin(res) out = self._wrap_aggregated_output(res) - if self.axis == 1: - out = out.infer_objects(copy=False) return out - def _cython_transform( - self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs - ): + def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs): raise AbstractMethodError(self) @final @@ -2055,7 +1991,7 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: # for each col, reshape to size of original frame by take operation ids, _, _ = self._grouper.group_info - result = result.reindex(self._grouper.result_index, axis=self.axis, copy=False) + result = result.reindex(self._grouper.result_index, axis=0, copy=False) if self.obj.ndim == 1: # i.e. 
SeriesGroupBy @@ -2063,15 +1999,14 @@ def _wrap_transform_fast_result(self, result: NDFrameT) -> NDFrameT: output = obj._constructor(out, index=obj.index, name=obj.name) else: # `.size()` gives Series output on DataFrame input, need axis 0 - axis = 0 if result.ndim == 1 else self.axis # GH#46209 # Don't convert indices: negative indices need to give rise # to null values in the result - new_ax = result.axes[axis].take(ids) + new_ax = result.index.take(ids) output = result._reindex_with_indexers( - {axis: (new_ax, ids)}, allow_dups=True, copy=False + {0: (new_ax, ids)}, allow_dups=True, copy=False ) - output = output.set_axis(obj._get_axis(self.axis), axis=axis) + output = output.set_axis(obj.index, axis=0) return output # ----------------------------------------------------------------- @@ -2084,7 +2019,7 @@ def _apply_filter(self, indices, dropna): else: indices = np.sort(np.concatenate(indices)) if dropna: - filtered = self._selected_obj.take(indices, axis=self.axis) + filtered = self._selected_obj.take(indices, axis=0) else: mask = np.empty(len(self._selected_obj.index), dtype=bool) mask.fill(False) @@ -2762,10 +2697,6 @@ def _value_counts( SeriesGroupBy additionally supports a bins argument. See the docstring of DataFrameGroupBy.value_counts for a description of arguments. 
""" - if self.axis == 1: - raise NotImplementedError( - "DataFrameGroupBy.value_counts only handles axis=0" - ) name = "proportion" if normalize else "count" df = self.obj @@ -2808,7 +2739,6 @@ def _value_counts( grouper, _, _ = get_grouper( df, key=key, - axis=self.axis, sort=self.sort, observed=False, dropna=dropna, @@ -3386,7 +3316,7 @@ def first( 3 6.0 3 """ - def first_compat(obj: NDFrameT, axis: AxisInt = 0): + def first_compat(obj: NDFrameT): def first(x: Series): """Helper function for first item that isn't NA.""" arr = x.array[notna(x.array)] @@ -3395,7 +3325,7 @@ def first(x: Series): return arr[0] if isinstance(obj, DataFrame): - return obj.apply(first, axis=axis) + return obj.apply(first) elif isinstance(obj, Series): return first(obj) else: # pragma: no cover @@ -3455,7 +3385,7 @@ def last( 3 6.0 3 """ - def last_compat(obj: NDFrameT, axis: AxisInt = 0): + def last_compat(obj: NDFrameT): def last(x: Series): """Helper function for last item that isn't NA.""" arr = x.array[notna(x.array)] @@ -3464,7 +3394,7 @@ def last(x: Series): return arr[-1] if isinstance(obj, DataFrame): - return obj.apply(last, axis=axis) + return obj.apply(last) elif isinstance(obj, Series): return last(obj) else: # pragma: no cover @@ -3595,8 +3525,6 @@ def describe( obj, not_indexed_same=True, ) - if self.axis == 1: - return result.T # GH#49256 - properly handle the grouping column(s) result = result.unstack() @@ -3795,13 +3723,6 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: Provided integer column is ignored and excluded from result since an integer index is not used to calculate the rolling window. - axis : int or str, default 0 - If ``0`` or ``'index'``, roll across the rows. - - If ``1`` or ``'columns'``, roll across the columns. - - For `Series` this parameter is unused and defaults to 0. - closed : str, default None If ``'right'``, the first point in the window is excluded from calculations. 
@@ -3992,12 +3913,6 @@ def blk_func(values: ArrayLike) -> ArrayLike: res_mgr = mgr.apply(blk_func) new_obj = self._wrap_agged_manager(res_mgr) - - if self.axis == 1: - # Only relevant for DataFrameGroupBy - new_obj = new_obj.T - new_obj.columns = self.obj.columns - new_obj.index = self.obj.index return new_obj @@ -4299,7 +4214,7 @@ def _nth( # old behaviour, but with all and any support for DataFrames. # modified in GH 7559 to have better perf n = cast(int, n) - dropped = self._selected_obj.dropna(how=dropna, axis=self.axis) + dropped = self._selected_obj.dropna(how=dropna, axis=0) # get a new grouper for our dropped obj grouper: np.ndarray | Index | ops.BaseGrouper @@ -4320,10 +4235,7 @@ def _nth( values = np.where(nulls, NA, grouper) # type: ignore[call-overload] grouper = Index(values, dtype="Int64") - if self.axis == 1: - grb = dropped.T.groupby(grouper, as_index=self.as_index, sort=self.sort) - else: - grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) + grb = dropped.groupby(grouper, as_index=self.as_index, sort=self.sort) return grb.nth(n) @final @@ -4376,12 +4288,8 @@ def quantile( """ mgr = self._get_data_to_aggregate(numeric_only=numeric_only, name="quantile") obj = self._wrap_agged_manager(mgr) - if self.axis == 1: - splitter = self._grouper._get_splitter(obj.T, axis=self.axis) - sdata = splitter._sorted_data.T - else: - splitter = self._grouper._get_splitter(obj, axis=self.axis) - sdata = splitter._sorted_data + splitter = self._grouper._get_splitter(obj) + sdata = splitter._sorted_data starts, ends = lib.generate_slices(splitter._slabels, splitter.ngroups) @@ -4619,7 +4527,7 @@ def ngroup(self, ascending: bool = True): dtype: int64 """ obj = self._obj_with_exclusions - index = obj._get_axis(self.axis) + index = obj.index comp_ids = self._grouper.group_info[0] dtype: type @@ -4693,7 +4601,7 @@ def cumcount(self, ascending: bool = True): 5 0 dtype: int64 """ - index = self._obj_with_exclusions._get_axis(self.axis) + index = 
self._obj_with_exclusions.index cumcounts = self._cumcount_array(ascending=ascending) return self._obj_1d_constructor(cumcounts, index) @@ -4780,7 +4688,6 @@ def rank( return self._cython_transform( "rank", numeric_only=False, - axis=0, **kwargs, ) @@ -5127,7 +5034,7 @@ def shift( obj = self._obj_with_exclusions shifted = obj._reindex_with_indexers( - {self.axis: (obj.axes[self.axis], res_indexer)}, + {0: (obj.index, res_indexer)}, fill_value=fill_value, allow_dups=True, ) @@ -5332,13 +5239,8 @@ def pct_change( fill_method = "ffill" limit = 0 filled = getattr(self, fill_method)(limit=limit) - if self.axis == 0: - fill_grp = filled.groupby(self._grouper.codes, group_keys=self.group_keys) - else: - fill_grp = filled.T.groupby(self._grouper.codes, group_keys=self.group_keys) + fill_grp = filled.groupby(self._grouper.codes, group_keys=self.group_keys) shifted = fill_grp.shift(periods=periods, freq=freq) - if self.axis == 1: - shifted = shifted.T return (filled / shifted) - 1 @final @@ -5425,7 +5327,7 @@ def tail(self, n: int = 5) -> NDFrameT: @final def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT: """ - Return _selected_obj with mask applied to the correct axis. + Return _selected_obj with mask applied. 
Parameters ---------- @@ -5439,11 +5341,7 @@ def _mask_selected_obj(self, mask: npt.NDArray[np.bool_]) -> NDFrameT: """ ids = self._grouper.group_info[0] mask = mask & (ids != -1) - - if self.axis == 0: - return self._selected_obj[mask] - else: - return self._selected_obj.iloc[:, mask] + return self._selected_obj[mask] @final def _reindex_output( @@ -5506,12 +5404,7 @@ def _reindex_output( if self.as_index: # Always holds for SeriesGroupBy unless GH#36507 is implemented - d = { - self.obj._get_axis_name(self.axis): index, - "copy": False, - "fill_value": fill_value, - } - return output.reindex(**d) # type: ignore[arg-type] + return output.reindex(index=index, copy=False, fill_value=fill_value) # GH 13204 # Here, the categorical in-axis groupers, which need to be fully @@ -5642,13 +5535,11 @@ def sample( return self._selected_obj size = sample.process_sampling_size(n, frac, replace) if weights is not None: - weights_arr = sample.preprocess_weights( - self._selected_obj, weights, axis=self.axis - ) + weights_arr = sample.preprocess_weights(self._selected_obj, weights, axis=0) random_state = com.random_state(random_state) - group_iterator = self._grouper.get_iterator(self._selected_obj, self.axis) + group_iterator = self._grouper.get_iterator(self._selected_obj) sampled_indices = [] for labels, obj in group_iterator: @@ -5670,7 +5561,7 @@ def sample( sampled_indices.append(grp_indices[grp_sample]) sampled_indices = np.concatenate(sampled_indices) - return self._selected_obj.take(sampled_indices, axis=self.axis) + return self._selected_obj.take(sampled_indices, axis=0) def _idxmax_idxmin( self, @@ -5747,7 +5638,7 @@ def _idxmax_idxmin( return result def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT: - index = self.obj._get_axis(self.axis) + index = self.obj.index if res.size == 0: result = res.astype(index.dtype) else: @@ -5778,7 +5669,6 @@ def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT: def get_groupby( obj: NDFrame, by: _KeysArgType | None = None, - 
axis: AxisInt = 0, grouper: ops.BaseGrouper | None = None, group_keys: bool = True, ) -> GroupBy: @@ -5797,7 +5687,6 @@ def get_groupby( return klass( obj=obj, keys=by, - axis=axis, grouper=grouper, group_keys=group_keys, ) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 08ee786170674..4b9cf5ab75525 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -17,7 +17,6 @@ warn_copy_on_write, ) -from pandas._libs import lib from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly @@ -55,7 +54,6 @@ from pandas._typing import ( ArrayLike, - Axis, NDFrameT, npt, ) @@ -68,10 +66,10 @@ class Grouper: A Grouper allows the user to specify a groupby instruction for an object. This specification will select a column via the key parameter, or if the - level and/or axis parameters are given, a level of the index of the target + level parameter is given, a level of the index of the target object. - If `axis` and/or `level` are passed as keywords to both `Grouper` and + If ``level`` is passed as a keyword to both `Grouper` and `groupby`, the values passed to `Grouper` take precedence. Parameters @@ -85,8 +83,6 @@ class Grouper: (via key or level) is a datetime-like object. For full specification of available frequencies, please see `here `_. - axis : str, int, defaults to 0 - Number/name of the axis. sort : bool, default to False Whether to sort the resulting labels. closed : {'left' or 'right'} @@ -249,7 +245,7 @@ class Grouper: _gpr_index: Index | None _grouper: Index | None - _attributes: tuple[str, ...] = ("key", "level", "freq", "axis", "sort", "dropna") + _attributes: tuple[str, ...] 
= ("key", "level", "freq", "sort", "dropna") def __new__(cls, *args, **kwargs): if kwargs.get("freq") is not None: @@ -263,29 +259,12 @@ def __init__( key=None, level=None, freq=None, - axis: Axis | lib.NoDefault = lib.no_default, sort: bool = False, dropna: bool = True, ) -> None: - if type(self) is Grouper: - # i.e. not TimeGrouper - if axis is not lib.no_default: - warnings.warn( - "Grouper axis keyword is deprecated and will be removed in a " - "future version. To group on axis=1, use obj.T.groupby(...) " - "instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - axis = 0 - if axis is lib.no_default: - axis = 0 - self.key = key self.level = level self.freq = freq - self.axis = axis self.sort = sort self.dropna = dropna @@ -315,7 +294,6 @@ def _get_grouper( grouper, _, obj = get_grouper( obj, [self.key], - axis=self.axis, level=self.level, sort=self.sort, validate=validate, @@ -381,7 +359,7 @@ def _set_grouper( ax = Index(obj[key], name=key) else: - ax = obj._get_axis(self.axis) + ax = obj.index if self.level is not None: level = self.level @@ -404,7 +382,7 @@ def _set_grouper( kind="mergesort", na_position="first" ) ax = ax.take(indexer) - obj = obj.take(indexer, axis=self.axis) + obj = obj.take(indexer, axis=0) # error: Incompatible types in assignment (expression has type # "NDFrameT", variable has type "None") @@ -846,7 +824,6 @@ def groups(self) -> dict[Hashable, np.ndarray]: def get_grouper( obj: NDFrameT, key=None, - axis: Axis = 0, level=None, sort: bool = True, observed: bool = False, @@ -862,8 +839,8 @@ def get_grouper( Groupers are ultimately index mappings. They can originate as: index mappings, keys to columns, functions, or Groupers - Groupers enable local references to axis,level,sort, while - the passed in axis, level, and sort are 'global'. + Groupers enable local references to level,sort, while + the passed in level, and sort are 'global'. 
This routine tries to figure out what the passing in references are and then creates a Grouping for each one, combined into @@ -875,10 +852,10 @@ def get_grouper( If validate, then check for key/level overlaps. """ - group_axis = obj._get_axis(axis) + group_axis = obj.index # validate that the passed single level is compatible with the passed - # axis of the object + # index of the object if level is not None: # TODO: These if-block and else-block are almost same. # MultiIndex instance check is removable, but it seems that there are @@ -911,11 +888,8 @@ def get_grouper( raise ValueError("multiple levels only valid with MultiIndex") if isinstance(level, str): - if obj._get_axis(axis).name != level: - raise ValueError( - f"level name {level} is not the name " - f"of the {obj._get_axis_name(axis)}" - ) + if obj.index.name != level: + raise ValueError(f"level name {level} is not the name of the index") elif level > 0 or level < -1: raise ValueError("level > 0 or level < -1 only valid with MultiIndex") @@ -1028,14 +1002,14 @@ def is_in_obj(gpr) -> bool: elif is_in_axis(gpr): # df.groupby('name') if obj.ndim != 1 and gpr in obj: if validate: - obj._check_label_or_level_ambiguity(gpr, axis=axis) + obj._check_label_or_level_ambiguity(gpr, axis=0) in_axis, name, gpr = True, gpr, obj[gpr] if gpr.ndim != 1: # non-unique columns; raise here to get the name in the # exception message raise ValueError(f"Grouper for '{name}' not 1-dimensional") exclusions.add(name) - elif obj._is_level_reference(gpr, axis=axis): + elif obj._is_level_reference(gpr, axis=0): in_axis, level, gpr = False, gpr, None else: raise KeyError(gpr) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index e2ddf9aa5c0c1..632ff7356d1c7 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -604,9 +604,7 @@ def __iter__(self) -> Iterator[Hashable]: def nkeys(self) -> int: return len(self.groupings) - def get_iterator( - self, data: NDFrameT, axis: AxisInt = 0 - ) -> 
Iterator[tuple[Hashable, NDFrameT]]: + def get_iterator(self, data: NDFrameT) -> Iterator[tuple[Hashable, NDFrameT]]: """ Groupby iterator @@ -615,12 +613,12 @@ def get_iterator( Generator yielding sequence of (name, subsetted object) for each group """ - splitter = self._get_splitter(data, axis=axis) + splitter = self._get_splitter(data) keys = self.group_keys_seq yield from zip(keys, splitter) @final - def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: + def _get_splitter(self, data: NDFrame) -> DataSplitter: """ Returns ------- @@ -633,7 +631,6 @@ def _get_splitter(self, data: NDFrame, axis: AxisInt = 0) -> DataSplitter: ngroups, sorted_ids=self._sorted_ids, sort_idx=self._sort_idx, - axis=axis, ) @final @@ -879,7 +876,7 @@ def _aggregate_series_pure_python( result = np.empty(ngroups, dtype="O") initialized = False - splitter = self._get_splitter(obj, axis=0) + splitter = self._get_splitter(obj) for i, group in enumerate(splitter): res = func(group) @@ -896,10 +893,10 @@ def _aggregate_series_pure_python( @final def apply_groupwise( - self, f: Callable, data: DataFrame | Series, axis: AxisInt = 0 + self, f: Callable, data: DataFrame | Series ) -> tuple[list, bool]: mutated = False - splitter = self._get_splitter(data, axis=axis) + splitter = self._get_splitter(data) group_keys = self.group_keys_seq result_values = [] @@ -917,7 +914,7 @@ def apply_groupwise( # group might be modified group_axes = group.axes res = f(group) - if not mutated and not _is_indexed_like(res, group_axes, axis): + if not mutated and not _is_indexed_like(res, group_axes): mutated = True result_values.append(res) # getattr pattern for __name__ is needed for functools.partial objects @@ -1024,7 +1021,7 @@ def codes_info(self) -> npt.NDArray[np.intp]: ids = ids[sorter] return ids - def get_iterator(self, data: NDFrame, axis: AxisInt = 0): + def get_iterator(self, data: NDFrame): """ Groupby iterator @@ -1033,12 +1030,7 @@ def get_iterator(self, data: NDFrame, axis: 
AxisInt = 0): Generator yielding sequence of (name, subsetted object) for each group """ - if axis == 0: - slicer = lambda start, edge: data.iloc[start:edge] - else: - slicer = lambda start, edge: data.iloc[:, start:edge] - - length = len(data.axes[axis]) + slicer = lambda start, edge: data.iloc[start:edge] start = 0 for edge, label in zip(self.bins, self.binlabels): @@ -1046,7 +1038,7 @@ def get_iterator(self, data: NDFrame, axis: AxisInt = 0): yield label, slicer(start, edge) start = edge - if start < length: + if start < len(data): yield self.binlabels[-1], slicer(start, None) @cache_readonly @@ -1110,13 +1102,13 @@ def groupings(self) -> list[grouper.Grouping]: return [ping] -def _is_indexed_like(obj, axes, axis: AxisInt) -> bool: +def _is_indexed_like(obj, axes) -> bool: if isinstance(obj, Series): if len(axes) > 1: return False - return obj.axes[axis].equals(axes[axis]) + return obj.index.equals(axes[0]) elif isinstance(obj, DataFrame): - return obj.axes[axis].equals(axes[axis]) + return obj.index.equals(axes[0]) return False @@ -1134,7 +1126,6 @@ def __init__( *, sort_idx: npt.NDArray[np.intp], sorted_ids: npt.NDArray[np.intp], - axis: AxisInt = 0, ) -> None: self.data = data self.labels = ensure_platform_int(labels) # _should_ already be np.intp @@ -1143,9 +1134,6 @@ def __init__( self._slabels = sorted_ids self._sort_idx = sort_idx - self.axis = axis - assert isinstance(axis, int), axis - def __iter__(self) -> Iterator: sdata = self._sorted_data @@ -1161,7 +1149,7 @@ def __iter__(self) -> Iterator: @cache_readonly def _sorted_data(self) -> NDFrameT: - return self.data.take(self._sort_idx, axis=self.axis) + return self.data.take(self._sort_idx, axis=0) def _chop(self, sdata, slice_obj: slice) -> NDFrame: raise AbstractMethodError(self) @@ -1179,11 +1167,8 @@ def _chop(self, sdata: Series, slice_obj: slice) -> Series: class FrameSplitter(DataSplitter): def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # Fastpath equivalent to: - # if 
self.axis == 0: - # return sdata.iloc[slice_obj] - # else: - # return sdata.iloc[:, slice_obj] - mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) + # return sdata.iloc[slice_obj] + mgr = sdata._mgr.get_slice(slice_obj, axis=1) df = sdata._constructor_from_mgr(mgr, axes=mgr.axes) return df.__finalize__(sdata, method="groupby") @@ -1195,7 +1180,6 @@ def _get_splitter( *, sort_idx: npt.NDArray[np.intp], sorted_ids: npt.NDArray[np.intp], - axis: AxisInt = 0, ) -> DataSplitter: if isinstance(data, Series): klass: type[DataSplitter] = SeriesSplitter @@ -1203,6 +1187,4 @@ def _get_splitter( # i.e. DataFrame klass = FrameSplitter - return klass( - data, labels, ngroups, sort_idx=sort_idx, sorted_ids=sorted_ids, axis=axis - ) + return klass(data, labels, ngroups, sort_idx=sort_idx, sorted_ids=sorted_ids) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4d6507d89ec90..bf5b7e5906180 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -101,7 +101,6 @@ Any, AnyArrayLike, Axis, - AxisInt, Concatenate, Frequency, IndexLabel, @@ -134,7 +133,6 @@ class Resampler(BaseGroupBy, PandasObject): ---------- obj : Series or DataFrame groupby : TimeGrouper - axis : int, default 0 kind : str or None 'period', 'timestamp' to override default index treatment @@ -156,7 +154,6 @@ class Resampler(BaseGroupBy, PandasObject): # to the groupby descriptor _attributes = [ "freq", - "axis", "closed", "label", "convention", @@ -169,7 +166,6 @@ def __init__( self, obj: NDFrame, timegrouper: TimeGrouper, - axis: Axis = 0, kind=None, *, gpr_index: Index, @@ -180,7 +176,6 @@ def __init__( self._timegrouper = timegrouper self.keys = None self.sort = True - self.axis = obj._get_axis_number(axis) self.kind = kind self.group_keys = group_keys self.as_index = True @@ -449,7 +444,7 @@ def _gotitem(self, key, ndim: int, subset=None): assert subset.ndim == 1 grouped = get_groupby( - subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys + subset, 
by=None, grouper=grouper, group_keys=self.group_keys ) return grouped @@ -462,9 +457,7 @@ def _groupby_and_aggregate(self, how, *args, **kwargs): # Excludes `on` column when provided obj = self._obj_with_exclusions - grouped = get_groupby( - obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys - ) + grouped = get_groupby(obj, by=None, grouper=grouper, group_keys=self.group_keys) try: if callable(how): @@ -1801,12 +1794,7 @@ def _downsample(self, how, **kwargs): # we are downsampling # we want to call the actual grouper method here - if self.axis == 0: - result = obj.groupby(self._grouper).aggregate(how, **kwargs) - else: - # test_resample_axis1 - result = obj.T.groupby(self._grouper).aggregate(how, **kwargs).T - + result = obj.groupby(self._grouper).aggregate(how, **kwargs) return self._wrap_result(result) def _adjust_binner_for_upsample(self, binner): @@ -1837,8 +1825,6 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): .fillna: Fill NA/NaN values using the specified method. 
""" - if self.axis: - raise AssertionError("axis must be 0") if self._from_selection: raise ValueError( "Upsampling from level= or on= selection " @@ -2010,7 +1996,6 @@ def _upsample(self, method, limit: int | None = None, fill_value=None): obj, indexer, new_index, - axis=self.axis, ) return self._wrap_result(new_obj) @@ -2131,7 +2116,6 @@ def __init__( closed: Literal["left", "right"] | None = None, label: Literal["left", "right"] | None = None, how: str = "mean", - axis: Axis = 0, fill_method=None, limit: int | None = None, kind: str | None = None, @@ -2228,7 +2212,7 @@ def __init__( # always sort time groupers kwargs["sort"] = True - super().__init__(freq=freq, key=key, axis=axis, **kwargs) + super().__init__(freq=freq, key=key, **kwargs) def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: """ @@ -2255,7 +2239,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: obj, timegrouper=self, kind=kind, - axis=self.axis, group_keys=self.group_keys, gpr_index=ax, ) @@ -2279,7 +2262,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: obj, timegrouper=self, kind=kind, - axis=self.axis, group_keys=self.group_keys, gpr_index=ax, ) @@ -2287,7 +2269,6 @@ def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler: return TimedeltaIndexResampler( obj, timegrouper=self, - axis=self.axis, group_keys=self.group_keys, gpr_index=ax, ) @@ -2559,7 +2540,9 @@ def _set_grouper( def _take_new_index( - obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0 + obj: NDFrameT, + indexer: npt.NDArray[np.intp], + new_index: Index, ) -> NDFrameT: if isinstance(obj, ABCSeries): new_values = algos.take_nd(obj._values, indexer) @@ -2568,8 +2551,6 @@ def _take_new_index( new_values, index=new_index, name=obj.name ) elif isinstance(obj, ABCDataFrame): - if axis == 1: - raise NotImplementedError("axis 1 is not supported") new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1) return 
obj._constructor_from_mgr(new_mgr, axes=new_mgr.axes) else: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 22304bbdd1575..e53eea6f7f075 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -264,7 +264,7 @@ def _groupby_and_merge( if all(item in right.columns for item in by): rby = right.groupby(by, sort=False) - for key, lhs in lby._grouper.get_iterator(lby._selected_obj, axis=lby.axis): + for key, lhs in lby._grouper.get_iterator(lby._selected_obj): if rby is None: rhs = right else: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index ff973f6defc09..db28bfb1e9200 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -472,7 +472,7 @@ def _all_key(): margin_keys.append(all_key) else: - margin = data.groupby(level=0, axis=0, observed=observed).apply(aggfunc) + margin = data.groupby(level=0, observed=observed).apply(aggfunc) all_key = _all_key() table[all_key] = margin result = table diff --git a/pandas/core/series.py b/pandas/core/series.py index 7b56facc7a91f..657b384c57235 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2205,7 +2205,6 @@ def _set_name( def groupby( self, by=None, - axis: Axis = 0, level: IndexLabel | None = None, as_index: bool = True, sort: bool = True, @@ -2219,12 +2218,10 @@ def groupby( raise TypeError("You have to supply one of 'by' and 'level'") if not as_index: raise TypeError("as_index=False only valid with DataFrame") - axis = self._get_axis_number(axis) return SeriesGroupBy( obj=self, keys=by, - axis=axis, level=level, as_index=as_index, sort=sort, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 49b380e0af01e..9d693f034d911 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -102,15 +102,6 @@ the values are used as-is to determine the groups. A label or list of labels may be passed to group by the columns in ``self``. 
Notice that a tuple is interpreted as a (single) key. -axis : {0 or 'index', 1 or 'columns'}, default 0 - Split along rows (0) or columns (1). For `Series` this parameter - is unused and defaults to 0. - - .. deprecated:: 2.1.0 - - Will be removed and behave like axis=0 in a future version. - For ``axis=1``, do ``frame.T.groupby(...)`` instead. - level : int, level name, or sequence of such, default None If the axis is a MultiIndex (hierarchical), group by a particular level or levels. Do not specify both ``by`` and ``level``. diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 9ebf32d3e536e..3c07fc156aea1 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -56,7 +56,6 @@ if TYPE_CHECKING: from pandas._typing import ( - Axis, TimedeltaConvertibleTypes, npt, ) @@ -204,13 +203,6 @@ class ExponentialMovingWindow(BaseWindow): [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if ``adjust=True``, and :math:`1-\alpha` and :math:`\alpha` if ``adjust=False``. - axis : {0, 1}, default 0 - If ``0`` or ``'index'``, calculate across the rows. - - If ``1`` or ``'columns'``, calculate across the columns. - - For `Series` this parameter is unused and defaults to 0. - times : np.ndarray, Series, default None Only applicable to ``mean()``. @@ -328,7 +320,6 @@ class ExponentialMovingWindow(BaseWindow): "min_periods", "adjust", "ignore_na", - "axis", "times", "method", ] @@ -343,7 +334,6 @@ def __init__( min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, - axis: Axis = 0, times: np.ndarray | NDFrame | None = None, method: str = "single", *, @@ -356,7 +346,6 @@ def __init__( center=False, closed=None, method=method, - axis=axis, selection=selection, ) self.com = com @@ -397,9 +386,7 @@ def __init__( "times is not None." 
) # Without times, points are equally spaced - self._deltas = np.ones( - max(self.obj.shape[self.axis] - 1, 0), dtype=np.float64 - ) + self._deltas = np.ones(max(self.obj.shape[0] - 1, 0), dtype=np.float64) self._com = get_center_of_mass( # error: Argument 3 to "get_center_of_mass" has incompatible type # "Union[float, Any, None, timedelta64, signedinteger[_64Bit]]"; @@ -460,7 +447,6 @@ def online( min_periods=self.min_periods, adjust=self.adjust, ignore_na=self.ignore_na, - axis=self.axis, times=self.times, engine=engine, engine_kwargs=engine_kwargs, @@ -941,7 +927,6 @@ def __init__( min_periods: int | None = 0, adjust: bool = True, ignore_na: bool = False, - axis: Axis = 0, times: np.ndarray | NDFrame | None = None, engine: str = "numba", engine_kwargs: dict[str, bool] | None = None, @@ -961,13 +946,10 @@ def __init__( min_periods=min_periods, adjust=adjust, ignore_na=ignore_na, - axis=axis, times=times, selection=selection, ) - self._mean = EWMMeanState( - self._com, self.adjust, self.ignore_na, self.axis, obj.shape - ) + self._mean = EWMMeanState(self._com, self.adjust, self.ignore_na, obj.shape) if maybe_use_numba(engine): self.engine = engine self.engine_kwargs = engine_kwargs @@ -1055,7 +1037,7 @@ def mean(self, *args, update=None, update_times=None, **kwargs): if update_times is not None: raise NotImplementedError("update_times is not implemented.") update_deltas = np.ones( - max(self._selected_obj.shape[self.axis - 1] - 1, 0), dtype=np.float64 + max(self._selected_obj.shape[-1] - 1, 0), dtype=np.float64 ) if update is not None: if self._mean.last_ewm is None: diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index aac10596ffc69..1bf26c482337c 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -36,7 +36,6 @@ if TYPE_CHECKING: from pandas._typing import ( - Axis, QuantileInterpolation, WindowingRankType, ) @@ -58,13 +57,6 @@ class Expanding(RollingAndExpandingMixin): Minimum number of 
observations in window required to have a value; otherwise, result is ``np.nan``. - axis : int or str, default 0 - If ``0`` or ``'index'``, roll across the rows. - - If ``1`` or ``'columns'``, roll across the columns. - - For `Series` this parameter is unused and defaults to 0. - method : str {'single', 'table'}, default 'single' Execute the rolling operation per single column or row (``'single'``) or over the entire object (``'table'``). @@ -119,20 +111,18 @@ class Expanding(RollingAndExpandingMixin): 4 7.0 """ - _attributes: list[str] = ["min_periods", "axis", "method"] + _attributes: list[str] = ["min_periods", "method"] def __init__( self, obj: NDFrame, min_periods: int = 1, - axis: Axis = 0, method: str = "single", selection=None, ) -> None: super().__init__( obj=obj, min_periods=min_periods, - axis=axis, method=method, selection=selection, ) diff --git a/pandas/core/window/numba_.py b/pandas/core/window/numba_.py index 9357945e78c63..eb06479fc325e 100644 --- a/pandas/core/window/numba_.py +++ b/pandas/core/window/numba_.py @@ -186,8 +186,7 @@ def generate_numba_table_func( Generate a numba jitted function to apply window calculations table-wise. Func will be passed a M window size x N number of columns array, and - must return a 1 x N number of columns array. Func is intended to operate - row-wise, but the result will be transposed for axis=1. + must return a 1 x N number of columns array. 1. jit the user's function 2. 
Return a rolling apply function with the jitted function inline diff --git a/pandas/core/window/online.py b/pandas/core/window/online.py index 29d1f740e021f..72236bf5ccea2 100644 --- a/pandas/core/window/online.py +++ b/pandas/core/window/online.py @@ -87,15 +87,14 @@ def online_ewma( class EWMMeanState: - def __init__(self, com, adjust, ignore_na, axis, shape) -> None: + def __init__(self, com, adjust, ignore_na, shape) -> None: alpha = 1.0 / (1.0 + com) - self.axis = axis self.shape = shape self.adjust = adjust self.ignore_na = ignore_na self.new_wt = 1.0 if adjust else alpha self.old_wt_factor = 1.0 - alpha - self.old_wt = np.ones(self.shape[self.axis - 1]) + self.old_wt = np.ones(self.shape[-1]) self.last_ewm = None def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func): @@ -114,5 +113,5 @@ def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func): return result def reset(self) -> None: - self.old_wt = np.ones(self.shape[self.axis - 1]) + self.old_wt = np.ones(self.shape[-1]) self.last_ewm = None diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index e4502c031b1fb..72a61b2877809 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -100,7 +100,6 @@ from pandas._typing import ( ArrayLike, - Axis, NDFrameT, QuantileInterpolation, WindowingRankType, @@ -131,7 +130,6 @@ def __init__( min_periods: int | None = None, center: bool | None = False, win_type: str | None = None, - axis: Axis = 0, on: str | Index | None = None, closed: str | None = None, step: int | None = None, @@ -147,15 +145,10 @@ def __init__( self.min_periods = min_periods self.center = center self.win_type = win_type - self.axis = obj._get_axis_number(axis) if axis is not None else None self.method = method self._win_freq_i8: int | None = None if self.on is None: - if self.axis == 0: - self._on = self.obj.index - else: - # i.e. 
self.axis == 1 - self._on = self.obj.columns + self._on = self.obj.index elif isinstance(self.on, Index): self._on = self.on elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: @@ -278,14 +271,8 @@ def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT: # filter out the on from the object if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - if obj.ndim > 1 and (numeric_only or self.axis == 1): - # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything - # to float to calculate the complete row at once. We exclude all non-numeric - # dtypes. + if obj.ndim > 1 and numeric_only: obj = self._make_numeric_only(obj) - if self.axis == 1: - obj = obj.astype("float64", copy=False) - obj._mgr = obj._mgr.consolidate() return obj def _gotitem(self, key, ndim, subset=None): @@ -477,9 +464,6 @@ def _apply_columnwise( obj = notna(obj).astype(int) obj._mgr = obj._mgr.consolidate() - if self.axis == 1: - obj = obj.T - taker = [] res_values = [] for i, arr in enumerate(obj._iter_column_arrays()): @@ -505,9 +489,6 @@ def _apply_columnwise( verify_integrity=False, ) - if self.axis == 1: - df = df.T - return self._resolve_output(df, obj) def _apply_tablewise( @@ -523,9 +504,7 @@ def _apply_tablewise( raise ValueError("method='table' not applicable for Series objects.") obj = self._create_data(self._selected_obj, numeric_only) values = self._prep_values(obj.to_numpy()) - values = values.T if self.axis == 1 else values result = homogeneous_func(values) - result = result.T if self.axis == 1 else result index = self._slice_axis_for_step(obj.index, result) columns = ( obj.columns @@ -633,8 +612,6 @@ def _numba_apply( else window_indexer.window_size ) obj = self._create_data(self._selected_obj) - if self.axis == 1: - obj = obj.T values = self._prep_values(obj.to_numpy()) if values.ndim == 1: values = values.reshape(-1, 1) @@ -660,7 
+637,6 @@ def _numba_apply( result = aggregator( values.T, start=start, end=end, min_periods=min_periods, **func_kwargs ).T - result = result.T if self.axis == 1 else result index = self._slice_axis_for_step(obj.index, result) if obj.ndim == 1: result = result.squeeze() @@ -935,18 +911,6 @@ class Window(BaseWindow): Provided integer column is ignored and excluded from result since an integer index is not used to calculate the rolling window. - axis : int or str, default 0 - If ``0`` or ``'index'``, roll across the rows. - - If ``1`` or ``'columns'``, roll across the columns. - - For `Series` this parameter is unused and defaults to 0. - - .. deprecated:: 2.1.0 - - The axis keyword is deprecated. For ``axis=1``, - transpose the DataFrame first instead. - closed : str, default None If ``'right'``, the first point in the window is excluded from calculations. @@ -1138,7 +1102,6 @@ class Window(BaseWindow): "min_periods", "center", "win_type", - "axis", "on", "closed", "step", @@ -1858,7 +1821,6 @@ class Rolling(RollingAndExpandingMixin): "min_periods", "center", "win_type", - "axis", "on", "closed", "step", @@ -1926,10 +1888,7 @@ def _validate_datetimelike_monotonic(self) -> None: def _raise_monotonic_error(self, msg: str): on = self.on if on is None: - if self.axis == 0: - on = "index" - else: - on = "column" + on = "index" raise ValueError(f"{on} {msg}") @doc( diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 084452ec23719..b41e03d87b275 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -544,12 +544,7 @@ def boxplot_frame_groupby( maybe_adjust_figure(fig, bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) else: keys, frames = zip(*grouped) - if grouped.axis == 0: - df = pd.concat(frames, keys=keys, axis=1) - elif len(frames) > 1: - df = frames[0].join(frames[1::]) - else: - df = frames[0] + df = pd.concat(frames, keys=keys, axis=1) # GH 16748, DataFrameGroupby fails 
when subplots=False and `column` argument # is assigned, and in this case, since `df` here becomes MI after groupby, diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py index e9967b75becce..e9192dae66a46 100644 --- a/pandas/tests/apply/test_str.py +++ b/pandas/tests/apply/test_str.py @@ -270,7 +270,7 @@ def test_transform_groupby_kernel_series(request, string_series, op): @pytest.mark.parametrize("op", frame_transform_kernels) -def test_transform_groupby_kernel_frame(request, axis, float_frame, op): +def test_transform_groupby_kernel_frame(request, float_frame, op): if op == "ngroup": request.applymarker( pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame") @@ -279,22 +279,15 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op): # GH 35964 args = [0.0] if op == "fillna" else [] - if axis in (0, "index"): - ones = np.ones(float_frame.shape[0]) - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - else: - ones = np.ones(float_frame.shape[1]) - msg = "DataFrame.groupby with axis=1 is deprecated" - - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = float_frame.groupby(ones, axis=axis) + ones = np.ones(float_frame.shape[0]) + gb = float_frame.groupby(ones) warn = FutureWarning if op == "fillna" else None op_msg = "DataFrameGroupBy.fillna is deprecated" with tm.assert_produces_warning(warn, match=op_msg): expected = gb.transform(op, *args) - result = float_frame.transform(op, axis, *args) + result = float_frame.transform(op, 0, *args) tm.assert_frame_equal(result, expected) # same thing, but ensuring we have multiple blocks @@ -302,17 +295,10 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op): float_frame["E"] = float_frame["A"].copy() assert len(float_frame._mgr.arrays) > 1 - if axis in (0, "index"): - ones = np.ones(float_frame.shape[0]) - else: - ones = np.ones(float_frame.shape[1]) - with tm.assert_produces_warning(FutureWarning, match=msg): - 
gb2 = float_frame.groupby(ones, axis=axis) - warn = FutureWarning if op == "fillna" else None - op_msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=op_msg): - expected2 = gb2.transform(op, *args) - result2 = float_frame.transform(op, axis, *args) + ones = np.ones(float_frame.shape[0]) + gb2 = float_frame.groupby(ones) + expected2 = gb2.transform(op, *args) + result2 = float_frame.transform(op, 0, *args) tm.assert_frame_equal(result2, expected2) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0e86f95a93091..c6962815ffda1 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -111,28 +111,6 @@ def test_groupby_aggregation_mixed_dtype(): tm.assert_frame_equal(result, expected) -def test_groupby_aggregation_multi_level_column(): - # GH 29772 - lst = [ - [True, True, True, False], - [True, False, np.nan, False], - [True, True, np.nan, False], - [True, True, np.nan, False], - ] - df = DataFrame( - data=lst, - columns=MultiIndex.from_tuples([("A", 0), ("A", 1), ("B", 0), ("B", 1)]), - ) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=1, axis=1) - result = gb.sum(numeric_only=False) - expected = DataFrame({0: [2.0, True, True, True], 1: [1, 0, 1, 1]}) - - tm.assert_frame_equal(result, expected) - - def test_agg_apply_corner(ts, tsframe): # nothing to group, all NA grouped = ts.groupby(ts * np.nan, group_keys=False) @@ -268,65 +246,6 @@ def test_agg_str_with_kwarg_axis_1_raises(df, reduction_func): gb.agg(reduction_func, axis=1) -@pytest.mark.parametrize( - "func, expected, dtype, result_dtype_dict", - [ - ("sum", [5, 7, 9], "int64", {}), - ("std", [4.5**0.5] * 3, int, {"i": float, "j": float, "k": float}), - ("var", [4.5] * 3, int, {"i": float, "j": float, "k": float}), - ("sum", [5, 7, 9], "Int64", {"j": 
"int64"}), - ("std", [4.5**0.5] * 3, "Int64", {"i": float, "j": float, "k": float}), - ("var", [4.5] * 3, "Int64", {"i": "float64", "j": "float64", "k": "float64"}), - ], -) -def test_multiindex_groupby_mixed_cols_axis1(func, expected, dtype, result_dtype_dict): - # GH#43209 - df = DataFrame( - [[1, 2, 3, 4, 5, 6]] * 3, - columns=MultiIndex.from_product([["a", "b"], ["i", "j", "k"]]), - ).astype({("a", "j"): dtype, ("b", "j"): dtype}) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=1, axis=1) - result = gb.agg(func) - expected = DataFrame([expected] * 3, columns=["i", "j", "k"]).astype( - result_dtype_dict - ) - - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "func, expected_data, result_dtype_dict", - [ - ("sum", [[2, 4], [10, 12], [18, 20]], {10: "int64", 20: "int64"}), - # std should ideally return Int64 / Float64 #43330 - ("std", [[2**0.5] * 2] * 3, "float64"), - ("var", [[2] * 2] * 3, {10: "float64", 20: "float64"}), - ], -) -def test_groupby_mixed_cols_axis1(func, expected_data, result_dtype_dict): - # GH#43209 - df = DataFrame( - np.arange(12).reshape(3, 4), - index=Index([0, 1, 0], name="y"), - columns=Index([10, 20, 10, 20], name="x"), - dtype="int64", - ).astype({10: "Int64"}) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("x", axis=1) - result = gb.agg(func) - expected = DataFrame( - data=expected_data, - index=Index([0, 1, 0], name="y"), - columns=Index([10, 20], name="x"), - ).astype(result_dtype_dict) - tm.assert_frame_equal(result, expected) - - def test_aggregate_item_by_item(df): grouped = df.groupby("A") @@ -1616,19 +1535,6 @@ def test_groupby_complex_raises(func): data.groupby(data.index % 2).agg(func) -@pytest.mark.parametrize( - "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}] -) -def 
test_multi_axis_1_raises(func): - # GH#46995 - df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]}) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("a", axis=1) - with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"): - gb.agg(func) - - @pytest.mark.parametrize( "test, constant", [ diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 5c99882cef6d2..b1f8ecc9c8a39 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -126,21 +126,6 @@ def test_cython_agg_nothing_to_agg_with_dates(): frame.groupby("b").dates.mean(numeric_only=True) -def test_cython_agg_frame_columns(): - # #2113 - df = DataFrame({"x": [1, 2, 3], "y": [3, 4, 5]}) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - with tm.assert_produces_warning(FutureWarning, match=msg): - df.groupby(level=0, axis="columns").mean() - - def test_cython_agg_return_dict(): # GH 16741 df = DataFrame( diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index e73fb15a54181..274e23abd77d9 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -87,18 +87,6 @@ def test_frame_describe_multikey(tsframe): expected = pd.concat(desc_groups, axis=1) tm.assert_frame_equal(result, expected) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - groupedT = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 
1}, axis=1) - result = groupedT.describe() - expected = tsframe.describe().T - # reverting the change from https://github.com/pandas-dev/pandas/pull/35441/ - expected.index = MultiIndex( - levels=[[0, 1], expected.index], - codes=[[0, 0, 1, 1], range(len(expected.index))], - ) - tm.assert_frame_equal(result, expected) - def test_frame_describe_tupleindex(): # GH 14848 - regression from 0.19.0 to 0.19.1 diff --git a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index 52d63cb720485..1b852abad6c8e 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -540,32 +540,6 @@ def test_groupby_head_tail(op, n, expected_rows, columns, as_index): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "op, n, expected_cols", - [ - ("head", -1, [0]), - ("head", 0, []), - ("head", 1, [0, 2]), - ("head", 7, [0, 1, 2]), - ("tail", -1, [1]), - ("tail", 0, []), - ("tail", 1, [1, 2]), - ("tail", 7, [0, 1, 2]), - ], -) -def test_groupby_head_tail_axis_1(op, n, expected_cols): - # GH 9772 - df = DataFrame( - [[1, 2, 3], [1, 4, 5], [2, 6, 7], [3, 8, 9]], columns=["A", "B", "C"] - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - g = df.groupby([0, 0, 1], axis=1) - expected = df.iloc[:, expected_cols] - result = getattr(g, op)(n) - tm.assert_frame_equal(result, expected) - - def test_group_selection_cache(): # GH 12839 nth, head, and tail should return same result consistently df = DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) @@ -773,24 +747,6 @@ def test_np_ints(slice_test_df, slice_test_grouped): tm.assert_frame_equal(result, expected) -def test_groupby_nth_with_column_axis(): - # GH43926 - df = DataFrame( - [ - [4, 5, 6], - [8, 8, 7], - ], - index=["z", "y"], - columns=["C", "B", "A"], - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = 
df.groupby(df.iloc[1], axis=1) - result = gb.nth(0) - expected = df.iloc[:, [0, 2]] - tm.assert_frame_equal(result, expected) - - def test_groupby_nth_interval(): # GH#24205 idx_result = MultiIndex( @@ -814,35 +770,6 @@ def test_groupby_nth_interval(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "start, stop, expected_values, expected_columns", - [ - (None, None, [0, 1, 2, 3, 4], list("ABCDE")), - (None, 1, [0, 3], list("AD")), - (None, 9, [0, 1, 2, 3, 4], list("ABCDE")), - (None, -1, [0, 1, 3], list("ABD")), - (1, None, [1, 2, 4], list("BCE")), - (1, -1, [1], list("B")), - (-1, None, [2, 4], list("CE")), - (-1, 2, [4], list("E")), - ], -) -@pytest.mark.parametrize("method", ["call", "index"]) -def test_nth_slices_with_column_axis( - start, stop, expected_values, expected_columns, method -): - df = DataFrame([range(5)], columns=[list("ABCDE")]) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([5, 5, 5, 6, 6], axis=1) - result = { - "call": lambda start, stop: gb.nth(slice(start, stop)), - "index": lambda start, stop: gb.nth[start:stop], - }[method](start, stop) - expected = DataFrame([expected_values], columns=[expected_columns]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.filterwarnings( "ignore:invalid value encountered in remainder:RuntimeWarning" ) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 1d2e639314cba..af0deba138469 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -377,32 +377,6 @@ def test_groupby_timedelta_quantile(): tm.assert_frame_equal(result, expected) -def test_columns_groupby_quantile(): - # GH 33795 - df = DataFrame( - np.arange(12).reshape(3, -1), - index=list("XYZ"), - columns=pd.Series(list("ABAB"), name="col"), - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with 
tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby("col", axis=1) - result = gb.quantile(q=[0.8, 0.2]) - expected = DataFrame( - [ - [1.6, 0.4, 2.6, 1.4], - [5.6, 4.4, 6.6, 5.4], - [9.6, 8.4, 10.6, 9.4], - ], - index=list("XYZ"), - columns=pd.MultiIndex.from_tuples( - [("A", 0.8), ("A", 0.2), ("B", 0.8), ("B", 0.2)], names=["col", None] - ), - ) - - tm.assert_frame_equal(result, expected) - - def test_timestamp_groupby_quantile(unit): # GH 33168 dti = pd.date_range( diff --git a/pandas/tests/groupby/methods/test_size.py b/pandas/tests/groupby/methods/test_size.py index fd55ceedd1083..5a3eb49e97fb7 100644 --- a/pandas/tests/groupby/methods/test_size.py +++ b/pandas/tests/groupby/methods/test_size.py @@ -3,8 +3,6 @@ import pandas.util._test_decorators as td -from pandas.core.dtypes.common import is_integer_dtype - from pandas import ( DataFrame, Index, @@ -22,35 +20,6 @@ def test_size(df, by): assert result[key] == len(group) -@pytest.mark.parametrize( - "by", - [ - [0, 0, 0, 0], - [0, 1, 1, 1], - [1, 0, 1, 1], - [0, None, None, None], - pytest.param([None, None, None, None], marks=pytest.mark.xfail), - ], -) -@pytest.mark.parametrize("axis_1", [1, "columns"]) -def test_size_axis_1(df, axis_1, by, sort, dropna): - # GH#45715 - counts = {key: sum(value == key for value in by) for key in dict.fromkeys(by)} - if dropna: - counts = {key: value for key, value in counts.items() if key is not None} - expected = Series(counts, dtype="int64") - if sort: - expected = expected.sort_index() - if is_integer_dtype(expected.index.dtype) and not any(x is None for x in by): - expected.index = expected.index.astype(int) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna) - result = grouped.size() - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) def test_size_sort(sort, by): df = 
DataFrame(np.random.default_rng(2).choice(20, (1000, 3)), columns=list("ABC")) diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 42f949443e33f..4d610018917f6 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -232,14 +232,6 @@ def education_df(): ) -def test_axis(education_df): - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gp = education_df.groupby("country", axis=1) - with pytest.raises(NotImplementedError, match="axis"): - gp.value_counts() - - def test_bad_subset(education_df): gp = education_df.groupby("country") with pytest.raises(ValueError, match="subset"): diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 29070e686d91a..26b31e202e6e6 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -122,40 +122,6 @@ def test_apply_index_date_object(using_infer_string): tm.assert_series_equal(result, expected) -def test_apply_trivial(using_infer_string): - # GH 20066 - # trivial apply: ignore input and return a constant dataframe. 
- df = DataFrame( - {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=["key", "data"], - ) - dtype = "string" if using_infer_string else "object" - expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=["float64", dtype]) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([str(x) for x in df.dtypes], axis=1) - result = gb.apply(lambda x: df.iloc[1:]) - - tm.assert_frame_equal(result, expected) - - -def test_apply_trivial_fail(using_infer_string): - # GH 20066 - df = DataFrame( - {"key": ["a", "a", "b", "b", "a"], "data": [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=["key", "data"], - ) - dtype = "string" if using_infer_string else "object" - expected = pd.concat([df, df], axis=1, keys=["float64", dtype]) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([str(x) for x in df.dtypes], axis=1, group_keys=True) - result = gb.apply(lambda x: df) - - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "df, group_names", [ @@ -1257,31 +1223,6 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp(): assert type(val) is date -def test_apply_by_cols_equals_apply_by_rows_transposed(): - # GH 16646 - # Operating on the columns, or transposing and operating on the rows - # should give the same result. 
There was previously a bug where the - # by_rows operation would work fine, but by_cols would throw a ValueError - - df = DataFrame( - np.random.default_rng(2).random([6, 4]), - columns=MultiIndex.from_product([["A", "B"], [1, 2]]), - ) - - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.T.groupby(axis=0, level=0) - by_rows = gb.apply(lambda x: x.droplevel(axis=0, level=0)) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df.groupby(axis=1, level=0) - by_cols = gb2.apply(lambda x: x.droplevel(axis=1, level=0)) - - tm.assert_frame_equal(by_cols, by_rows.T) - tm.assert_frame_equal(by_cols, df) - - def test_apply_dropna_with_indexed_same(dropna): # GH 38227 # GH#43205 diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index cfd1a4bca9d91..29d82cce44807 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -98,66 +98,3 @@ def fn(x): name="col2", ) tm.assert_series_equal(result, expected) - - -def test_apply_mutate_columns_multiindex(): - # GH 12652 - df = pd.DataFrame( - { - ("C", "julian"): [1, 2, 3], - ("B", "geoffrey"): [1, 2, 3], - ("A", "julian"): [1, 2, 3], - ("B", "julian"): [1, 2, 3], - ("A", "geoffrey"): [1, 2, 3], - ("C", "geoffrey"): [1, 2, 3], - }, - columns=pd.MultiIndex.from_tuples( - [ - ("A", "julian"), - ("A", "geoffrey"), - ("B", "julian"), - ("B", "geoffrey"), - ("C", "julian"), - ("C", "geoffrey"), - ] - ), - ) - - def add_column(grouped): - name = grouped.columns[0][1] - grouped["sum", name] = grouped.sum(axis=1) - return grouped - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(level=1, axis=1) - result = gb.apply(add_column) - expected = pd.DataFrame( - [ - [1, 1, 1, 3, 1, 1, 1, 3], - [2, 2, 2, 6, 2, 2, 2, 
6], - [ - 3, - 3, - 3, - 9, - 3, - 3, - 3, - 9, - ], - ], - columns=pd.MultiIndex.from_tuples( - [ - ("geoffrey", "A", "geoffrey"), - ("geoffrey", "B", "geoffrey"), - ("geoffrey", "C", "geoffrey"), - ("geoffrey", "sum", "geoffrey"), - ("julian", "A", "julian"), - ("julian", "B", "julian"), - ("julian", "C", "julian"), - ("julian", "sum", "julian"), - ] - ), - ) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7db08c8879b0c..727a77f52fe48 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1348,22 +1348,6 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) -def test_groupby_categorical_axis_1(code): - # GH 13420 - df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) - cat = Categorical.from_codes(code, categories=list("abc")) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(cat, axis=1, observed=False) - result = gb.mean() - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df.T.groupby(cat, axis=0, observed=False) - expected = gb2.mean().T - tm.assert_frame_equal(result, expected) - - def test_groupby_cat_preserves_structure(observed, ordered): # GH 28787 df = DataFrame( diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 309c4b7b57e84..a34170e9b55db 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -121,19 +121,6 @@ def raise_if_sum_is_zero(x): grouped.filter(raise_if_sum_is_zero) -def test_filter_with_axis_in_groupby(): - # issue 11041 - index = pd.MultiIndex.from_product([range(10), [0, 1]]) - data = 
DataFrame(np.arange(100).reshape(-1, 20), columns=index, dtype="int64") - - msg = "DataFrame.groupby with axis=1" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = data.groupby(level=0, axis=1) - result = gb.filter(lambda x: x.iloc[0, 0] > 10) - expected = data.iloc[:, 12:20] - tm.assert_frame_equal(result, expected) - - def test_filter_bad_shapes(): df = DataFrame({"A": np.arange(8), "B": list("aabbbbcc"), "C": np.arange(8)}) s = df["B"] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 399cebb0d3706..a06d104e7e44c 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -39,7 +39,7 @@ def test_repr(): # GH18203 result = repr(Grouper(key="A", level="B")) - expected = "Grouper(key='A', level='B', axis=0, sort=False, dropna=True)" + expected = "Grouper(key='A', level='B', sort=False, dropna=True)" assert result == expected @@ -288,29 +288,6 @@ def test_frame_groupby(tsframe): assert (samething == v).all() -def test_frame_groupby_columns(tsframe): - mapping = {"A": 0, "B": 0, "C": 1, "D": 1} - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = tsframe.groupby(mapping, axis=1) - - # aggregate - aggregated = grouped.aggregate("mean") - assert len(aggregated) == len(tsframe) - assert len(aggregated.columns) == 2 - - # transform - tf = lambda x: x - x.mean() - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - groupedT = tsframe.T.groupby(mapping, axis=0) - tm.assert_frame_equal(groupedT.transform(tf).T, grouped.transform(tf)) - - # iterate - for k, v in grouped: - assert len(v.columns) == 2 - - def test_frame_set_name_single(df): grouped = df.groupby("A") @@ -638,18 +615,6 @@ def test_groupby_as_index_series_scalar(df): tm.assert_frame_equal(result, expected) -def test_groupby_as_index_corner(df, ts): - msg = "as_index=False 
only valid with DataFrame" - with pytest.raises(TypeError, match=msg): - ts.groupby(lambda x: x.weekday(), as_index=False) - - msg = "as_index=False only valid for axis=0" - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.groupby(lambda x: x.lower(), as_index=False, axis=1) - - def test_groupby_multiple_key(): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), @@ -660,19 +625,6 @@ def test_groupby_multiple_key(): agged = grouped.sum() tm.assert_almost_equal(df.values, agged.values) - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - grouped = df.T.groupby( - [lambda x: x.year, lambda x: x.month, lambda x: x.day], axis=1 - ) - - agged = grouped.agg(lambda x: x.sum()) - tm.assert_index_equal(agged.index, df.columns) - tm.assert_almost_equal(df.T.values, agged.values) - - agged = grouped.agg(lambda x: x.sum()) - tm.assert_almost_equal(df.T.values, agged.values) - def test_groupby_multi_corner(df): # test that having an all-NA column doesn't mess you up @@ -703,14 +655,6 @@ def test_raises_on_nuisance(df): with pytest.raises(TypeError, match=msg): grouped.sum() - # won't work with axis = 1 - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1) - msg = "does not support reduction 'sum'" - with pytest.raises(TypeError, match=msg): - grouped.agg(lambda x: x.sum(0, numeric_only=False)) - @pytest.mark.parametrize( "agg_function", @@ -978,24 +922,12 @@ def test_groupby_with_hier_columns(): result = df.groupby(level=0).mean() tm.assert_index_equal(result.columns, columns) - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(level=0, axis=1) - 
result = gb.mean() - tm.assert_index_equal(result.index, df.index) - result = df.groupby(level=0).agg("mean") tm.assert_index_equal(result.columns, columns) result = df.groupby(level=0).apply(lambda x: x.mean()) tm.assert_index_equal(result.columns, columns) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(level=0, axis=1) - result = gb.agg(lambda x: x.mean(1)) - tm.assert_index_equal(result.columns, Index(["A", "B"])) - tm.assert_index_equal(result.index, df.index) - # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" @@ -1997,34 +1929,6 @@ def test_groupby_groups_in_BaseGrouper(): assert result.groups == expected.groups -@pytest.mark.parametrize("group_name", ["x", ["x"]]) -def test_groupby_axis_1(group_name): - # GH 27614 - df = DataFrame( - np.arange(12).reshape(3, 4), index=[0, 1, 0], columns=[10, 20, 10, 20] - ) - df.index.name = "y" - df.columns.name = "x" - - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(group_name, axis=1) - - results = gb.sum() - expected = df.T.groupby(group_name).sum().T - tm.assert_frame_equal(results, expected) - - # test on MI column - iterables = [["bar", "baz", "foo"], ["one", "two"]] - mi = MultiIndex.from_product(iterables=iterables, names=["x", "x1"]) - df = DataFrame(np.arange(18).reshape(3, 6), index=[0, 1, 0], columns=mi) - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(group_name, axis=1) - results = gb.sum() - expected = df.T.groupby(group_name).sum().T - tm.assert_frame_equal(results, expected) - - @pytest.mark.parametrize( "op, expected", [ @@ -2156,42 +2060,26 @@ def test_group_on_empty_multiindex(transformation_func, request): tm.assert_equal(result, expected) -def test_groupby_crash_on_nunique(axis): +def test_groupby_crash_on_nunique(): # Fix following 30253 dti = date_range("2016-01-01", periods=2, name="foo") df = 
DataFrame({("A", "B"): [1, 2], ("A", "C"): [1, 3], ("D", "B"): [0, 0]}) df.columns.names = ("bar", "baz") df.index = dti - axis_number = df._get_axis_number(axis) - if not axis_number: - df = df.T - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - else: - msg = "DataFrame.groupby with axis=1 is deprecated" - - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(axis=axis_number, level=0) + df = df.T + gb = df.groupby(level=0) result = gb.nunique() expected = DataFrame({"A": [1, 2], "D": [1, 1]}, index=dti) expected.columns.name = "bar" - if not axis_number: - expected = expected.T + expected = expected.T tm.assert_frame_equal(result, expected) - if axis_number == 0: - # same thing, but empty columns - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df[[]].groupby(axis=axis_number, level=0) - exp = expected[[]] - else: - # same thing, but empty rows - with tm.assert_produces_warning(FutureWarning, match=msg): - gb2 = df.loc[[]].groupby(axis=axis_number, level=0) - # default for empty when we can't infer a dtype is float64 - exp = expected.loc[[]].astype(np.float64) + # same thing, but empty columns + gb2 = df[[]].groupby(level=0) + exp = expected[[]] res = gb2.nunique() tm.assert_frame_equal(res, exp) @@ -2267,17 +2155,6 @@ def test_subsetting_columns_keeps_attrs(klass, attr, value): assert getattr(result, attr) == getattr(expected, attr) -def test_subsetting_columns_axis_1(): - # GH 37725 - df = DataFrame({"A": [1], "B": [2], "C": [3]}) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - g = df.groupby([0, 0, 1], axis=1) - match = "Cannot subset columns when using axis=1" - with pytest.raises(ValueError, match=match): - g[["A", "B"]].sum() - - @pytest.mark.parametrize("func", ["sum", "any", "shift"]) def test_groupby_column_index_name_lost(func): # GH: 29764 groupby loses index sometimes @@ -2992,29 +2869,6 @@ def 
test_groupby_ngroup_with_nan(): tm.assert_series_equal(result, expected) -def test_get_group_axis_1(): - # GH#54858 - df = DataFrame( - { - "col1": [0, 3, 2, 3], - "col2": [4, 1, 6, 7], - "col3": [3, 8, 2, 10], - "col4": [1, 13, 6, 15], - "col5": [-4, 5, 6, -7], - } - ) - with tm.assert_produces_warning(FutureWarning, match="deprecated"): - grouped = df.groupby(axis=1, by=[1, 2, 3, 2, 1]) - result = grouped.get_group(1) - expected = DataFrame( - { - "col1": [0, 3, 2, 3], - "col5": [-4, 5, 6, -7], - } - ) - tm.assert_frame_equal(result, expected) - - def test_groupby_ffill_with_duplicated_index(): # GH#43412 df = DataFrame({"a": [1, 2, 3, 4, np.nan, np.nan]}, index=[0, 1, 2, 0, 1, 2]) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 39d1ba207fba7..841dd29edab10 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -230,13 +230,6 @@ def test_grouper_creation_bug(self): result = g.sum() tm.assert_frame_equal(result, expected) - msg = "Grouper axis keyword is deprecated and will be removed" - with tm.assert_produces_warning(FutureWarning, match=msg): - gpr = Grouper(key="A", axis=0) - g = df.groupby(gpr) - result = g.sum() - tm.assert_frame_equal(result, expected) - msg = "DataFrameGroupBy.apply operated on the grouping columns" with tm.assert_produces_warning(DeprecationWarning, match=msg): result = g.apply(lambda x: x.sum()) @@ -386,22 +379,14 @@ def test_groupby_categorical_index_and_columns(self, observed): [[1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2], [1, 2, 1, 2]], int ) cat_columns = CategoricalIndex(columns, categories=categories, ordered=True) - df = DataFrame(data=data, columns=cat_columns) - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - result = df.groupby(axis=1, level=0, observed=observed).sum() expected_data = np.array([[4, 2], [4, 2], [4, 2], [4, 2], [4, 2]], int) 
expected_columns = CategoricalIndex( categories, categories=categories, ordered=True ) - expected = DataFrame(data=expected_data, columns=expected_columns) - tm.assert_frame_equal(result, expected) # test transposed version df = DataFrame(data.T, index=cat_columns) - msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(axis=0, level=0, observed=observed).sum() + result = df.groupby(level=0, observed=observed).sum() expected = DataFrame(data=expected_data.T, index=expected_columns) tm.assert_frame_equal(result, expected) @@ -529,18 +514,6 @@ def test_grouping_error_on_multidim_input(self, df): with pytest.raises(ValueError, match=msg): Grouping(df.index, df[["A", "A"]]) - def test_multiindex_passthru(self): - # GH 7997 - # regression from 0.14.1 - df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - df.columns = MultiIndex.from_tuples([(0, 1), (1, 1), (2, 1)]) - - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - gb = df.groupby(axis=1, level=[0, 1]) - result = gb.first() - tm.assert_frame_equal(result, df) - def test_multiindex_negative_level(self, multiindex_dataframe_random_data): # GH 13901 result = multiindex_dataframe_random_data.groupby(level=-1).sum() @@ -677,35 +650,20 @@ def test_groupby_level(self, sort, multiindex_dataframe_random_data, df): tm.assert_frame_equal(result0, expected0) tm.assert_frame_equal(result1, expected1) - # axis=1 - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() - result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() - tm.assert_frame_equal(result0, expected0.T) - tm.assert_frame_equal(result1, expected1.T) - # raise exception for non-MultiIndex msg = "level > 0 or level < -1 only valid with MultiIndex" with pytest.raises(ValueError, 
match=msg): df.groupby(level=1) - def test_groupby_level_index_names(self, axis): + def test_groupby_level_index_names(self): # GH4014 this used to raise ValueError since 'exp'>1 (in py2) df = DataFrame({"exp": ["A"] * 3 + ["B"] * 3, "var1": range(6)}).set_index( "exp" ) - if axis in (1, "columns"): - df = df.T - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - else: - depr_msg = "The 'axis' keyword in DataFrame.groupby is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.groupby(level="exp", axis=axis) - msg = f"level name foo is not the name of the {df._get_axis_name(axis)}" + df.groupby(level="exp") + msg = "level name foo is not the name of the index" with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - df.groupby(level="foo", axis=axis) + df.groupby(level="foo") def test_groupby_level_with_nas(self, sort): # GH 17537 @@ -1099,14 +1057,6 @@ def test_multi_iter_frame(self, three_group): groups = {key: gp for key, gp in grouped} # noqa: C416 assert len(groups) == 2 - # axis = 1 - three_levels = three_group.groupby(["A", "B", "C"]).mean() - depr_msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - grouped = three_levels.T.groupby(axis=1, level=(1, 2)) - for key, group in grouped: - pass - def test_dictify(self, df): dict(iter(df.groupby("A"))) dict(iter(df.groupby(["A", "B"]))) diff --git a/pandas/tests/groupby/test_indexing.py b/pandas/tests/groupby/test_indexing.py index f839bf156ca00..a3d3f509e186a 100644 --- a/pandas/tests/groupby/test_indexing.py +++ b/pandas/tests/groupby/test_indexing.py @@ -266,20 +266,6 @@ def test_step(step): tm.assert_frame_equal(result, expected) -def test_column_axis(): - column_group_df = pd.DataFrame( - [[0, 1, 2, 3, 4, 5, 6], [0, 0, 1, 0, 1, 0, 2]], - columns=["A", "B", "C", "D", "E", "F", "G"], - ) - msg = "DataFrame.groupby with axis=1" - with 
tm.assert_produces_warning(FutureWarning, match=msg): - g = column_group_df.groupby(column_group_df.iloc[1], axis=1) - result = g._positional_selector[1:-1] - expected = column_group_df.iloc[:, [1, 3]] - - tm.assert_frame_equal(result, expected) - - def test_columns_on_iter(): # GitHub issue #44821 df = pd.DataFrame({k: range(10) for k in "ABC"}) diff --git a/pandas/tests/groupby/test_numba.py b/pandas/tests/groupby/test_numba.py index ee7d342472493..3e32031e51138 100644 --- a/pandas/tests/groupby/test_numba.py +++ b/pandas/tests/groupby/test_numba.py @@ -61,13 +61,6 @@ def test_as_index_false_unsupported(self, numba_supported_reductions): with pytest.raises(NotImplementedError, match="as_index=False"): getattr(gb, func)(engine="numba", **kwargs) - def test_axis_1_unsupported(self, numba_supported_reductions): - func, kwargs = numba_supported_reductions - df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) - gb = df.groupby("a", axis=1) - with pytest.raises(NotImplementedError, match="axis=1"): - getattr(gb, func)(engine="numba", **kwargs) - def test_no_engine_doesnt_raise(self): # GH55520 df = DataFrame({"a": [3, 2, 3, 2], "b": range(4), "c": range(1, 5)}) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 08ce41edfb784..50103011693bc 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -1149,36 +1149,26 @@ def test_apply_to_nullable_integer_returns_float(values, function): "sem", ], ) -@pytest.mark.parametrize("axis", [0, 1]) -def test_regression_allowlist_methods(op, axis, skipna, sort): +def test_regression_allowlist_methods(op, skipna, sort): # GH6944 # GH 17537 # explicitly test the allowlist methods - raw_frame = DataFrame([0]) - if axis == 0: - frame = raw_frame - msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be" - else: - frame = raw_frame.T - msg = "DataFrame.groupby with axis=1 is deprecated" + frame = DataFrame([0]) 
- with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = frame.groupby(level=0, axis=axis, sort=sort) + grouped = frame.groupby(level=0, sort=sort) if op == "skew": # skew has skipna result = getattr(grouped, op)(skipna=skipna) - expected = frame.groupby(level=0).apply( - lambda h: getattr(h, op)(axis=axis, skipna=skipna) - ) + expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(skipna=skipna)) if sort: - expected = expected.sort_index(axis=axis) + expected = expected.sort_index() tm.assert_frame_equal(result, expected) else: result = getattr(grouped, op)() - expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis)) + expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)()) if sort: - expected = expected.sort_index(axis=axis) + expected = expected.sort_index() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 8ef7c2b8ce859..b8891da388695 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -925,7 +925,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( # check that we will go through the singular_series path # in _wrap_applied_output_series assert gb.ngroups == 1 - assert gb._selected_obj._get_axis(gb.axis).nlevels == 1 + assert gb._selected_obj.index.nlevels == 1 # function that returns a Series msg = "DataFrameGroupBy.apply operated on the grouping columns" diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 3bccacf3dec6f..67bebddaa63ca 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -170,69 +170,9 @@ def test_transform_broadcast(tsframe, ts): for col in tsframe: assert_fp_equal(res[col], agged[col]) - # group columns - msg = "DataFrame.groupby with axis=1 is deprecated" - with 
tm.assert_produces_warning(FutureWarning, match=msg): - grouped = tsframe.groupby({"A": 0, "B": 0, "C": 1, "D": 1}, axis=1) - msg = "using DataFrameGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.transform(np.mean) - tm.assert_index_equal(result.index, tsframe.index) - tm.assert_index_equal(result.columns, tsframe.columns) - for _, gp in grouped: - agged = gp.mean(1) - res = result.reindex(columns=gp.columns) - for idx in gp.index: - assert_fp_equal(res.xs(idx), agged[idx]) - - -def test_transform_axis_1(request, transformation_func): - # GH 36308 - - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - args = get_groupby_method_args(transformation_func, df) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([0, 0, 1], axis=1) - warn = FutureWarning if transformation_func == "fillna" else None - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=msg): - result = gb.transform(transformation_func, *args) - msg = "DataFrameGroupBy.fillna is deprecated" - with tm.assert_produces_warning(warn, match=msg): - expected = df.T.groupby([0, 0, 1]).transform(transformation_func, *args).T - - if transformation_func in ["diff", "shift"]: - # Result contains nans, so transpose coerces to float - expected["b"] = expected["b"].astype("int64") - - # cumcount returns Series; the rest are DataFrame - tm.assert_equal(result, expected) - - -def test_transform_axis_1_reducer(request, reduction_func): - # GH#45715 - if reduction_func in ( - "corrwith", - "ngroup", - "nth", - ): - marker = pytest.mark.xfail(reason="transform incorrectly fails - GH#45986") - request.applymarker(marker) - - df = DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]}, index=["x", "y"]) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby([0, 0, 
1], axis=1) - - result = gb.transform(reduction_func) - expected = df.T.groupby([0, 0, 1]).transform(reduction_func).T - tm.assert_equal(result, expected) - def test_transform_axis_ts(tsframe): - # make sure that we are setting the axes - # correctly when on axis=0 or 1 + # make sure that we are setting the axes correctly # in the presence of a non-monotonic indexer # GH12713 @@ -252,14 +192,6 @@ def test_transform_axis_ts(tsframe): expected = grouped.apply(lambda x: x - x.mean(axis=0)) tm.assert_frame_equal(result, expected) - ts = ts.T - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) - result = ts - grouped.transform("mean") - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - tm.assert_frame_equal(result, expected) - # non-monotonic ts = tso.iloc[[1, 0] + list(range(2, len(base)))] grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) @@ -267,14 +199,6 @@ def test_transform_axis_ts(tsframe): expected = grouped.apply(lambda x: x - x.mean(axis=0)) tm.assert_frame_equal(result, expected) - ts = ts.T - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = ts.groupby(lambda x: x.weekday(), axis=1, group_keys=False) - result = ts - grouped.transform("mean") - expected = grouped.apply(lambda x: (x.T - x.mean(1)).T) - tm.assert_frame_equal(result, expected) - def test_transform_dtype(): # GH 9807 @@ -894,38 +818,6 @@ def test_cython_transform_frame_column( tm.assert_series_equal(expected, res2) -def test_transform_with_non_scalar_group(): - # GH 10165 - cols = MultiIndex.from_tuples( - [ - ("syn", "A"), - ("foo", "A"), - ("non", "A"), - ("syn", "C"), - ("foo", "C"), - ("non", "C"), - ("syn", "T"), - ("foo", "T"), - ("non", "T"), - ("syn", "G"), - ("foo", "G"), - ("non", "G"), - ] - ) - df = DataFrame( - np.random.default_rng(2).integers(1, 10, (4, 
12)), - columns=cols, - index=["A", "C", "G", "T"], - ) - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = df.groupby(axis=1, level=1) - msg = "transform must return a scalar value for each group.*" - with pytest.raises(ValueError, match=msg): - gb.transform(lambda z: z.div(z.sum(axis=1), axis=0)) - - @pytest.mark.parametrize( "cols,expected", [ @@ -1330,7 +1222,7 @@ def func(grp): # Check that the fastpath raises, see _transform_general obj = gb._obj_with_exclusions - gen = gb._grouper.get_iterator(obj, axis=gb.axis) + gen = gb._grouper.get_iterator(obj) fast_path, slow_path = gb._define_paths(func) _, group = next(gen) diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 2470aae78d701..abafad5b1d7da 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -426,25 +426,6 @@ def test_boxplot_legacy2_return_type(self): axes = _check_plot_works(grouped.boxplot, subplots=False, return_type="axes") _check_axes_shape(axes, axes_num=1, layout=(1, 1)) - @pytest.mark.parametrize( - "subplots, warn, axes_num, layout", - [[True, UserWarning, 3, (2, 2)], [False, None, 1, (1, 1)]], - ) - def test_boxplot_legacy3(self, subplots, warn, axes_num, layout): - tuples = zip(string.ascii_letters[:10], range(10)) - df = DataFrame( - np.random.default_rng(2).random((10, 3)), - index=MultiIndex.from_tuples(tuples), - ) - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = df.unstack(level=1).groupby(level=0, axis=1) - with tm.assert_produces_warning(warn, check_stacklevel=False): - axes = _check_plot_works( - grouped.boxplot, subplots=subplots, return_type="axes" - ) - _check_axes_shape(axes, axes_num=axes_num, layout=layout) - def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.default_rng(2).normal(166, 20, 
size=n)) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index dfcdc2ce26bcf..c5ef0f39ece19 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -639,26 +639,6 @@ def test_resample_ohlc_dataframe(unit): # df.columns = ['PRICE', 'PRICE'] -def test_resample_dup_index(): - # GH 4812 - # dup columns with resample raising - df = DataFrame( - np.random.default_rng(2).standard_normal((4, 12)), - index=[2000, 2000, 2000, 2000], - columns=[Period(year=2000, month=i + 1, freq="M") for i in range(12)], - ) - df.iloc[3, :] = np.nan - warning_msg = "DataFrame.resample with axis=1 is deprecated." - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - result = df.resample("QE", axis=1).mean() - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.groupby(lambda x: int((x.month - 1) / 3), axis=1).mean() - expected.columns = [Period(year=2000, quarter=i + 1, freq="Q") for i in range(4)] - tm.assert_frame_equal(result, expected) - - def test_resample_reresample(unit): dti = date_range( start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq="D" @@ -737,21 +717,6 @@ def test_asfreq_non_unique(unit): ts.asfreq("B") -def test_resample_axis1(unit): - rng = date_range("1/1/2000", "2/29/2000").as_unit(unit) - df = DataFrame( - np.random.default_rng(2).standard_normal((3, len(rng))), - columns=rng, - index=["a", "b", "c"], - ) - - warning_msg = "DataFrame.resample with axis=1 is deprecated." 
- with tm.assert_produces_warning(FutureWarning, match=warning_msg): - result = df.resample("ME", axis=1).mean() - expected = df.T.resample("ME").mean().T - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("freq", ["min", "5min", "15min", "30min", "4h", "12h"]) def test_resample_anchored_ticks(freq, unit): # If a fixed delta (5 minute, 4 hour) evenly divides a day, we should diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 12abd1c98784b..17c286c4651e6 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -35,13 +35,13 @@ def test_frame(dti, _test_series): def test_str(_test_series): r = _test_series.resample("h") assert ( - "DatetimeIndexResampler [freq=, axis=0, closed=left, " + "DatetimeIndexResampler [freq=, closed=left, " "label=left, convention=start, origin=start_day]" in str(r) ) r = _test_series.resample("h", origin="2000-01-01") assert ( - "DatetimeIndexResampler [freq=, axis=0, closed=left, " + "DatetimeIndexResampler [freq=, closed=left, " "label=left, convention=start, origin=2000-01-01 00:00:00]" in str(r) ) @@ -620,26 +620,6 @@ def test_agg_specificationerror_invalid_names(cases): cases[["A"]].agg({"A": ["sum", "std"], "B": ["mean", "std"]}) -@pytest.mark.parametrize( - "func", [["min"], ["mean", "max"], {"A": "sum"}, {"A": "prod", "B": "median"}] -) -def test_multi_agg_axis_1_raises(func): - # GH#46904 - - index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") - index.name = "date" - df = DataFrame( - np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index - ).T - warning_msg = "DataFrame.resample with axis=1 is deprecated." 
- with tm.assert_produces_warning(FutureWarning, match=warning_msg): - res = df.resample("ME", axis=1) - with pytest.raises( - NotImplementedError, match="axis other than 0 is not supported" - ): - res.agg(func) - - def test_agg_nested_dicts(): index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") index.name = "date" @@ -1047,37 +1027,6 @@ def test_args_kwargs_depr(method, raises): func(*args, 1, 2, 3, 4) -def test_df_axis_param_depr(): - index = date_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq="D") - index.name = "date" - df = DataFrame( - np.random.default_rng(2).random((10, 2)), columns=list("AB"), index=index - ).T - - # Deprecation error when axis=1 is explicitly passed - warning_msg = "DataFrame.resample with axis=1 is deprecated." - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - df.resample("ME", axis=1) - - # Deprecation error when axis=0 is explicitly passed - df = df.T - warning_msg = ( - "The 'axis' keyword in DataFrame.resample is deprecated and " - "will be removed in a future version." - ) - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - df.resample("ME", axis=0) - - -def test_series_axis_param_depr(_test_series): - warning_msg = ( - "The 'axis' keyword in Series.resample is " - "deprecated and will be removed in a future version." 
- ) - with tm.assert_produces_warning(FutureWarning, match=warning_msg): - _test_series.resample("h", axis=0) - - def test_resample_empty(): # GH#52484 df = DataFrame( diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index 3f9340b800eae..c5e202f36659b 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -296,7 +296,7 @@ def test_repr(): # GH18203 result = repr(Grouper(key="A", freq="h")) expected = ( - "TimeGrouper(key='A', freq=, axis=0, sort=True, dropna=True, " + "TimeGrouper(key='A', freq=, sort=True, dropna=True, " "closed='left', label='left', how='mean', " "convention='e', origin='start_day')" ) @@ -304,7 +304,7 @@ def test_repr(): result = repr(Grouper(key="A", freq="h", origin="2000-01-01")) expected = ( - "TimeGrouper(key='A', freq=, axis=0, sort=True, dropna=True, " + "TimeGrouper(key='A', freq=, sort=True, dropna=True, " "closed='left', label='left', how='mean', " "convention='e', origin=Timestamp('2000-01-01 00:00:00'))" ) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 6644ec82fab17..fda51b157cd75 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -28,16 +28,6 @@ def test_reindex_level(self, multiindex_year_month_day_dataframe_random_data): expected = ymd["A"].groupby(level="month").transform("sum") tm.assert_series_equal(result, expected, check_names=False) - # axis=1 - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - gb = ymd.T.groupby("month", axis=1) - - month_sums = gb.sum() - result = month_sums.reindex(columns=ymd.index, level=1) - expected = ymd.groupby(level="month").transform("sum").T - tm.assert_frame_equal(result, expected) - def test_reindex(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data @@ -97,27 +87,6 @@ def test_groupby_corner(self): # should work 
df.groupby(level="three") - def test_groupby_level_no_obs(self): - # #1697 - midx = MultiIndex.from_tuples( - [ - ("f1", "s1"), - ("f1", "s2"), - ("f2", "s1"), - ("f2", "s2"), - ("f3", "s1"), - ("f3", "s2"), - ] - ) - df = DataFrame([[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]], columns=midx) - df1 = df.loc(axis=1)[df.columns.map(lambda u: u[0] in ["f2", "f3"])] - - msg = "DataFrame.groupby with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = df1.groupby(axis=1, level=0) - result = grouped.sum() - assert (result.columns == ["f2", "f3"]).all() - def test_setitem_with_expansion_multiindex_columns( self, multiindex_year_month_day_dataframe_random_data ): diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index fe2da210c6fe9..b4d555203212e 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -127,19 +127,6 @@ def test_agg(step): tm.assert_frame_equal(result, expected, check_like=True) -@pytest.mark.parametrize( - "func", [["min"], ["mean", "max"], {"b": "sum"}, {"b": "prod", "c": "median"}] -) -def test_multi_axis_1_raises(func): - # GH#46904 - df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5], "c": [6, 7, 8]}) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - r = df.rolling(window=3, axis=1) - with pytest.raises(NotImplementedError, match="axis other than 0 is not supported"): - r.agg(func) - - def test_agg_apply(raw): # passed lambda df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) @@ -352,32 +339,6 @@ def test_dont_modify_attributes_after_methods( assert result == expected -def test_centered_axis_validation(step): - # ok - msg = "The 'axis' keyword in Series.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - Series(np.ones(10)).rolling(window=3, center=True, axis=0, step=step).mean() - - # bad axis - msg = "No axis named 1 for object type Series" - 
with pytest.raises(ValueError, match=msg): - Series(np.ones(10)).rolling(window=3, center=True, axis=1, step=step).mean() - - # ok ok - df = DataFrame(np.ones((10, 10))) - msg = "The 'axis' keyword in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.rolling(window=3, center=True, axis=0, step=step).mean() - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.rolling(window=3, center=True, axis=1, step=step).mean() - - # bad axis - msg = "No axis named 2 for object type DataFrame" - with pytest.raises(ValueError, match=msg): - (df.rolling(window=3, center=True, axis=2, step=step).mean()) - - def test_rolling_min_min_periods(step): a = Series([1, 2, 3, 4, 5]) result = a.rolling(window=100, min_periods=1, step=step).min() diff --git a/pandas/tests/window/test_apply.py b/pandas/tests/window/test_apply.py index 136f81632cb0a..2398713585cfb 100644 --- a/pandas/tests/window/test_apply.py +++ b/pandas/tests/window/test_apply.py @@ -316,13 +316,3 @@ def test_center_reindex_frame(raw): ) frame_rs = frame.rolling(window=25, min_periods=minp, center=True).apply(f, raw=raw) tm.assert_frame_equal(frame_xp, frame_rs) - - -def test_axis1(raw): - # GH 45912 - df = DataFrame([1, 2]) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=1, axis=1).apply(np.sum, raw=raw) - expected = DataFrame([1.0, 2.0]) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index d7c72105a673b..2e2cfa156019f 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -158,56 +158,6 @@ def test_ewma_times_adjust_false_raises(): ) -@pytest.mark.parametrize( - "func, expected", - [ - [ - "mean", - DataFrame( - { - 0: range(5), - 1: range(4, 9), - 2: [7.428571, 9, 10.571429, 12.142857, 
13.714286], - }, - dtype=float, - ), - ], - [ - "std", - DataFrame( - { - 0: [np.nan] * 5, - 1: [4.242641] * 5, - 2: [4.6291, 5.196152, 5.781745, 6.380775, 6.989788], - } - ), - ], - [ - "var", - DataFrame( - { - 0: [np.nan] * 5, - 1: [18.0] * 5, - 2: [21.428571, 27, 33.428571, 40.714286, 48.857143], - } - ), - ], - ], -) -def test_float_dtype_ewma(func, expected, float_numpy_dtype): - # GH#42452 - - df = DataFrame( - {0: range(5), 1: range(6, 11), 2: range(10, 20, 2)}, dtype=float_numpy_dtype - ) - msg = "Support for axis=1 in DataFrame.ewm is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - e = df.ewm(alpha=0.5, axis=1) - result = getattr(e, func)() - - tm.assert_frame_equal(result, expected) - - def test_times_string_col_raises(): # GH 43265 df = DataFrame( diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 9174307cec5d1..6d452b27f3654 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -79,23 +79,14 @@ def test_missing_minp_zero(): tm.assert_series_equal(result, expected) -def test_expanding_axis(axis): +def test_expanding(): # see gh-23372. 
df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis) - if axis == 0: - msg = "The 'axis' keyword in DataFrame.expanding is deprecated" - expected = DataFrame( - {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} - ) - else: - # axis == 1 - msg = "Support for axis=1 in DataFrame.expanding is deprecated" - expected = DataFrame([[np.nan] * 2 + [float(i) for i in range(3, 21)]] * 10) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.expanding(3, axis=axis).sum() + expected = DataFrame( + {i: [np.nan] * 2 + [float(j) for j in range(3, 11)] for i in range(20)} + ) + result = df.expanding(3).sum() tm.assert_frame_equal(result, expected) @@ -329,9 +320,7 @@ def test_expanding_corr_pairwise(frame): def test_expanding_func(func, static_comp, frame_or_series): data = frame_or_series(np.array(list(range(10)) + [np.nan] * 10)) - msg = "The 'axis' keyword in (Series|DataFrame).expanding is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - obj = data.expanding(min_periods=1, axis=0) + obj = data.expanding(min_periods=1) result = getattr(obj, func)() assert isinstance(result, frame_or_series) @@ -355,33 +344,26 @@ def test_expanding_func(func, static_comp, frame_or_series): def test_expanding_min_periods(func, static_comp): ser = Series(np.random.default_rng(2).standard_normal(50)) - msg = "The 'axis' keyword in Series.expanding is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser.expanding(min_periods=30, axis=0), func)() + result = getattr(ser.expanding(min_periods=30), func)() assert result[:29].isna().all() tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) # min_periods is working correctly - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser.expanding(min_periods=15, axis=0), func)() + result = getattr(ser.expanding(min_periods=15), func)() assert isna(result.iloc[13]) assert notna(result.iloc[14]) 
ser2 = Series(np.random.default_rng(2).standard_normal(20)) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser2.expanding(min_periods=5, axis=0), func)() + result = getattr(ser2.expanding(min_periods=5), func)() assert isna(result[3]) assert notna(result[4]) # min_periods=0 - with tm.assert_produces_warning(FutureWarning, match=msg): - result0 = getattr(ser.expanding(min_periods=0, axis=0), func)() - with tm.assert_produces_warning(FutureWarning, match=msg): - result1 = getattr(ser.expanding(min_periods=1, axis=0), func)() + result0 = getattr(ser.expanding(min_periods=0), func)() + result1 = getattr(ser.expanding(min_periods=1), func)() tm.assert_almost_equal(result0, result1) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = getattr(ser.expanding(min_periods=1, axis=0), func)() + result = getattr(ser.expanding(min_periods=1), func)() tm.assert_almost_equal(result.iloc[-1], static_comp(ser[:50])) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 139e1ff7f65fd..650eb911e410b 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -328,7 +328,6 @@ def f(x): def test_table_method_rolling_methods( self, - axis, nogil, parallel, nopython, @@ -340,16 +339,14 @@ def test_table_method_rolling_methods( engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(np.eye(3)) - roll_table = df.rolling(2, method="table", axis=axis, min_periods=0, step=step) + roll_table = df.rolling(2, method="table", min_periods=0, step=step) if method in ("var", "std"): with pytest.raises(NotImplementedError, match=f"{method} not supported"): getattr(roll_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) else: - roll_single = df.rolling( - 2, method="single", axis=axis, min_periods=0, step=step - ) + roll_single = df.rolling(2, method="single", min_periods=0, step=step) result = getattr(roll_table, method)( 
engine_kwargs=engine_kwargs, engine="numba", **kwargs ) @@ -358,19 +355,19 @@ def test_table_method_rolling_methods( ) tm.assert_frame_equal(result, expected) - def test_table_method_rolling_apply(self, axis, nogil, parallel, nopython, step): + def test_table_method_rolling_apply(self, nogil, parallel, nopython, step): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): return np.sum(x, axis=0) + 1 df = DataFrame(np.eye(3)) - result = df.rolling( - 2, method="table", axis=axis, min_periods=0, step=step - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") - expected = df.rolling( - 2, method="single", axis=axis, min_periods=0, step=step - ).apply(f, raw=True, engine_kwargs=engine_kwargs, engine="numba") + result = df.rolling(2, method="table", min_periods=0, step=step).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) + expected = df.rolling(2, method="single", min_periods=0, step=step).apply( + f, raw=True, engine_kwargs=engine_kwargs, engine="numba" + ) tm.assert_frame_equal(result, expected) def test_table_method_rolling_weighted_mean(self, step): @@ -393,37 +390,37 @@ def weighted_mean(x): )[::step] tm.assert_frame_equal(result, expected) - def test_table_method_expanding_apply(self, axis, nogil, parallel, nopython): + def test_table_method_expanding_apply(self, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} def f(x): return np.sum(x, axis=0) + 1 df = DataFrame(np.eye(3)) - result = df.expanding(method="table", axis=axis).apply( + result = df.expanding(method="table").apply( f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) - expected = df.expanding(method="single", axis=axis).apply( + expected = df.expanding(method="single").apply( f, raw=True, engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) def test_table_method_expanding_methods( - self, axis, nogil, parallel, nopython, 
arithmetic_numba_supported_operators + self, nogil, parallel, nopython, arithmetic_numba_supported_operators ): method, kwargs = arithmetic_numba_supported_operators engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(np.eye(3)) - expand_table = df.expanding(method="table", axis=axis) + expand_table = df.expanding(method="table") if method in ("var", "std"): with pytest.raises(NotImplementedError, match=f"{method} not supported"): getattr(expand_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) else: - expand_single = df.expanding(method="single", axis=axis) + expand_single = df.expanding(method="single") result = getattr(expand_table, method)( engine_kwargs=engine_kwargs, engine="numba", **kwargs ) @@ -434,15 +431,15 @@ def test_table_method_expanding_methods( @pytest.mark.parametrize("data", [np.eye(3), np.ones((2, 3)), np.ones((3, 2))]) @pytest.mark.parametrize("method", ["mean", "sum"]) - def test_table_method_ewm(self, data, method, axis, nogil, parallel, nopython): + def test_table_method_ewm(self, data, method, nogil, parallel, nopython): engine_kwargs = {"nogil": nogil, "parallel": parallel, "nopython": nopython} df = DataFrame(data) - result = getattr(df.ewm(com=1, method="table", axis=axis), method)( + result = getattr(df.ewm(com=1, method="table"), method)( engine_kwargs=engine_kwargs, engine="numba" ) - expected = getattr(df.ewm(com=1, method="single", axis=axis), method)( + expected = getattr(df.ewm(com=1, method="single"), method)( engine_kwargs=engine_kwargs, engine="numba" ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 0ca6bf0de94dd..fda631987255a 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -21,8 +21,6 @@ Timestamp, date_range, period_range, - to_datetime, - to_timedelta, ) import pandas._testing as tm from pandas.api.indexers import BaseIndexer @@ 
-594,39 +592,20 @@ def test_multi_index_names(): assert result.index.names == [None, "1", "2"] -def test_rolling_axis_sum(axis): +def test_rolling_axis_sum(): # see gh-23372. df = DataFrame(np.ones((10, 20))) - axis = df._get_axis_number(axis) - - if axis == 0: - msg = "The 'axis' keyword in DataFrame.rolling" - expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) - else: - # axis == 1 - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - expected = DataFrame([[np.nan] * 2 + [3.0] * 18] * 10) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(3, axis=axis).sum() + expected = DataFrame({i: [np.nan] * 2 + [3.0] * 8 for i in range(20)}) + result = df.rolling(3).sum() tm.assert_frame_equal(result, expected) -def test_rolling_axis_count(axis): +def test_rolling_axis_count(): # see gh-26055 df = DataFrame({"x": range(3), "y": range(3)}) - axis = df._get_axis_number(axis) - - if axis in [0, "index"]: - msg = "The 'axis' keyword in DataFrame.rolling" - expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) - else: - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - expected = DataFrame({"x": [1.0, 1.0, 1.0], "y": [2.0, 2.0, 2.0]}) - - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(2, axis=axis, min_periods=0).count() + expected = DataFrame({"x": [1.0, 2.0, 2.0], "y": [1.0, 2.0, 2.0]}) + result = df.rolling(2, min_periods=0).count() tm.assert_frame_equal(result, expected) @@ -639,21 +618,14 @@ def test_readonly_array(): tm.assert_series_equal(result, expected) -def test_rolling_datetime(axis, tz_naive_fixture): +def test_rolling_datetime(tz_naive_fixture): # GH-28192 tz = tz_naive_fixture df = DataFrame( {i: [1] * 2 for i in date_range("2019-8-01", "2019-08-03", freq="D", tz=tz)} ) - if axis in [0, "index"]: - msg = "The 'axis' keyword in DataFrame.rolling" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.T.rolling("2D", 
axis=axis).sum().T - else: - msg = "Support for axis=1 in DataFrame.rolling" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling("2D", axis=axis).sum() + result = df.T.rolling("2D").sum().T expected = DataFrame( { **{ @@ -1065,75 +1037,6 @@ def test_rolling_numerical_too_large_numbers(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize( - ("func", "value"), - [("sum", 2.0), ("max", 1.0), ("min", 1.0), ("mean", 1.0), ("median", 1.0)], -) -def test_rolling_mixed_dtypes_axis_1(func, value): - # GH: 20649 - df = DataFrame(1, index=[1, 2], columns=["a", "b", "c"]) - df["c"] = 1.0 - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - roll = df.rolling(window=2, min_periods=1, axis=1) - result = getattr(roll, func)() - expected = DataFrame( - {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, - index=[1, 2], - ) - tm.assert_frame_equal(result, expected) - - -def test_rolling_axis_one_with_nan(): - # GH: 35596 - df = DataFrame( - [ - [0, 1, 2, 4, np.nan, np.nan, np.nan], - [0, 1, 2, np.nan, np.nan, np.nan, np.nan], - [0, 2, 2, np.nan, 2, np.nan, 1], - ] - ) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=7, min_periods=1, axis="columns").sum() - expected = DataFrame( - [ - [0.0, 1.0, 3.0, 7.0, 7.0, 7.0, 7.0], - [0.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0], - [0.0, 2.0, 4.0, 4.0, 6.0, 6.0, 7.0], - ] - ) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "value", - ["test", to_datetime("2019-12-31"), to_timedelta("1 days 06:05:01.00003")], -) -def test_rolling_axis_1_non_numeric_dtypes(value): - # GH: 20649 - df = DataFrame({"a": [1, 2]}) - df["b"] = value - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=2, min_periods=1, 
axis=1).sum() - expected = DataFrame({"a": [1.0, 2.0]}) - tm.assert_frame_equal(result, expected) - - -def test_rolling_on_df_transposed(): - # GH: 32724 - df = DataFrame({"A": [1, None], "B": [4, 5], "C": [7, 8]}) - expected = DataFrame({"A": [1.0, np.nan], "B": [5.0, 5.0], "C": [11.0, 13.0]}) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(min_periods=1, window=2, axis=1).sum() - tm.assert_frame_equal(result, expected) - - result = df.T.rolling(min_periods=1, window=2).sum().T - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( ("index", "window"), [ @@ -1576,56 +1479,6 @@ def test_rolling_zero_window(): tm.assert_series_equal(result, expected) -def test_rolling_float_dtype(float_numpy_dtype): - # GH#42452 - df = DataFrame({"A": range(5), "B": range(10, 15)}, dtype=float_numpy_dtype) - expected = DataFrame( - {"A": [np.nan] * 5, "B": range(10, 20, 2)}, - dtype=float_numpy_dtype, - ) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(2, axis=1).sum() - tm.assert_frame_equal(result, expected, check_dtype=False) - - -def test_rolling_numeric_dtypes(): - # GH#41779 - df = DataFrame(np.arange(40).reshape(4, 10), columns=list("abcdefghij")).astype( - { - "a": "float16", - "b": "float32", - "c": "float64", - "d": "int8", - "e": "int16", - "f": "int32", - "g": "uint8", - "h": "uint16", - "i": "uint32", - "j": "uint64", - } - ) - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=2, min_periods=1, axis=1).min() - expected = DataFrame( - { - "a": range(0, 40, 10), - "b": range(0, 40, 10), - "c": range(1, 40, 10), - "d": range(2, 40, 10), - "e": range(3, 40, 10), - "f": range(4, 40, 10), - "g": range(5, 40, 10), - "h": range(6, 40, 10), - "i": range(7, 40, 10), - 
"j": range(8, 40, 10), - }, - dtype="float64", - ) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize("window", [1, 3, 10, 20]) @pytest.mark.parametrize("method", ["min", "max", "average"]) @pytest.mark.parametrize("pct", [True, False]) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index bd0fadeb3e475..820b0134cc577 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -689,17 +689,11 @@ def test_rolling_on_multi_index_level(self): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("msg, axis", [["column", 1], ["index", 0]]) -def test_nat_axis_error(msg, axis): +def test_nat_axis_error(): idx = [Timestamp("2020"), NaT] - kwargs = {"columns" if axis == 1 else "index": idx} - df = DataFrame(np.eye(2), **kwargs) - warn_msg = "The 'axis' keyword in DataFrame.rolling is deprecated" - if axis == 1: - warn_msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with pytest.raises(ValueError, match=f"{msg} values must not have NaT"): - with tm.assert_produces_warning(FutureWarning, match=warn_msg): - df.rolling("D", axis=axis).mean() + df = DataFrame(np.eye(2), index=idx) + with pytest.raises(ValueError, match="index values must not have NaT"): + df.rolling("D").mean() @td.skip_if_no("pyarrow") diff --git a/pandas/tests/window/test_win_type.py b/pandas/tests/window/test_win_type.py index 5c785ed3fccb2..574dfc34b6d26 100644 --- a/pandas/tests/window/test_win_type.py +++ b/pandas/tests/window/test_win_type.py @@ -668,20 +668,3 @@ def test_weighted_var_big_window_no_segfault(win_types, center): expected = Series(np.nan) tm.assert_series_equal(result, expected) - - -def test_rolling_center_axis_1(): - pytest.importorskip("scipy") - df = DataFrame( - {"a": [1, 1, 0, 0, 0, 1], "b": [1, 0, 0, 1, 0, 0], "c": [1, 0, 0, 1, 0, 1]} - ) - - msg = "Support for axis=1 in DataFrame.rolling is deprecated" - with 
tm.assert_produces_warning(FutureWarning, match=msg): - result = df.rolling(window=3, axis=1, win_type="boxcar", center=True).sum() - - expected = DataFrame( - {"a": [np.nan] * 6, "b": [3.0, 1.0, 0.0, 2.0, 0.0, 2.0], "c": [np.nan] * 6} - ) - - tm.assert_frame_equal(result, expected, check_dtype=True) From 0fd71768147d6cace22f99c6fca7dc19679337ff Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Thu, 1 Feb 2024 19:04:28 +0000 Subject: [PATCH 05/50] BUG: pandas int extension dtypes has no attribute byteorder (#57173) * support nullable integers in from_dataframe * gh issue number * use BaseMaskedDtype * only skip if int8[pyarrow] --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/core/interchange/column.py | 3 +++ pandas/tests/interchange/test_impl.py | 12 ++++++++++++ 3 files changed, 16 insertions(+) diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 9002d9af2c602..56e645d4c55db 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -30,6 +30,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ +- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for Nullable integers (:issue:`55069`) - Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for empty inputs (:issue:`56700`) - Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`) diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 508cd74c57288..350cab2c56013 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -11,6 +11,7 @@ from pandas.core.dtypes.dtypes import ( ArrowDtype, + BaseMaskedDtype, DatetimeTZDtype, ) @@ -143,6 +144,8 @@ def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]: byteorder = dtype.numpy_dtype.byteorder elif isinstance(dtype, DatetimeTZDtype): byteorder = dtype.base.byteorder # type: ignore[union-attr] + elif isinstance(dtype, 
BaseMaskedDtype): + byteorder = dtype.numpy_dtype.byteorder else: byteorder = dtype.byteorder diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 76f80c20fa217..c8f286f22a43e 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -8,6 +8,7 @@ is_ci_environment, is_platform_windows, ) +import pandas.util._test_decorators as td import pandas as pd import pandas._testing as tm @@ -394,6 +395,17 @@ def test_large_string(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))] +) +def test_nullable_integers(dtype: str) -> None: + # https://github.com/pandas-dev/pandas/issues/55069 + df = pd.DataFrame({"a": [1]}, dtype=dtype) + expected = pd.DataFrame({"a": [1]}, dtype="int8") + result = pd.api.interchange.from_dataframe(df.__dataframe__()) + tm.assert_frame_equal(result, expected) + + def test_empty_dataframe(): # https://github.com/pandas-dev/pandas/issues/56700 df = pd.DataFrame({"a": []}, dtype="int8") From d9f9e12199bbd5826406e9a6adef62b68867af61 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:34:49 -0700 Subject: [PATCH 06/50] DOC: fix PR02 errors in docstrings - groupby.idxmax (#57145) --- ci/code_checks.sh | 2 - pandas/core/groupby/generic.py | 96 +++++++++++++++++++++++++++++++++- 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c3ee7a4e262fc..50baa1cb0b19f 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -173,8 +173,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.transform\ pandas.core.groupby.DataFrameGroupBy.nth\ pandas.core.groupby.DataFrameGroupBy.rolling\ - pandas.core.groupby.SeriesGroupBy.idxmax\ - pandas.core.groupby.SeriesGroupBy.idxmin\ pandas.core.groupby.SeriesGroupBy.nth\ 
pandas.core.groupby.SeriesGroupBy.rolling\ pandas.core.groupby.DataFrameGroupBy.hist\ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e0811c914864b..32810d57a436e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1033,12 +1033,104 @@ def nsmallest( result = self._python_apply_general(f, data, not_indexed_same=True) return result - @doc(Series.idxmin.__doc__) def idxmin(self, skipna: bool = True) -> Series: + """ + Return the row label of the minimum value. + + If multiple values equal the minimum, the first row label with that + value is returned. + + Parameters + ---------- + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + + Returns + ------- + Series + Indexes of minima in each group. + + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmin : Return indices of the minimum values + along the given axis. + DataFrame.idxmin : Return index of first occurrence of minimum + over requested axis. + Series.idxmax : Return index *label* of the first occurrence + of maximum of values. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + + >>> ser.groupby(['a', 'a', 'b', 'b']).idxmin() + a 2023-01-01 + b 2023-02-01 + dtype: datetime64[ns] + """ return self._idxmax_idxmin("idxmin", skipna=skipna) - @doc(Series.idxmax.__doc__) def idxmax(self, skipna: bool = True) -> Series: + """ + Return the row label of the maximum value. + + If multiple values equal the maximum, the first row label with that + value is returned. + + Parameters + ---------- + skipna : bool, default True + Exclude NA/null values. If the entire Series is NA, the result + will be NA. + + Returns + ------- + Series + Indexes of maxima in each group.
+ + Raises + ------ + ValueError + If the Series is empty. + + See Also + -------- + numpy.argmax : Return indices of the maximum values + along the given axis. + DataFrame.idxmax : Return index of first occurrence of maximum + over requested axis. + Series.idxmin : Return index *label* of the first occurrence + of minimum of values. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( + ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser + 2023-01-01 1 + 2023-01-15 2 + 2023-02-01 3 + 2023-02-15 4 + dtype: int64 + + >>> ser.groupby(['a', 'a', 'b', 'b']).idxmax() + a 2023-01-15 + b 2023-02-15 + dtype: datetime64[ns] + """ return self._idxmax_idxmin("idxmax", skipna=skipna) @doc(Series.corr.__doc__) From bb42fc0c700533a67f03ea189c24e57d87d48410 Mon Sep 17 00:00:00 2001 From: Zhengbo Wang <77875500+luke396@users.noreply.github.com> Date: Fri, 2 Feb 2024 05:37:06 +0800 Subject: [PATCH 07/50] ENH: Add check for character limit (#57103) Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/generic.py | 4 ++++ pandas/io/excel/_base.py | 10 +++++++++- pandas/tests/io/excel/test_writers.py | 12 ++++++++++++ 4 files changed, 26 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c5ac2a800223b..f9117253b61c1 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :func:`DataFrame.to_excel` now raises a ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) - ..
--------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0afc1b607a8dc..78fbb66635dd1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2334,6 +2334,10 @@ def to_excel( Once a workbook has been saved it is not possible to write further data without rewriting the whole workbook. + pandas will check that the number of rows, columns, + and cell character count do not exceed Excel's limitations. + All other limitations must be checked by the user. + Examples -------- diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c7642236d4b2a..4109b6d0965bb 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1326,7 +1326,15 @@ def _value_with_fmt( fmt = "0" else: val = str(val) - + # GH#56954 + # Excel's limitation on cell contents is 32767 characters + # xref https://support.microsoft.com/en-au/office/excel-specifications-and-limits-1672b34d-7043-467e-8e27-269d656771c3 + if len(val) > 32767: + warnings.warn( + "Cell contents too long, truncated to 32767 characters", + UserWarning, + stacklevel=find_stack_level(), + ) return val, fmt @classmethod diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 6aea7464ee8dc..6ea48cd759fbc 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1385,6 +1385,18 @@ def test_to_excel_empty_frame(self, engine, ext): expected = DataFrame() tm.assert_frame_equal(result, expected) + def test_to_excel_raising_warning_when_cell_character_exceed_limit( + self, path, engine + ): + # GH#56954 + df = DataFrame({"A": ["a" * 32768]}) + msg = "Cell contents too long, truncated to 32767 characters" + with tm.assert_produces_warning( + UserWarning, match=msg, raise_on_extra_warnings=False + ): + buf = BytesIO() + df.to_excel(buf) + class TestExcelWriterEngineTests: @pytest.mark.parametrize( From 1d1672d1ecb5d108e45338de837d0604edba2859
Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 1 Feb 2024 19:53:05 -0500 Subject: [PATCH 08/50] CI: Add macOS M1 CI (#57163) * CI: Start testing on M1 * install pytest-localserver from pip * typo * more CI adjustments and try to fix wheel builders * more fixes * auto workers on windows as well * fix wheel builders * Update wheels.yml * bump cibuildwheel * bump both * maybe fix windows as well --- .github/workflows/unit-tests.yml | 9 +++++---- .github/workflows/wheels.yml | 18 ++++++++++-------- ci/deps/actions-310.yaml | 2 +- ci/deps/actions-311.yaml | 3 +-- ci/deps/actions-312.yaml | 2 +- ci/deps/actions-39.yaml | 2 +- pyproject.toml | 4 ---- 7 files changed, 19 insertions(+), 21 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 2b09aa9343b79..0fffedcb6ae88 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -202,7 +202,8 @@ jobs: timeout-minutes: 90 strategy: matrix: - os: [macos-latest, windows-latest] + # Note: Don't use macOS latest since macos 14 appears to be arm64 only + os: [macos-13, macos-14, windows-latest] env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml] fail-fast: false runs-on: ${{ matrix.os }} @@ -215,8 +216,7 @@ jobs: PANDAS_CI: 1 PYTEST_TARGET: pandas PATTERN: "not slow and not db and not network and not single_cpu" - # GH 47443: PYTEST_WORKERS > 0 crashes Windows builds with memory related errors - PYTEST_WORKERS: ${{ matrix.os == 'macos-latest' && 'auto' || '0' }} + PYTEST_WORKERS: 'auto' steps: - name: Checkout @@ -342,7 +342,8 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, macOS-latest, windows-latest] + # Separate out macOS 13 and 14, since macOS 14 is arm64 only + os: [ubuntu-22.04, macOS-13, macOS-14, windows-latest] timeout-minutes: 90 diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6d3b9048a2122..f79b2c51b5f92 100644 --- 
a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -94,7 +94,9 @@ jobs: buildplat: - [ubuntu-22.04, manylinux_x86_64] - [ubuntu-22.04, musllinux_x86_64] - - [macos-12, macosx_*] + - [macos-12, macosx_x86_64] + # Note: M1 images on Github Actions start from macOS 14 + - [macos-14, macosx_arm64] - [windows-2022, win_amd64] # TODO: support PyPy? python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"]] @@ -128,7 +130,7 @@ jobs: # Python version used to build sdist doesn't matter # wheel will be built from sdist with the correct version - name: Unzip sdist (macOS) - if: ${{ matrix.buildplat[1] == 'macosx_*' }} + if: ${{ startsWith(matrix.buildplat[1], 'macosx') }} run: | tar -xzf ./dist/${{ env.sdist_name }} -C ./dist @@ -139,18 +141,18 @@ jobs: - name: Build normal wheels if: ${{ (env.IS_SCHEDULE_DISPATCH != 'true' || env.IS_PUSH == 'true') }} - uses: pypa/cibuildwheel@v2.16.4 + uses: pypa/cibuildwheel@v2.16.5 with: - package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }} + package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} env: CIBW_PRERELEASE_PYTHONS: True CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} - name: Build nightly wheels (with NumPy pre-release) if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }} - uses: pypa/cibuildwheel@v2.16.4 + uses: pypa/cibuildwheel@v2.16.5 with: - package-dir: ./dist/${{ matrix.buildplat[1] == 'macosx_*' && env.sdist_name || needs.build_sdist.outputs.sdist_file }} + package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} env: # The nightly wheels should be build witht he NumPy 2.0 pre-releases # which requires the additional URL. 
@@ -183,7 +185,7 @@ jobs: $TST_CMD = @" python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0; python -m pip install `$(Get-Item pandas\wheelhouse\*.whl); - python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`'; + python -c `'import pandas as pd; pd.test(extra_args=[`\"--no-strict-data-files`\", `\"-m not clipboard and not single_cpu and not slow and not network and not db`\"])`'; "@ # add rc to the end of the image name if the Python version is unreleased docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} @@ -191,7 +193,7 @@ jobs: - uses: actions/upload-artifact@v4 with: - name: ${{ matrix.python[0] }}-${{ startsWith(matrix.buildplat[1], 'macosx') && 'macosx' || matrix.buildplat[1] }} + name: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} path: ./wheelhouse/*.whl - name: Upload wheels & sdist diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 45f114322015b..a3e44e6373145 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-localserver>=0.7.1 - pytest-qt>=4.2.0 - boto3 @@ -61,3 +60,4 @@ dependencies: - adbc-driver-postgresql>=0.8.0 - adbc-driver-sqlite>=0.8.0 - tzdata>=2022.7 + - pytest-localserver>=0.7.1 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index d14686696e669..95cd1a4d46ef4 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-localserver>=0.7.1 - pytest-qt>=4.2.0 - boto3 @@ -60,4 +59,4 @@ dependencies: - pip: - adbc-driver-postgresql>=0.8.0 - adbc-driver-sqlite>=0.8.0 - - tzdata>=2022.7 + - pytest-localserver>=0.7.1 diff --git a/ci/deps/actions-312.yaml b/ci/deps/actions-312.yaml index 
86aaf24b4e15c..a442ed6feeb5d 100644 --- a/ci/deps/actions-312.yaml +++ b/ci/deps/actions-312.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-localserver>=0.7.1 - pytest-qt>=4.2.0 - boto3 @@ -61,3 +60,4 @@ dependencies: - adbc-driver-postgresql>=0.8.0 - adbc-driver-sqlite>=0.8.0 - tzdata>=2022.7 + - pytest-localserver>=0.7.1 diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 31ee74174cd46..b162a78e7f115 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -14,7 +14,6 @@ dependencies: - pytest>=7.3.2 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-localserver>=0.7.1 - pytest-qt>=4.2.0 - boto3 @@ -61,3 +60,4 @@ dependencies: - adbc-driver-postgresql>=0.8.0 - adbc-driver-sqlite>=0.8.0 - tzdata>=2022.7 + - pytest-localserver>=0.7.1 diff --git a/pyproject.toml b/pyproject.toml index 96e8ea90afa59..12d4c7c267428 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,10 +161,6 @@ test-command = """ pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db", "--no-strict-data-files"]);' \ """ -[tool.cibuildwheel.macos] -archs = "x86_64 arm64" -test-skip = "*_arm64" - [tool.cibuildwheel.windows] before-build = "pip install delvewheel" repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" From 8ed7dae2a3a76bdae0da402c12111bf033124b55 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Fri, 2 Feb 2024 01:25:43 +0000 Subject: [PATCH 09/50] BUG: Interchange protocol implementation allows non-string column names (#57174) * convert non-string colnames to strings in interchange protocol * remove irrelevant statement * informative error message if two columns end up becoming duplicates --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/core/interchange/column.py | 8 ++++++++ pandas/core/interchange/dataframe.py | 2 +- pandas/tests/interchange/test_impl.py | 26 ++++++++++++++++++++++++-- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git 
a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 56e645d4c55db..13d5024b5a131 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -32,6 +32,7 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for Nullable integers (:issue:`55069`) - Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for empty inputs (:issue:`56700`) +- Fixed bug in :func:`pandas.api.interchange.from_dataframe` which wasn't converting column names to strings (:issue:`55069`) - Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/interchange/column.py b/pandas/core/interchange/column.py index 350cab2c56013..2e60b2a2138e3 100644 --- a/pandas/core/interchange/column.py +++ b/pandas/core/interchange/column.py @@ -77,6 +77,14 @@ def __init__(self, column: pd.Series, allow_copy: bool = True) -> None: Note: doesn't deal with extension arrays yet, just assume a regular Series/ndarray for now. """ + if isinstance(column, pd.DataFrame): + raise TypeError( + "Expected a Series, got a DataFrame. This likely happened " + "because you called __dataframe__ on a DataFrame which, " + "after converting column names to string, resulted in duplicated " + f"names: {column.columns}. Please rename these columns before " + "using the interchange protocol."
+ ) if not isinstance(column, pd.Series): raise NotImplementedError(f"Columns of type {type(column)} not handled yet") diff --git a/pandas/core/interchange/dataframe.py b/pandas/core/interchange/dataframe.py index 4f08b2c2b3a7b..1ffe0e8e8dbb0 100644 --- a/pandas/core/interchange/dataframe.py +++ b/pandas/core/interchange/dataframe.py @@ -32,7 +32,7 @@ def __init__(self, df: DataFrame, allow_copy: bool = True) -> None: Constructor - an instance of this (private) class is returned from `pd.DataFrame.__dataframe__`. """ - self._df = df + self._df = df.rename(columns=str, copy=False) self._allow_copy = allow_copy def __dataframe__( diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index c8f286f22a43e..4933cca97462f 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -162,8 +162,6 @@ def test_missing_from_masked(): } ) - df2 = df.__dataframe__() - rng = np.random.default_rng(2) dict_null = {col: rng.integers(low=0, high=len(df)) for col in df.columns} for col, num_nulls in dict_null.items(): @@ -395,6 +393,30 @@ def test_large_string(): tm.assert_frame_equal(result, expected) +def test_non_str_names(): + # https://github.com/pandas-dev/pandas/issues/56701 + df = pd.Series([1, 2, 3], name=0).to_frame() + names = df.__dataframe__().column_names() + assert names == ["0"] + + +def test_non_str_names_w_duplicates(): + # https://github.com/pandas-dev/pandas/issues/56701 + df = pd.DataFrame({"0": [1, 2, 3], 0: [4, 5, 6]}) + dfi = df.__dataframe__() + with pytest.raises( + TypeError, + match=( + "Expected a Series, got a DataFrame. This likely happened because you " + "called __dataframe__ on a DataFrame which, after converting column " + r"names to string, resulted in duplicated names: Index\(\['0', '0'\], " + r"dtype='object'\). Please rename these columns before using the " + "interchange protocol." 
+ ), + ): + pd.api.interchange.from_dataframe(dfi, allow_copy=False) + + @pytest.mark.parametrize( "dtype", ["Int8", pytest.param("Int8[pyarrow]", marks=td.skip_if_no("pyarrow"))] ) From f20c5c987c8cedb78df0bacddc1264e2d24c9b83 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Fri, 2 Feb 2024 04:51:18 -0700 Subject: [PATCH 10/50] DOC: fix PR02 errors in docstrings - pandas.core.groupby.SeriesGroupBy.transform and pandas.core.groupby.DataFrameGroupBy.transform (#57210) --- ci/code_checks.sh | 2 -- pandas/core/groupby/groupby.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 50baa1cb0b19f..9e1239c72776e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -169,8 +169,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.Interval\ pandas.Grouper\ pandas.core.groupby.SeriesGroupBy.apply\ - pandas.core.groupby.SeriesGroupBy.transform\ - pandas.core.groupby.DataFrameGroupBy.transform\ pandas.core.groupby.DataFrameGroupBy.nth\ pandas.core.groupby.DataFrameGroupBy.rolling\ pandas.core.groupby.SeriesGroupBy.nth\ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c4ae47348a64c..68626534f1e74 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -486,7 +486,7 @@ class providing the base-class of operations. Parameters ---------- -f : function, str +func : function, str Function to apply to each group. See the Notes section below for requirements. 
Accepted inputs are: From 70f47ee66a91730971d28f2edb7003f5cddc2994 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Fri, 2 Feb 2024 04:59:13 -0700 Subject: [PATCH 11/50] DOC: fix PR02 errors in docstrings - pandas.core.window.rolling.Rolling.quantile, pandas.core.window.expanding.Expanding.quantile (#57211) --- ci/code_checks.sh | 4 +--- pandas/core/window/expanding.py | 4 ++-- pandas/core/window/rolling.py | 4 ++-- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 9e1239c72776e..a09c4662a1fd9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -176,9 +176,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.core.groupby.DataFrameGroupBy.hist\ pandas.core.groupby.DataFrameGroupBy.plot\ pandas.core.groupby.DataFrameGroupBy.corrwith\ - pandas.core.groupby.SeriesGroupBy.plot\ - pandas.core.window.rolling.Rolling.quantile\ - pandas.core.window.expanding.Expanding.quantile # There should be no backslash in the final line, please keep this comment in the last ignored function + pandas.core.groupby.SeriesGroupBy.plot # There should be no backslash in the final line, please keep this comment in the last ignored function RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index 1bf26c482337c..c048c4a506629 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -664,11 +664,11 @@ def kurt(self, numeric_only: bool = False): create_section_header("Parameters"), dedent( """ - quantile : float + q : float Quantile to compute. 0 <= quantile <= 1. .. deprecated:: 2.1.0 - This will be renamed to 'q' in a future version. + This was renamed from 'quantile' to 'q' in version 2.1.0. 
interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points `i` and `j`: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 72a61b2877809..b55432085b928 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2502,11 +2502,11 @@ def kurt(self, numeric_only: bool = False): create_section_header("Parameters"), dedent( """ - quantile : float + q : float Quantile to compute. 0 <= quantile <= 1. .. deprecated:: 2.1.0 - This will be renamed to 'q' in a future version. + This was renamed from 'quantile' to 'q' in version 2.1.0. interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}} This optional parameter specifies the interpolation method to use, when the desired quantile lies between two data points `i` and `j`: From 47418a134b53e4ccefce7b1c80eea18811793159 Mon Sep 17 00:00:00 2001 From: Chris Hewitt <4896702+chris-hewitt@users.noreply.github.com> Date: Fri, 2 Feb 2024 07:00:41 -0500 Subject: [PATCH 12/50] DOC: Fix minor typo in indexing.rst (#57206) --- doc/source/user_guide/indexing.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 4954ee1538697..7c8d3b9e1c869 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1730,7 +1730,7 @@ Returning a view versus a copy .. warning:: :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. This means than chained indexing will + will become the new default in pandas 3.0. This means that chained indexing will never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary anymore. See :ref:`this section ` @@ -1784,7 +1784,7 @@ Why does assignment fail when using chained indexing? .. warning:: :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. 
This means than chained indexing will + will become the new default in pandas 3.0. This means that chained indexing will never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary anymore. See :ref:`this section ` From 680b21545bb1d8a86936530fea9d664d9fd21eb9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Feb 2024 03:43:56 -1000 Subject: [PATCH 13/50] CLN: Move capitalize_first_letter to where it's used (#57096) --- pandas/core/dtypes/dtypes.py | 4 +--- pandas/util/__init__.py | 4 ---- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 5b51bc9debb33..a6a5f142faf1c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -62,8 +62,6 @@ is_list_like, ) -from pandas.util import capitalize_first_letter - if not pa_version_under10p1: import pyarrow as pa @@ -1087,7 +1085,7 @@ def na_value(self) -> NaTType: def __eq__(self, other: object) -> bool: if isinstance(other, str): - return other in [self.name, capitalize_first_letter(self.name)] + return other[:1].lower() + other[1:] == self.name return super().__eq__(other) diff --git a/pandas/util/__init__.py b/pandas/util/__init__.py index 59ab324ba38ca..da109a514433f 100644 --- a/pandas/util/__init__.py +++ b/pandas/util/__init__.py @@ -27,7 +27,3 @@ def __getattr__(key: str): def __dir__(): return list(globals().keys()) + ["hash_array", "hash_pandas_object"] - - -def capitalize_first_letter(s: str) -> str: - return s[:1].upper() + s[1:] From 7736d4b6e158503d8ba901b761308dca1528a747 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Feb 2024 07:43:22 -1000 Subject: [PATCH 14/50] STY: Enable some passing rules (#57200) * Enable B006 * Enable B011 * Enable B019 * Enable B020 * Enable B020 * Enable PYI and other rule * Enable PLC1901 * Use values instead of items * Use union * Ignore one case ---
pandas/_libs/tslibs/vectorized.pyi | 6 ++---- pandas/core/indexers/objects.py | 2 +- pandas/io/formats/style_render.py | 2 +- pandas/io/json/_normalize.py | 4 ++-- pandas/tests/arrays/masked/test_arithmetic.py | 12 ++++++------ pandas/tests/copy_view/index/test_index.py | 4 ++-- pandas/tests/frame/test_query_eval.py | 8 ++++---- pandas/tests/io/test_feather.py | 4 +++- pandas/tests/plotting/common.py | 4 +++- pandas/tests/reshape/test_pivot.py | 8 ++++++-- pandas/tests/window/test_expanding.py | 8 ++++---- pandas/tests/window/test_rolling.py | 18 ++++++++++-------- pyproject.toml | 18 ------------------ 13 files changed, 44 insertions(+), 54 deletions(-) diff --git a/pandas/_libs/tslibs/vectorized.pyi b/pandas/_libs/tslibs/vectorized.pyi index de19f592da62b..f377c2e26ab81 100644 --- a/pandas/_libs/tslibs/vectorized.pyi +++ b/pandas/_libs/tslibs/vectorized.pyi @@ -1,7 +1,5 @@ -""" -For cython types that cannot be represented precisely, closest-available -python equivalents are used, and the precise types kept as adjacent comments. -""" +# For cython types that cannot be represented precisely, closest-available +# python equivalents are used, and the precise types kept as adjacent comments. 
from datetime import tzinfo import numpy as np diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index f2db4886a5590..5119089bac977 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -399,7 +399,7 @@ def get_window_bounds( start_arrays = [] end_arrays = [] window_indices_start = 0 - for key, indices in self.groupby_indices.items(): + for indices in self.groupby_indices.values(): index_array: np.ndarray | None if self.index_array is not None: diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 26c13e95fa280..80df46bf2336a 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1996,7 +1996,7 @@ class Tooltips: def __init__( self, - css_props: CSSProperties = [ + css_props: CSSProperties = [ # noqa: B006 ("visibility", "hidden"), ("position", "absolute"), ("z-index", 1), diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index de4033d5767e6..39fbce0b6901c 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -537,8 +537,8 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: if values.ndim > 1: # GH 37782 values = np.empty((len(v),), dtype=object) - for i, v in enumerate(v): - values[i] = v + for i, val in enumerate(v): + values[i] = val result[k] = values.repeat(lengths) return result diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index f4b571ca627b3..ea018d2da4d26 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -55,15 +55,15 @@ def test_array_scalar_like_equivalence(data, all_arithmetic_operators): scalar_array = pd.array([scalar] * len(data), dtype=data.dtype) # TODO also add len-1 array (np.array([scalar], dtype=data.dtype.numpy_dtype)) - for scalar in [scalar, data.dtype.type(scalar)]: + for val in [scalar, data.dtype.type(scalar)]: if 
is_bool_not_implemented(data, all_arithmetic_operators): msg = "operator '.*' not implemented for bool dtypes" with pytest.raises(NotImplementedError, match=msg): - op(data, scalar) + op(data, val) with pytest.raises(NotImplementedError, match=msg): op(data, scalar_array) else: - result = op(data, scalar) + result = op(data, val) expected = op(data, scalar_array) tm.assert_extension_array_equal(result, expected) @@ -214,13 +214,13 @@ def test_error_len_mismatch(data, all_arithmetic_operators): msg = "operator '.*' not implemented for bool dtypes" err = NotImplementedError - for other in [other, np.array(other)]: + for val in [other, np.array(other)]: with pytest.raises(err, match=msg): - op(data, other) + op(data, val) s = pd.Series(data) with pytest.raises(err, match=msg): - op(s, other) + op(s, val) @pytest.mark.parametrize("op", ["__neg__", "__abs__", "__invert__"]) diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 49d756cf32d34..596379a3266fb 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -10,7 +10,7 @@ from pandas.tests.copy_view.util import get_array -def index_view(index_data=[1, 2]): +def index_view(index_data): df = DataFrame({"a": index_data, "b": 1.5}) view = df[:] df = df.set_index("a", drop=True) @@ -142,7 +142,7 @@ def test_index_from_index(using_copy_on_write, warn_copy_on_write): ], ) def test_index_ops(using_copy_on_write, func, request): - idx, view_ = index_view() + idx, view_ = index_view([1, 2]) expected = idx.copy(deep=True) if "astype" in request.node.callspec.id: expected = expected.astype("Int64") diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 0a869d8f94f47..0e29db3ca85df 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -949,8 +949,8 @@ def test_str_query_method(self, parser, engine): ops = 2 * ([eq] + [ne]) msg = r"'(Not)?In' 
nodes are not implemented" - for lhs, op, rhs in zip(lhs, ops, rhs): - ex = f"{lhs} {op} {rhs}" + for lh, op_, rh in zip(lhs, ops, rhs): + ex = f"{lh} {op_} {rh}" with pytest.raises(NotImplementedError, match=msg): df.query( ex, @@ -990,8 +990,8 @@ def test_str_list_query_method(self, parser, engine): ops = 2 * ([eq] + [ne]) msg = r"'(Not)?In' nodes are not implemented" - for lhs, op, rhs in zip(lhs, ops, rhs): - ex = f"{lhs} {op} {rhs}" + for lh, ops_, rh in zip(lhs, ops, rhs): + ex = f"{lh} {ops_} {rh}" with pytest.raises(NotImplementedError, match=msg): df.query(ex, engine=engine, parser=parser) else: diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index f012fcadc5592..c8b5b690ae118 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -36,7 +36,9 @@ def check_external_error_on_write(self, df): with tm.ensure_clean() as path: to_feather(df, path) - def check_round_trip(self, df, expected=None, write_kwargs={}, **read_kwargs): + def check_round_trip(self, df, expected=None, write_kwargs=None, **read_kwargs): + if write_kwargs is None: + write_kwargs = {} if expected is None: expected = df.copy() diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 69120160699c2..5a46cdcb051b6 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -433,7 +433,7 @@ def _check_box_return_type( raise AssertionError -def _check_grid_settings(obj, kinds, kws={}): +def _check_grid_settings(obj, kinds, kws=None): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 import matplotlib as mpl @@ -446,6 +446,8 @@ def is_grid_on(): return not (xoff and yoff) + if kws is None: + kws = {} spndx = 1 for kind in kinds: mpl.pyplot.subplot(1, 4 * len(kinds), spndx) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index fbc3d2b8a7c35..f020fd45c87d9 100644 --- a/pandas/tests/reshape/test_pivot.py +++ 
b/pandas/tests/reshape/test_pivot.py @@ -890,10 +890,14 @@ def _check_output( result, values_col, data, - index=["A", "B"], - columns=["C"], + index=None, + columns=None, margins_col="All", ): + if index is None: + index = ["A", "B"] + if columns is None: + columns = ["C"] col_margins = result.loc[result.index[:-1], margins_col] expected_col_margins = data.groupby(index)[values_col].mean() tm.assert_series_equal(col_margins, expected_col_margins, check_names=False) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 6d452b27f3654..ad59f9e52514e 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -178,9 +178,9 @@ def test_expanding_count_with_min_periods_exceeding_series_length(frame_or_serie def test_iter_expanding_dataframe(df, expected, min_periods): # GH 11704 df = DataFrame(df) - expected = [DataFrame(values, index=index) for (values, index) in expected] + expecteds = [DataFrame(values, index=index) for (values, index) in expected] - for expected, actual in zip(expected, df.expanding(min_periods)): + for expected, actual in zip(expecteds, df.expanding(min_periods)): tm.assert_frame_equal(actual, expected) @@ -197,9 +197,9 @@ def test_iter_expanding_dataframe(df, expected, min_periods): ) def test_iter_expanding_series(ser, expected, min_periods): # GH 11704 - expected = [Series(values, index=index) for (values, index) in expected] + expecteds = [Series(values, index=index) for (values, index) in expected] - for expected, actual in zip(expected, ser.expanding(min_periods)): + for expected, actual in zip(expecteds, ser.expanding(min_periods)): tm.assert_series_equal(actual, expected) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index fda631987255a..85821ed2cfb6f 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -760,9 +760,9 @@ def 
test_rolling_count_default_min_periods_with_null_values(frame_or_series): def test_iter_rolling_dataframe(df, expected, window, min_periods): # GH 11704 df = DataFrame(df) - expected = [DataFrame(values, index=index) for (values, index) in expected] + expecteds = [DataFrame(values, index=index) for (values, index) in expected] - for expected, actual in zip(expected, df.rolling(window, min_periods=min_periods)): + for expected, actual in zip(expecteds, df.rolling(window, min_periods=min_periods)): tm.assert_frame_equal(actual, expected) @@ -805,10 +805,10 @@ def test_iter_rolling_on_dataframe(expected, window): } ) - expected = [ + expecteds = [ DataFrame(values, index=df.loc[index, "C"]) for (values, index) in expected ] - for expected, actual in zip(expected, df.rolling(window, on="C")): + for expected, actual in zip(expecteds, df.rolling(window, on="C")): tm.assert_frame_equal(actual, expected) @@ -856,9 +856,11 @@ def test_iter_rolling_on_dataframe_unordered(): ) def test_iter_rolling_series(ser, expected, window, min_periods): # GH 11704 - expected = [Series(values, index=index) for (values, index) in expected] + expecteds = [Series(values, index=index) for (values, index) in expected] - for expected, actual in zip(expected, ser.rolling(window, min_periods=min_periods)): + for expected, actual in zip( + expecteds, ser.rolling(window, min_periods=min_periods) + ): tm.assert_series_equal(actual, expected) @@ -904,11 +906,11 @@ def test_iter_rolling_datetime(expected, expected_index, window): # GH 11704 ser = Series(range(5), index=date_range(start="2020-01-01", periods=5, freq="D")) - expected = [ + expecteds = [ Series(values, index=idx) for (values, idx) in zip(expected, expected_index) ] - for expected, actual in zip(expected, ser.rolling(window)): + for expected, actual in zip(expecteds, ser.rolling(window)): tm.assert_series_equal(actual, expected) diff --git a/pyproject.toml b/pyproject.toml index 12d4c7c267428..1f99e57f1b239 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -245,12 +245,6 @@ ignore = [ "E402", # do not assign a lambda expression, use a def "E731", - # line break before binary operator - # "W503", # not yet implemented - # line break after binary operator - # "W504", # not yet implemented - # controversial - "B006", # controversial "B007", # controversial @@ -259,18 +253,10 @@ ignore = [ "B009", # getattr is used to side-step mypy "B010", - # tests use assert False - "B011", # tests use comparisons but not their returned value "B015", - # false positives - "B019", - # Loop control variable overrides iterable it iterates - "B020", # Function definition does not bind loop variable "B023", - # Functions defined inside a loop must not use variables redefined in the loop - # "B301", # not yet implemented # Only works with python >=3.10 "B905", # Too many arguments to function call @@ -285,14 +271,10 @@ ignore = [ "PLW2901", # Global statements are discouraged "PLW0603", - # Docstrings should not be included in stubs - "PYI021", # Use `typing.NamedTuple` instead of `collections.namedtuple` "PYI024", # No builtin `eval()` allowed "PGH001", - # compare-to-empty-string - "PLC1901", # while int | float can be shortened to float, the former is more explicit "PYI041", # incorrect-dict-iterator, flags valid Series.items usage From e5e96033c47bfe5f3034a2076ee6f81e4001a77c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Feb 2024 07:43:33 -1000 Subject: [PATCH 15/50] TST/CLN: Assorted (#57199) * Split test_xs * Remove unused variables * Bump up tolerance --- pandas/tests/config/test_localization.py | 1 - pandas/tests/frame/indexing/test_xs.py | 9 +++++---- pandas/tests/indexes/datetimes/test_constructors.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/config/test_localization.py b/pandas/tests/config/test_localization.py index 3907f557d1075..844f67cd2d0ea 100644 --- a/pandas/tests/config/test_localization.py +++ 
b/pandas/tests/config/test_localization.py @@ -15,7 +15,6 @@ import pandas as pd _all_locales = get_locales() -_current_locale = locale.setlocale(locale.LC_ALL) # getlocale() is wrong, see GH#46595 # Don't run any of these tests if we have no locales. pytestmark = pytest.mark.skipif(not _all_locales, reason="Need locales") diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 535137edd16cf..80b4635b94d3b 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -36,10 +36,7 @@ def four_level_index_dataframe(): class TestXS: - def test_xs( - self, float_frame, datetime_frame, using_copy_on_write, warn_copy_on_write - ): - float_frame_orig = float_frame.copy() + def test_xs(self, float_frame): idx = float_frame.index[5] xs = float_frame.xs(idx) for item, value in xs.items(): @@ -48,6 +45,7 @@ def test_xs( else: assert value == float_frame[item][idx] + def test_xs_mixed(self): # mixed-type xs test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} frame = DataFrame(test_data) @@ -56,11 +54,14 @@ def test_xs( assert xs["A"] == 1 assert xs["B"] == "1" + def test_xs_dt_error(self, datetime_frame): with pytest.raises( KeyError, match=re.escape("Timestamp('1999-12-31 00:00:00')") ): datetime_frame.xs(datetime_frame.index[0] - BDay()) + def test_xs_other(self, float_frame, using_copy_on_write, warn_copy_on_write): + float_frame_orig = float_frame.copy() # xs get column series = float_frame.xs("A", axis=1) expected = float_frame["A"] diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 7fdf17c797213..97e768b348d55 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1042,7 +1042,7 @@ def test_dti_constructor_with_non_nano_now_today(self): # result may not exactly match [now, today] so we'll test it up to a tolerance. 
# (it *may* match exactly due to rounding) - tolerance = pd.Timedelta(microseconds=1) + tolerance = pd.Timedelta(seconds=1) diff0 = result[0] - now.as_unit("s") assert diff0 >= pd.Timedelta(0), f"The difference is {diff0}" From b2f21289cb0ce2f73f095d1bb18415bd74f515a7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 2 Feb 2024 07:45:00 -1000 Subject: [PATCH 16/50] CLN: pyproject.toml (#57201) * CLN: Remove old coverage branches and ignored warnings * CLN: Remove old coverage branches and ignored warnings * Undo comment out * Test removing warnings * Add back some ignores --- pyproject.toml | 10 ---------- scripts/tests/data/deps_minimum.toml | 3 --- 2 files changed, 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f99e57f1b239..934f66136f601 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -476,20 +476,10 @@ filterwarnings = [ "ignore::ResourceWarning:asyncio", # From plotting doctests "ignore:More than 20 figures have been opened:RuntimeWarning", - # Will be fixed in numba 0.56: https://github.com/numba/numba/issues/7758 - "ignore:`np.MachAr` is deprecated:DeprecationWarning:numba", "ignore:.*urllib3:DeprecationWarning:botocore", "ignore:Setuptools is replacing distutils.:UserWarning:_distutils_hack", # https://github.com/PyTables/PyTables/issues/822 "ignore:a closed node found in the registry:UserWarning:tables", - "ignore:`np.object` is a deprecated:DeprecationWarning:tables", - "ignore:tostring:DeprecationWarning:tables", - "ignore:distutils Version classes are deprecated:DeprecationWarning:pandas_datareader", - "ignore:distutils Version classes are deprecated:DeprecationWarning:numexpr", - "ignore:distutils Version classes are deprecated:DeprecationWarning:fastparquet", - "ignore:distutils Version classes are deprecated:DeprecationWarning:fsspec", - # Can be removed once https://github.com/numpy/numpy/pull/24794 is merged - "ignore:.*In the future `np.long` will be defined 
as.*:FutureWarning", ] junit_family = "xunit2" markers = [ diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml index 0424920e5f446..ca1dc0c961c42 100644 --- a/scripts/tests/data/deps_minimum.toml +++ b/scripts/tests/data/deps_minimum.toml @@ -494,14 +494,11 @@ exclude_lines = [ "pragma: no cover", # Don't complain about missing debug-only code:s "def __repr__", - "if self.debug", # Don't complain if tests don't hit defensive assertion code: "raise AssertionError", "raise NotImplementedError", "AbstractMethodError", # Don't complain if non-runnable code isn't run: - "if 0:", - "if __name__ == .__main__.:", "if TYPE_CHECKING:", ] From fc05632d9e9dc6aaeee27cabd5fc2d04fc85145b Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Fri, 2 Feb 2024 12:53:29 -0500 Subject: [PATCH 17/50] REF: Remove internals.base (#57208) --- pandas/core/frame.py | 6 +- pandas/core/internals/__init__.py | 6 - pandas/core/internals/base.py | 406 ----------------------------- pandas/core/internals/managers.py | 322 ++++++++++++++++++++++- pandas/tests/internals/test_api.py | 3 - 5 files changed, 314 insertions(+), 429 deletions(-) delete mode 100644 pandas/core/internals/base.py diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ca488190a8704..207e3e7635cac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -259,7 +259,7 @@ from pandas.core.groupby.generic import DataFrameGroupBy from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg - from pandas.core.internals import SingleDataManager + from pandas.core.internals.managers import SingleBlockManager from pandas.io.formats.style import Styler @@ -4530,14 +4530,14 @@ def _ensure_valid_index(self, value) -> None: self._mgr = self._mgr.reindex_axis(index_copy, axis=1, fill_value=np.nan) - def _box_col_values(self, values: SingleDataManager, loc: int) -> Series: + def _box_col_values(self, values: 
SingleBlockManager, loc: int) -> Series: """ Provide boxed values for a column. """ # Lookup in columns so that if e.g. a str datetime was passed # we attach the Timestamp object as the name. name = self.columns[loc] - # We get index=self.index bc values is a SingleDataManager + # We get index=self.index bc values is a SingleBlockManager obj = self._constructor_sliced_from_mgr(values, axes=values.axes) obj._name = name return obj.__finalize__(self) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index da394e783be4b..fb14c5ad82f4f 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -1,8 +1,4 @@ from pandas.core.internals.api import make_block # 2023-09-18 pyarrow uses this -from pandas.core.internals.base import ( - DataManager, - SingleDataManager, -) from pandas.core.internals.concat import concatenate_managers from pandas.core.internals.managers import ( BlockManager, @@ -14,9 +10,7 @@ "DatetimeTZBlock", # pylint: disable=undefined-all-variable "ExtensionBlock", # pylint: disable=undefined-all-variable "make_block", - "DataManager", "BlockManager", - "SingleDataManager", "SingleBlockManager", "concatenate_managers", ] diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py deleted file mode 100644 index d6d588d5e2492..0000000000000 --- a/pandas/core/internals/base.py +++ /dev/null @@ -1,406 +0,0 @@ -""" -Base class for the internal managers. BlockManager inherits from this class. 
-""" -from __future__ import annotations - -from typing import ( - TYPE_CHECKING, - Any, - Literal, - cast, - final, -) - -import numpy as np - -from pandas._config import ( - using_copy_on_write, - warn_copy_on_write, -) - -from pandas._libs import ( - algos as libalgos, - lib, -) -from pandas.errors import AbstractMethodError -from pandas.util._validators import validate_bool_kwarg - -from pandas.core.dtypes.cast import ( - find_common_type, - np_can_hold_element, -) -from pandas.core.dtypes.dtypes import ( - ExtensionDtype, - SparseDtype, -) - -from pandas.core.base import PandasObject -from pandas.core.construction import extract_array -from pandas.core.indexes.api import ( - Index, - default_index, -) - -if TYPE_CHECKING: - from pandas._typing import ( - ArrayLike, - AxisInt, - DtypeObj, - Self, - Shape, - ) - - -class _AlreadyWarned: - def __init__(self) -> None: - # This class is used on the manager level to the block level to - # ensure that we warn only once. The block method can update the - # warned_already option without returning a value to keep the - # interface consistent. This is only a temporary solution for - # CoW warnings. - self.warned_already = False - - -class DataManager(PandasObject): - # TODO share more methods/attributes - - axes: list[Index] - - @property - def items(self) -> Index: - raise AbstractMethodError(self) - - @final - def __len__(self) -> int: - return len(self.items) - - @property - def ndim(self) -> int: - return len(self.axes) - - @property - def shape(self) -> Shape: - return tuple(len(ax) for ax in self.axes) - - @final - def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None: - # Caller is responsible for ensuring we have an Index object. - old_len = len(self.axes[axis]) - new_len = len(new_labels) - - if axis == 1 and len(self.items) == 0: - # If we are setting the index on a DataFrame with no columns, - # it is OK to change the length. 
- pass - - elif new_len != old_len: - raise ValueError( - f"Length mismatch: Expected axis has {old_len} elements, new " - f"values have {new_len} elements" - ) - - def reindex_indexer( - self, - new_axis, - indexer, - axis: AxisInt, - fill_value=None, - allow_dups: bool = False, - copy: bool = True, - only_slice: bool = False, - ) -> Self: - raise AbstractMethodError(self) - - @final - def reindex_axis( - self, - new_index: Index, - axis: AxisInt, - fill_value=None, - only_slice: bool = False, - ) -> Self: - """ - Conform data manager to new index. - """ - new_index, indexer = self.axes[axis].reindex(new_index) - - return self.reindex_indexer( - new_index, - indexer, - axis=axis, - fill_value=fill_value, - copy=False, - only_slice=only_slice, - ) - - def _equal_values(self, other: Self) -> bool: - """ - To be implemented by the subclasses. Only check the column values - assuming shape and indexes have already been checked. - """ - raise AbstractMethodError(self) - - @final - def equals(self, other: object) -> bool: - """ - Implementation for DataFrame.equals - """ - if not isinstance(other, type(self)): - return False - - self_axes, other_axes = self.axes, other.axes - if len(self_axes) != len(other_axes): - return False - if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): - return False - - return self._equal_values(other) - - def apply( - self, - f, - align_keys: list[str] | None = None, - **kwargs, - ) -> Self: - raise AbstractMethodError(self) - - def apply_with_block( - self, - f, - align_keys: list[str] | None = None, - **kwargs, - ) -> Self: - raise AbstractMethodError(self) - - @final - def isna(self, func) -> Self: - return self.apply("apply", func=func) - - @final - def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: - if limit is not None: - # Do this validation even if we go through one of the no-op paths - limit = libalgos.validate_limit(None, limit=limit) - - return self.apply( - "fillna", - value=value, 
- limit=limit, - inplace=inplace, - downcast=downcast, - using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), - ) - - @final - def where(self, other, cond, align: bool) -> Self: - if align: - align_keys = ["other", "cond"] - else: - align_keys = ["cond"] - other = extract_array(other, extract_numpy=True) - - return self.apply( - "where", - align_keys=align_keys, - other=other, - cond=cond, - using_cow=using_copy_on_write(), - ) - - @final - def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self: - if align: - align_keys = ["new", "mask"] - else: - align_keys = ["mask"] - new = extract_array(new, extract_numpy=True) - - already_warned = None - if warn_copy_on_write(): - already_warned = _AlreadyWarned() - if not warn: - already_warned.warned_already = True - - return self.apply( - "putmask", - align_keys=align_keys, - mask=mask, - new=new, - using_cow=using_copy_on_write(), - already_warned=already_warned, - ) - - @final - def round(self, decimals: int, using_cow: bool = False) -> Self: - return self.apply( - "round", - decimals=decimals, - using_cow=using_cow, - ) - - @final - def replace(self, to_replace, value, inplace: bool) -> Self: - inplace = validate_bool_kwarg(inplace, "inplace") - # NDFrame.replace ensures the not-is_list_likes here - assert not lib.is_list_like(to_replace) - assert not lib.is_list_like(value) - return self.apply( - "replace", - to_replace=to_replace, - value=value, - inplace=inplace, - using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), - ) - - @final - def replace_regex(self, **kwargs) -> Self: - return self.apply( - "_replace_regex", - **kwargs, - using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), - ) - - @final - def replace_list( - self, - src_list: list[Any], - dest_list: list[Any], - inplace: bool = False, - regex: bool = False, - ) -> Self: - """do a list replace""" - inplace = validate_bool_kwarg(inplace, "inplace") - - bm = self.apply( - "replace_list", - 
src_list=src_list, - dest_list=dest_list, - inplace=inplace, - regex=regex, - using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), - ) - bm._consolidate_inplace() - return bm - - def interpolate(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "interpolate", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), - ) - - def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "pad_or_backfill", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), - ) - - def shift(self, periods: int, fill_value) -> Self: - if fill_value is lib.no_default: - fill_value = None - - return self.apply("shift", periods=periods, fill_value=fill_value) - - # -------------------------------------------------------------------- - # Consolidation: No-ops for all but BlockManager - - def is_consolidated(self) -> bool: - return True - - def consolidate(self) -> Self: - return self - - def _consolidate_inplace(self) -> None: - return - - -class SingleDataManager(DataManager): - @property - def ndim(self) -> Literal[1]: - return 1 - - @final - @property - def array(self) -> ArrayLike: - """ - Quick access to the backing array of the Block. - """ - # error: "SingleDataManager" has no attribute "arrays"; maybe "array" - return self.arrays[0] # type: ignore[attr-defined] - - def setitem_inplace(self, indexer, value, warn: bool = True) -> None: - """ - Set values with indexer. - - For SingleBlockManager, this backs s[indexer] = value - - This is an inplace version of `setitem()`, mutating the manager/values - in place, not returning a new Manager (and Block), and thus never changing - the dtype. - """ - arr = self.array - - # EAs will do this validation in their own __setitem__ methods. - if isinstance(arr, np.ndarray): - # Note: checking for ndarray instead of np.dtype means we exclude - # dt64/td64, which do their own validation. 
- value = np_can_hold_element(arr.dtype, value) - - if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1: - # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 - value = value[0, ...] - - arr[indexer] = value - - def grouped_reduce(self, func): - arr = self.array - res = func(arr) - index = default_index(len(res)) - - mgr = type(self).from_array(res, index) - return mgr - - @classmethod - def from_array(cls, arr: ArrayLike, index: Index): - raise AbstractMethodError(cls) - - -def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: - """ - Find the common dtype for `blocks`. - - Parameters - ---------- - blocks : List[DtypeObj] - - Returns - ------- - dtype : np.dtype, ExtensionDtype, or None - None is returned when `blocks` is empty. - """ - if not len(dtypes): - return None - - return find_common_type(dtypes) - - -def ensure_np_dtype(dtype: DtypeObj) -> np.dtype: - # TODO: https://github.com/pandas-dev/pandas/issues/22791 - # Give EAs some input on what happens here. Sparse needs this. 
- if isinstance(dtype, SparseDtype): - dtype = dtype.subtype - dtype = cast(np.dtype, dtype) - elif isinstance(dtype, ExtensionDtype): - dtype = np.dtype("object") - elif dtype == np.dtype(str): - dtype = np.dtype("object") - return dtype diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c6a8b61e0c51e..e1a18cb79a1d6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -7,9 +7,11 @@ import itertools from typing import ( TYPE_CHECKING, + Any, Callable, Literal, cast, + final, ) import warnings import weakref @@ -22,6 +24,7 @@ ) from pandas._libs import ( + algos as libalgos, internals as libinternals, lib, ) @@ -30,11 +33,19 @@ BlockValuesRefs, ) from pandas._libs.tslibs import Timestamp -from pandas.errors import PerformanceWarning +from pandas.errors import ( + AbstractMethodError, + PerformanceWarning, +) from pandas.util._decorators import cache_readonly from pandas.util._exceptions import find_stack_level +from pandas.util._validators import validate_bool_kwarg -from pandas.core.dtypes.cast import infer_dtype_from_scalar +from pandas.core.dtypes.cast import ( + find_common_type, + infer_dtype_from_scalar, + np_can_hold_element, +) from pandas.core.dtypes.common import ( ensure_platform_int, is_1d_only_ea_dtype, @@ -43,6 +54,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, ExtensionDtype, + SparseDtype, ) from pandas.core.dtypes.generic import ( ABCDataFrame, @@ -60,6 +72,7 @@ DatetimeArray, ) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray +from pandas.core.base import PandasObject from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, @@ -67,14 +80,9 @@ from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import ( Index, + default_index, ensure_index, ) -from pandas.core.internals.base import ( - DataManager, - SingleDataManager, - ensure_np_dtype, - interleaved_dtype, -) from 
pandas.core.internals.blocks import ( COW_WARNING_GENERAL_MSG, COW_WARNING_SETITEM_MSG, @@ -106,7 +114,49 @@ from pandas.api.extensions import ExtensionArray -class BaseBlockManager(DataManager): +def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None: + """ + Find the common dtype for `blocks`. + + Parameters + ---------- + blocks : List[DtypeObj] + + Returns + ------- + dtype : np.dtype, ExtensionDtype, or None + None is returned when `blocks` is empty. + """ + if not len(dtypes): + return None + + return find_common_type(dtypes) + + +def ensure_np_dtype(dtype: DtypeObj) -> np.dtype: + # TODO: https://github.com/pandas-dev/pandas/issues/22791 + # Give EAs some input on what happens here. Sparse needs this. + if isinstance(dtype, SparseDtype): + dtype = dtype.subtype + dtype = cast(np.dtype, dtype) + elif isinstance(dtype, ExtensionDtype): + dtype = np.dtype("object") + elif dtype == np.dtype(str): + dtype = np.dtype("object") + return dtype + + +class _AlreadyWarned: + def __init__(self) -> None: + # This class is used on the manager level to the block level to + # ensure that we warn only once. The block method can update the + # warned_already option without returning a value to keep the + # interface consistent. This is only a temporary solution for + # CoW warnings. + self.warned_already = False + + +class BaseBlockManager(PandasObject): """ Core internal data structure to implement DataFrame, Series, etc. 
@@ -174,6 +224,14 @@ def ndim(self) -> int: def __init__(self, blocks, axes, verify_integrity: bool = True) -> None: raise NotImplementedError + @final + def __len__(self) -> int: + return len(self.items) + + @property + def shape(self) -> Shape: + return tuple(len(ax) for ax in self.axes) + @classmethod def from_blocks(cls, blocks: list[Block], axes: list[Index]) -> Self: raise NotImplementedError @@ -239,6 +297,23 @@ def set_axis(self, axis: AxisInt, new_labels: Index) -> None: self._validate_set_axis(axis, new_labels) self.axes[axis] = new_labels + @final + def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None: + # Caller is responsible for ensuring we have an Index object. + old_len = len(self.axes[axis]) + new_len = len(new_labels) + + if axis == 1 and len(self.items) == 0: + # If we are setting the index on a DataFrame with no columns, + # it is OK to change the length. + pass + + elif new_len != old_len: + raise ValueError( + f"Length mismatch: Expected axis has {old_len} elements, new " + f"values have {new_len} elements" + ) + @property def is_single_block(self) -> bool: # Assumes we are 2D; overridden by SingleBlockManager @@ -315,6 +390,29 @@ def __repr__(self) -> str: output += f"\n{block}" return output + def _equal_values(self, other: Self) -> bool: + """ + To be implemented by the subclasses. Only check the column values + assuming shape and indexes have already been checked. 
+ """ + raise AbstractMethodError(self) + + @final + def equals(self, other: object) -> bool: + """ + Implementation for DataFrame.equals + """ + if not isinstance(other, type(self)): + return False + + self_axes, other_axes = self.axes, other.axes + if len(self_axes) != len(other_axes): + return False + if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)): + return False + + return self._equal_values(other) + def apply( self, f, @@ -367,6 +465,152 @@ def apply( out = type(self).from_blocks(result_blocks, self.axes) return out + def apply_with_block( + self, + f, + align_keys: list[str] | None = None, + **kwargs, + ) -> Self: + raise AbstractMethodError(self) + + @final + def isna(self, func) -> Self: + return self.apply("apply", func=func) + + @final + def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: + if limit is not None: + # Do this validation even if we go through one of the no-op paths + limit = libalgos.validate_limit(None, limit=limit) + + return self.apply( + "fillna", + value=value, + limit=limit, + inplace=inplace, + downcast=downcast, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), + ) + + @final + def where(self, other, cond, align: bool) -> Self: + if align: + align_keys = ["other", "cond"] + else: + align_keys = ["cond"] + other = extract_array(other, extract_numpy=True) + + return self.apply( + "where", + align_keys=align_keys, + other=other, + cond=cond, + using_cow=using_copy_on_write(), + ) + + @final + def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self: + if align: + align_keys = ["new", "mask"] + else: + align_keys = ["mask"] + new = extract_array(new, extract_numpy=True) + + already_warned = None + if warn_copy_on_write(): + already_warned = _AlreadyWarned() + if not warn: + already_warned.warned_already = True + + return self.apply( + "putmask", + align_keys=align_keys, + mask=mask, + new=new, + using_cow=using_copy_on_write(), + 
already_warned=already_warned, + ) + + @final + def round(self, decimals: int, using_cow: bool = False) -> Self: + return self.apply( + "round", + decimals=decimals, + using_cow=using_cow, + ) + + @final + def replace(self, to_replace, value, inplace: bool) -> Self: + inplace = validate_bool_kwarg(inplace, "inplace") + # NDFrame.replace ensures the not-is_list_likes here + assert not lib.is_list_like(to_replace) + assert not lib.is_list_like(value) + return self.apply( + "replace", + to_replace=to_replace, + value=value, + inplace=inplace, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), + ) + + @final + def replace_regex(self, **kwargs) -> Self: + return self.apply( + "_replace_regex", + **kwargs, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), + ) + + @final + def replace_list( + self, + src_list: list[Any], + dest_list: list[Any], + inplace: bool = False, + regex: bool = False, + ) -> Self: + """do a list replace""" + inplace = validate_bool_kwarg(inplace, "inplace") + + bm = self.apply( + "replace_list", + src_list=src_list, + dest_list=dest_list, + inplace=inplace, + regex=regex, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), + ) + bm._consolidate_inplace() + return bm + + def interpolate(self, inplace: bool, **kwargs) -> Self: + return self.apply( + "interpolate", + inplace=inplace, + **kwargs, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), + ) + + def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: + return self.apply( + "pad_or_backfill", + inplace=inplace, + **kwargs, + using_cow=using_copy_on_write(), + already_warned=_AlreadyWarned(), + ) + + def shift(self, periods: int, fill_value) -> Self: + if fill_value is lib.no_default: + fill_value = None + + return self.apply("shift", periods=periods, fill_value=fill_value) + def setitem(self, indexer, value, warn: bool = True) -> Self: """ Set values with indexer. 
@@ -602,6 +846,9 @@ def copy_func(ax): res._consolidate_inplace() return res + def is_consolidated(self) -> bool: + return True + def consolidate(self) -> Self: """ Join together blocks having same dtype @@ -618,6 +865,31 @@ def consolidate(self) -> Self: bm._consolidate_inplace() return bm + def _consolidate_inplace(self) -> None: + return + + @final + def reindex_axis( + self, + new_index: Index, + axis: AxisInt, + fill_value=None, + only_slice: bool = False, + ) -> Self: + """ + Conform data manager to new index. + """ + new_index, indexer = self.axes[axis].reindex(new_index) + + return self.reindex_indexer( + new_index, + indexer, + axis=axis, + fill_value=fill_value, + copy=False, + only_slice=only_slice, + ) + def reindex_indexer( self, new_axis: Index, @@ -1820,7 +2092,7 @@ def concat_vertical(cls, mgrs: list[Self], axes: list[Index]) -> Self: raise NotImplementedError("This logic lives (for now) in internals.concat") -class SingleBlockManager(BaseBlockManager, SingleDataManager): +class SingleBlockManager(BaseBlockManager): """manage a single block with""" @property @@ -1939,6 +2211,14 @@ def _post_setstate(self) -> None: def _block(self) -> Block: return self.blocks[0] + @final + @property + def array(self) -> ArrayLike: + """ + Quick access to the backing array of the Block. + """ + return self.arrays[0] + # error: Cannot override writeable attribute with read-only property @property def _blknos(self) -> None: # type: ignore[override] @@ -2041,7 +2321,19 @@ def setitem_inplace(self, indexer, value, warn: bool = True) -> None: stacklevel=find_stack_level(), ) - super().setitem_inplace(indexer, value) + arr = self.array + + # EAs will do this validation in their own __setitem__ methods. + if isinstance(arr, np.ndarray): + # Note: checking for ndarray instead of np.dtype means we exclude + # dt64/td64, which do their own validation. 
+ value = np_can_hold_element(arr.dtype, value) + + if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1: + # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 + value = value[0, ...] + + arr[indexer] = value def idelete(self, indexer) -> SingleBlockManager: """ @@ -2087,6 +2379,14 @@ def _equal_values(self, other: Self) -> bool: right = other.blocks[0].values return array_equals(left, right) + def grouped_reduce(self, func): + arr = self.array + res = func(arr) + index = default_index(len(res)) + + mgr = type(self).from_array(res, index) + return mgr + # -------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index ccd7222fb16e1..7c1d3ff774d0a 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -23,15 +23,12 @@ def test_namespace(): "concat", "managers", "construction", - "base", "api", "ops", ] expected = [ "make_block", - "DataManager", "BlockManager", - "SingleDataManager", "SingleBlockManager", "concatenate_managers", ] From 9c76d546a2eaa53bfb9069a3ebe31151ebcf24c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Fri, 2 Feb 2024 11:55:15 -0600 Subject: [PATCH 18/50] check ExtensionType in is_datetime64_any_dtype for array-likes (#57060) * check for ExtensionType in is_datetime64_any_dtype * use pre-commit * add test and move doc entry * check not date in test * fix condition * check type is not datetime.date * fix comparison * move description to ExtensionArray * return True for date types --- doc/source/whatsnew/v3.0.0.rst | 3 +-- pandas/core/dtypes/common.py | 6 +++++- pandas/tests/extension/test_arrow.py | 9 +++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f9117253b61c1..d6d33ed873564 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ 
b/doc/source/whatsnew/v3.0.0.rst @@ -130,7 +130,6 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`) - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) -- Categorical ^^^^^^^^^^^ @@ -219,7 +218,7 @@ Sparse ExtensionArray ^^^^^^^^^^^^^^ -- +- Fixed bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`) - Styler diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 5e5b7bdad74d8..a53bbe9935684 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -890,7 +890,11 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: tipo = _get_dtype(arr_or_dtype) except TypeError: return False - return lib.is_np_dtype(tipo, "M") or isinstance(tipo, DatetimeTZDtype) + return ( + lib.is_np_dtype(tipo, "M") + or isinstance(tipo, DatetimeTZDtype) + or (isinstance(tipo, ExtensionDtype) and tipo.kind == "M") + ) def is_datetime64_ns_dtype(arr_or_dtype) -> bool: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 6970c589dd36f..62e4629ca7cb7 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -53,6 +53,7 @@ from pandas.api.extensions import no_default from pandas.api.types import ( is_bool_dtype, + is_datetime64_any_dtype, is_float_dtype, is_integer_dtype, is_numeric_dtype, @@ -1531,6 +1532,14 @@ def test_is_unsigned_integer_dtype(data): assert not is_unsigned_integer_dtype(data) +def test_is_datetime64_any_dtype(data): + pa_type = data.dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_type) or pa.types.is_date(pa_type): + assert is_datetime64_any_dtype(data) + else: + assert not is_datetime64_any_dtype(data) + + def test_is_float_dtype(data): pa_type = data.dtype.pyarrow_dtype if pa.types.is_floating(pa_type): From 
4663edd90a7ebc744aa89e75805d4217940fb8e6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 2 Feb 2024 09:56:47 -0800 Subject: [PATCH 19/50] ENH: read_stata return non-nano (#55642) * ENH: read_stata return non-nano * GH ref * mypy fixup * update doctest * simplify * avoid Series.view * dont go through Series * move whatsnew * remove outdated whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/io/stata.py | 146 ++++++++++++--------------------- pandas/tests/io/test_stata.py | 82 ++++++++++++------ 3 files changed, 110 insertions(+), 119 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d6d33ed873564..73776c2d6ee21 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -29,6 +29,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`) +- :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`) - .. 
--------------------------------------------------------------------------- diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 15ef20e9f453e..a2c15938c04bf 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -62,7 +62,6 @@ Timestamp, isna, to_datetime, - to_timedelta, ) from pandas.core.frame import DataFrame from pandas.core.indexes.base import Index @@ -232,6 +231,7 @@ stata_epoch: Final = datetime(1960, 1, 1) +unix_epoch: Final = datetime(1970, 1, 1) def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: @@ -256,7 +256,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: >>> dates = pd.Series([52]) >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw") 0 1961-01-01 - dtype: datetime64[ns] + dtype: datetime64[s] Notes ----- @@ -280,64 +280,43 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: date - ty years since 0000 """ - MIN_YEAR, MAX_YEAR = Timestamp.min.year, Timestamp.max.year - MAX_DAY_DELTA = (Timestamp.max - datetime(1960, 1, 1)).days - MIN_DAY_DELTA = (Timestamp.min - datetime(1960, 1, 1)).days - MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000 - MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000 - def convert_year_month_safe(year, month) -> Series: - """ - Convert year and month to datetimes, using pandas vectorized versions - when the date range falls within the range supported by pandas. - Otherwise it falls back to a slower but more robust method - using datetime. - """ - if year.max() < MAX_YEAR and year.min() > MIN_YEAR: - return to_datetime(100 * year + month, format="%Y%m") - else: - index = getattr(year, "index", None) - return Series([datetime(y, m, 1) for y, m in zip(year, month)], index=index) - - def convert_year_days_safe(year, days) -> Series: - """ - Converts year (e.g. 
1999) and days since the start of the year to a - datetime or datetime64 Series - """ - if year.max() < (MAX_YEAR - 1) and year.min() > MIN_YEAR: - return to_datetime(year, format="%Y") + to_timedelta(days, unit="d") - else: - index = getattr(year, "index", None) - value = [ - datetime(y, 1, 1) + timedelta(days=int(d)) for y, d in zip(year, days) - ] - return Series(value, index=index) + if fmt.startswith(("%tc", "tc")): + # Delta ms relative to base + td = np.timedelta64(stata_epoch - unix_epoch, "ms") + res = np.array(dates._values, dtype="M8[ms]") + td + return Series(res, index=dates.index) - def convert_delta_safe(base, deltas, unit) -> Series: - """ - Convert base dates and deltas to datetimes, using pandas vectorized - versions if the deltas satisfy restrictions required to be expressed - as dates in pandas. - """ - index = getattr(deltas, "index", None) - if unit == "d": - if deltas.max() > MAX_DAY_DELTA or deltas.min() < MIN_DAY_DELTA: - values = [base + timedelta(days=int(d)) for d in deltas] - return Series(values, index=index) - elif unit == "ms": - if deltas.max() > MAX_MS_DELTA or deltas.min() < MIN_MS_DELTA: - values = [ - base + timedelta(microseconds=(int(d) * 1000)) for d in deltas - ] - return Series(values, index=index) - else: - raise ValueError("format not understood") - base = to_datetime(base) - deltas = to_timedelta(deltas, unit=unit) - return base + deltas + elif fmt.startswith(("%td", "td", "%d", "d")): + # Delta days relative to base + td = np.timedelta64(stata_epoch - unix_epoch, "D") + res = np.array(dates._values, dtype="M8[D]") + td + return Series(res, index=dates.index) + + elif fmt.startswith(("%tm", "tm")): + # Delta months relative to base + ordinals = dates + (stata_epoch.year - unix_epoch.year) * 12 + res = np.array(ordinals, dtype="M8[M]").astype("M8[s]") + return Series(res, index=dates.index) + + elif fmt.startswith(("%tq", "tq")): + # Delta quarters relative to base + ordinals = dates + (stata_epoch.year - unix_epoch.year) 
* 4 + res = np.array(ordinals, dtype="M8[3M]").astype("M8[s]") + return Series(res, index=dates.index) + + elif fmt.startswith(("%th", "th")): + # Delta half-years relative to base + ordinals = dates + (stata_epoch.year - unix_epoch.year) * 2 + res = np.array(ordinals, dtype="M8[6M]").astype("M8[s]") + return Series(res, index=dates.index) + + elif fmt.startswith(("%ty", "ty")): + # Years -- not delta + ordinals = dates - 1970 + res = np.array(ordinals, dtype="M8[Y]").astype("M8[s]") + return Series(res, index=dates.index) - # TODO(non-nano): If/when pandas supports more than datetime64[ns], this - # should be improved to use correct range, e.g. datetime[Y] for yearly bad_locs = np.isnan(dates) has_bad_values = False if bad_locs.any(): @@ -345,11 +324,7 @@ def convert_delta_safe(base, deltas, unit) -> Series: dates._values[bad_locs] = 1.0 # Replace with NaT dates = dates.astype(np.int64) - if fmt.startswith(("%tc", "tc")): # Delta ms relative to base - base = stata_epoch - ms = dates - conv_dates = convert_delta_safe(base, ms, "ms") - elif fmt.startswith(("%tC", "tC")): + if fmt.startswith(("%tC", "tC")): warnings.warn( "Encountered %tC format. Leaving in Stata Internal Format.", stacklevel=find_stack_level(), @@ -358,33 +333,18 @@ def convert_delta_safe(base, deltas, unit) -> Series: if has_bad_values: conv_dates[bad_locs] = NaT return conv_dates - # Delta days relative to base - elif fmt.startswith(("%td", "td", "%d", "d")): - base = stata_epoch - days = dates - conv_dates = convert_delta_safe(base, days, "d") # does not count leap days - 7 days is a week. 
# 52nd week may have more than 7 days elif fmt.startswith(("%tw", "tw")): year = stata_epoch.year + dates // 52 days = (dates % 52) * 7 - conv_dates = convert_year_days_safe(year, days) - elif fmt.startswith(("%tm", "tm")): # Delta months relative to base - year = stata_epoch.year + dates // 12 - month = (dates % 12) + 1 - conv_dates = convert_year_month_safe(year, month) - elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base - year = stata_epoch.year + dates // 4 - quarter_month = (dates % 4) * 3 + 1 - conv_dates = convert_year_month_safe(year, quarter_month) - elif fmt.startswith(("%th", "th")): # Delta half-years relative to base - year = stata_epoch.year + dates // 2 - month = (dates % 2) * 6 + 1 - conv_dates = convert_year_month_safe(year, month) - elif fmt.startswith(("%ty", "ty")): # Years -- not delta - year = dates - first_month = np.ones_like(dates) - conv_dates = convert_year_month_safe(year, first_month) + per_y = (year - 1970).array.view("Period[Y]") + per_d = per_y.asfreq("D", how="S") + per_d_shifted = per_d + days._values + per_s = per_d_shifted.asfreq("s", how="S") + conv_dates_arr = per_s.view("M8[s]") + conv_dates = Series(conv_dates_arr, index=dates.index) + else: raise ValueError(f"Date fmt {fmt} not understood") @@ -409,6 +369,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: index = dates.index NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000 US_PER_DAY = NS_PER_DAY / 1000 + MS_PER_DAY = NS_PER_DAY / 1_000_000 def parse_dates_safe( dates: Series, delta: bool = False, year: bool = False, days: bool = False @@ -416,17 +377,18 @@ def parse_dates_safe( d = {} if lib.is_np_dtype(dates.dtype, "M"): if delta: - time_delta = dates - Timestamp(stata_epoch).as_unit("ns") - d["delta"] = time_delta._values.view(np.int64) // 1000 # microseconds + time_delta = dates.dt.as_unit("ms") - Timestamp(stata_epoch).as_unit( + "ms" + ) + d["delta"] = time_delta._values.view(np.int64) if days or year: date_index = 
DatetimeIndex(dates) d["year"] = date_index._data.year d["month"] = date_index._data.month if days: - days_in_ns = dates._values.view(np.int64) - to_datetime( - d["year"], format="%Y" - )._values.view(np.int64) - d["days"] = days_in_ns // NS_PER_DAY + year_start = np.asarray(dates).astype("M8[Y]").astype(dates.dtype) + diff = dates - year_start + d["days"] = np.asarray(diff).astype("m8[D]").view("int64") elif infer_dtype(dates, skipna=False) == "datetime": if delta: @@ -466,7 +428,7 @@ def g(x: datetime) -> int: if fmt in ["%tc", "tc"]: d = parse_dates_safe(dates, delta=True) - conv_dates = d.delta / 1000 + conv_dates = d.delta elif fmt in ["%tC", "tC"]: warnings.warn( "Stata Internal Format tC not supported.", @@ -475,7 +437,7 @@ def g(x: datetime) -> int: conv_dates = dates elif fmt in ["%td", "td"]: d = parse_dates_safe(dates, delta=True) - conv_dates = d.delta // US_PER_DAY + conv_dates = d.delta // MS_PER_DAY elif fmt in ["%tw", "tw"]: d = parse_dates_safe(dates, year=True, days=True) conv_dates = 52 * (d.year - stata_epoch.year) + d.days // 7 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 4aa0b2f5ae8c9..c12bcfb91a4c7 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -174,7 +174,16 @@ def test_read_dta2(self, datapath): "yearly_date", ], ) - expected["yearly_date"] = expected["yearly_date"].astype("O") + # TODO(GH#55564): just pass M8[s] to the constructor + expected["datetime_c"] = expected["datetime_c"].astype("M8[ms]") + expected["date"] = expected["date"].astype("M8[s]") + expected["weekly_date"] = expected["weekly_date"].astype("M8[s]") + expected["monthly_date"] = expected["monthly_date"].astype("M8[s]") + expected["quarterly_date"] = expected["quarterly_date"].astype("M8[s]") + expected["half_yearly_date"] = expected["half_yearly_date"].astype("M8[s]") + expected["yearly_date"] = ( + expected["yearly_date"].astype("Period[s]").array.view("M8[s]") + ) path1 = datapath("io", "data", 
"stata", "stata2_114.dta") path2 = datapath("io", "data", "stata", "stata2_115.dta") @@ -360,12 +369,15 @@ def test_read_write_dta10(self, version): with tm.ensure_clean() as path: original.to_stata(path, convert_dates={"datetime": "tc"}, version=version) written_and_read_again = self.read_dta(path) - # original.index is np.int32, read index is np.int64 - tm.assert_frame_equal( - written_and_read_again.set_index("index"), - original, - check_index_type=False, - ) + + expected = original[:] + # "tc" convert_dates means we store in ms + expected["datetime"] = expected["datetime"].astype("M8[ms]") + + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + expected, + ) def test_stata_doc_examples(self): with tm.ensure_clean() as path: @@ -514,9 +526,10 @@ def test_read_write_reread_dta15(self, file, datapath): expected["long_"] = expected["long_"].astype(np.int32) expected["float_"] = expected["float_"].astype(np.float32) expected["double_"] = expected["double_"].astype(np.float64) - expected["date_td"] = expected["date_td"].apply( - datetime.strptime, args=("%Y-%m-%d",) - ) + + # TODO(GH#55564): directly cast to M8[s] + arr = expected["date_td"].astype("Period[D]")._values.asfreq("s", how="S") + expected["date_td"] = arr.view("M8[s]") file = datapath("io", "data", "stata", f"{file}.dta") parsed = self.read_dta(file) @@ -636,10 +649,11 @@ def test_dates_invalid_column(self): written_and_read_again = self.read_dta(path) - modified = original - modified.columns = ["_0"] - modified.index = original.index.astype(np.int32) - tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + expected = original.copy() + expected.columns = ["_0"] + expected.index = original.index.astype(np.int32) + expected["_0"] = expected["_0"].astype("M8[ms]") + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_105(self, datapath): # Data obtained from: @@ -684,7 +698,9 @@ def test_date_export_formats(self): [expected_values], 
index=pd.Index([0], dtype=np.int32, name="index"), columns=columns, + dtype="M8[s]", ) + expected["tc"] = expected["tc"].astype("M8[ms]") with tm.ensure_clean() as path: original.to_stata(path, convert_dates=conversions) @@ -881,6 +897,14 @@ def test_big_dates(self, datapath): expected[5][5] = expected[5][6] = datetime(1678, 1, 1) expected = DataFrame(expected, columns=columns, dtype=object) + expected["date_tc"] = expected["date_tc"].astype("M8[ms]") + expected["date_td"] = expected["date_td"].astype("M8[s]") + expected["date_tm"] = expected["date_tm"].astype("M8[s]") + expected["date_tw"] = expected["date_tw"].astype("M8[s]") + expected["date_tq"] = expected["date_tq"].astype("M8[s]") + expected["date_th"] = expected["date_th"].astype("M8[s]") + expected["date_ty"] = expected["date_ty"].astype("M8[s]") + parsed_115 = read_stata(datapath("io", "data", "stata", "stata9_115.dta")) parsed_117 = read_stata(datapath("io", "data", "stata", "stata9_117.dta")) tm.assert_frame_equal(expected, parsed_115, check_datetimelike_compat=True) @@ -906,9 +930,7 @@ def test_dtype_conversion(self, datapath): expected["long_"] = expected["long_"].astype(np.int32) expected["float_"] = expected["float_"].astype(np.float32) expected["double_"] = expected["double_"].astype(np.float64) - expected["date_td"] = expected["date_td"].apply( - datetime.strptime, args=("%Y-%m-%d",) - ) + expected["date_td"] = expected["date_td"].astype("M8[s]") no_conversion = read_stata( datapath("io", "data", "stata", "stata6_117.dta"), convert_dates=True @@ -922,12 +944,10 @@ def test_dtype_conversion(self, datapath): ) # read_csv types are the same - expected = self.read_csv(datapath("io", "data", "stata", "stata6.csv")) - expected["date_td"] = expected["date_td"].apply( - datetime.strptime, args=("%Y-%m-%d",) - ) + expected2 = self.read_csv(datapath("io", "data", "stata", "stata6.csv")) + expected2["date_td"] = expected["date_td"] - tm.assert_frame_equal(expected, conversion) + 
tm.assert_frame_equal(expected2, conversion) def test_drop_column(self, datapath): expected = self.read_csv(datapath("io", "data", "stata", "stata6.csv")) @@ -1392,10 +1412,14 @@ def test_default_date_conversion(self): } ) + expected = original[:] + # "tc" for convert_dates below stores with "ms" resolution + expected["dates"] = expected["dates"].astype("M8[ms]") + with tm.ensure_clean() as path: original.to_stata(path, write_index=False) reread = read_stata(path, convert_dates=True) - tm.assert_frame_equal(original, reread) + tm.assert_frame_equal(expected, reread) original.to_stata(path, write_index=False, convert_dates={"dates": "tc"}) direct = read_stata(path, convert_dates=True) @@ -1655,11 +1679,14 @@ def test_writer_117(self): version=117, ) written_and_read_again = self.read_dta(path) - # original.index is np.int32, read index is np.int64 + + expected = original[:] + # "tc" for convert_dates means we store with "ms" resolution + expected["datetime"] = expected["datetime"].astype("M8[ms]") + tm.assert_frame_equal( written_and_read_again.set_index("index"), - original, - check_index_type=False, + expected, ) tm.assert_frame_equal(original, copy) @@ -1932,7 +1959,8 @@ def test_read_write_ea_dtypes(self, dtype_backend): "b": ["a", "b", "c"], "c": [1.0, 0, np.nan], "d": [1.5, 2.5, 3.5], - "e": pd.date_range("2020-12-31", periods=3, freq="D"), + # stata stores with ms unit, so unit does not round-trip exactly + "e": pd.date_range("2020-12-31", periods=3, freq="D", unit="ms"), }, index=pd.Index([0, 1, 2], name="index", dtype=np.int32), ) From 4f0870efecff989c211dd8cfe975ef2127cc86b8 Mon Sep 17 00:00:00 2001 From: rohanjain101 <38412262+rohanjain101@users.noreply.github.com> Date: Fri, 2 Feb 2024 12:57:54 -0500 Subject: [PATCH 20/50] Series.str.find fix for pd.ArrowDtype(pa.string()) (#56792) * fix find * gh reference * add test for Nones * fix min version compat * restore test * improve test cases * fix empty string * inline * improve tests * fix * Revert "fix" 
This reverts commit 7fa21eb24682ae587a0b3033942fbe1247f98921. * fix * merge * inline --------- Co-authored-by: Rohan Jain --- pandas/core/arrays/arrow/array.py | 28 ++++++---- pandas/tests/extension/test_arrow.py | 80 +++++++++++++++++++++++++--- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 392b4e3cc616a..7bab8c9395ac6 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2364,20 +2364,26 @@ def _str_fullmatch( return self._str_match(pat, case, flags, na) def _str_find(self, sub: str, start: int = 0, end: int | None = None) -> Self: - if start != 0 and end is not None: + if (start == 0 or start is None) and end is None: + result = pc.find_substring(self._pa_array, sub) + else: + if sub == "": + # GH 56792 + result = self._apply_elementwise(lambda val: val.find(sub, start, end)) + return type(self)(pa.chunked_array(result)) + if start is None: + start_offset = 0 + start = 0 + elif start < 0: + start_offset = pc.add(start, pc.utf8_length(self._pa_array)) + start_offset = pc.if_else(pc.less(start_offset, 0), 0, start_offset) + else: + start_offset = start slices = pc.utf8_slice_codeunits(self._pa_array, start, stop=end) result = pc.find_substring(slices, sub) - not_found = pc.equal(result, -1) - start_offset = max(0, start) + found = pc.not_equal(result, pa.scalar(-1, type=result.type)) offset_result = pc.add(result, start_offset) - result = pc.if_else(not_found, result, offset_result) - elif start == 0 and end is None: - slices = self._pa_array - result = pc.find_substring(slices, sub) - else: - raise NotImplementedError( - f"find not implemented with {sub=}, {start=}, {end=}" - ) + result = pc.if_else(found, offset_result, -1) return type(self)(result) def _str_join(self, sep: str) -> Self: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 62e4629ca7cb7..3ce2b38bf8644 100644 --- 
a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -23,6 +23,7 @@ BytesIO, StringIO, ) +from itertools import combinations import operator import pickle import re @@ -1933,13 +1934,18 @@ def test_str_fullmatch(pat, case, na, exp): @pytest.mark.parametrize( - "sub, start, end, exp, exp_typ", - [["ab", 0, None, [0, None], pa.int32()], ["bc", 1, 3, [1, None], pa.int64()]], + "sub, start, end, exp, exp_type", + [ + ["ab", 0, None, [0, None], pa.int32()], + ["bc", 1, 3, [1, None], pa.int64()], + ["ab", 1, 3, [-1, None], pa.int64()], + ["ab", -3, -3, [-1, None], pa.int64()], + ], ) -def test_str_find(sub, start, end, exp, exp_typ): +def test_str_find(sub, start, end, exp, exp_type): ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string())) result = ser.str.find(sub, start=start, end=end) - expected = pd.Series(exp, dtype=ArrowDtype(exp_typ)) + expected = pd.Series(exp, dtype=ArrowDtype(exp_type)) tm.assert_series_equal(result, expected) @@ -1951,10 +1957,70 @@ def test_str_find_negative_start(): tm.assert_series_equal(result, expected) -def test_str_find_notimplemented(): +def test_str_find_no_end(): ser = pd.Series(["abc", None], dtype=ArrowDtype(pa.string())) - with pytest.raises(NotImplementedError, match="find not implemented"): - ser.str.find("ab", start=1) + if pa_version_under13p0: + # https://github.com/apache/arrow/issues/36311 + with pytest.raises(pa.lib.ArrowInvalid, match="Negative buffer resize"): + ser.str.find("ab", start=1) + else: + result = ser.str.find("ab", start=1) + expected = pd.Series([-1, None], dtype="int64[pyarrow]") + tm.assert_series_equal(result, expected) + + +def test_str_find_negative_start_negative_end(): + # GH 56791 + ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string())) + result = ser.str.find(sub="d", start=-6, end=-3) + expected = pd.Series([3, None], dtype=ArrowDtype(pa.int64())) + tm.assert_series_equal(result, expected) + + +def test_str_find_large_start(): + # GH 56791 + ser = 
pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string())) + if pa_version_under13p0: + # https://github.com/apache/arrow/issues/36311 + with pytest.raises(pa.lib.ArrowInvalid, match="Negative buffer resize"): + ser.str.find(sub="d", start=16) + else: + result = ser.str.find(sub="d", start=16) + expected = pd.Series([-1, None], dtype=ArrowDtype(pa.int64())) + tm.assert_series_equal(result, expected) + + +@pytest.mark.skipif( + pa_version_under13p0, reason="https://github.com/apache/arrow/issues/36311" +) +@pytest.mark.parametrize("start", list(range(-15, 15)) + [None]) +@pytest.mark.parametrize("end", list(range(-15, 15)) + [None]) +@pytest.mark.parametrize( + "sub", + ["abcaadef"[x:y] for x, y in combinations(range(len("abcaadef") + 1), r=2)] + + [ + "", + "az", + "abce", + ], +) +def test_str_find_e2e(start, end, sub): + s = pd.Series( + ["abcaadef", "abc", "abcdeddefgj8292", "ab", "a", ""], + dtype=ArrowDtype(pa.string()), + ) + object_series = s.astype(pd.StringDtype()) + result = s.str.find(sub, start, end) + expected = object_series.str.find(sub, start, end).astype(result.dtype) + tm.assert_series_equal(result, expected) + + +def test_str_find_negative_start_negative_end_no_match(): + # GH 56791 + ser = pd.Series(["abcdefg", None], dtype=ArrowDtype(pa.string())) + result = ser.str.find(sub="d", start=-3, end=-6) + expected = pd.Series([-1, None], dtype=ArrowDtype(pa.int64())) + tm.assert_series_equal(result, expected) @pytest.mark.parametrize( From 77188e0e04db70c7a1bb63008388791db37ed810 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Fri, 2 Feb 2024 17:54:29 -0500 Subject: [PATCH 21/50] DEPR: Enforce deprecation of groupby(...).grouper (#57207) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/groupby/groupby.py | 11 ----------- pandas/core/groupby/grouper.py | 11 ----------- pandas/tests/groupby/test_grouping.py | 17 +++-------------- 4 files changed, 4 insertions(+), 36 deletions(-) diff --git 
a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 73776c2d6ee21..25163a0f678b0 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -103,6 +103,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) +- Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) - Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 68626534f1e74..64f882e5a146c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -794,17 +794,6 @@ def __repr__(self) -> str: # TODO: Better repr for GroupBy object return object.__repr__(self) - @final - @property - def grouper(self) -> ops.BaseGrouper: - warnings.warn( - f"{type(self).__name__}.grouper is deprecated and will be removed in a " - "future version of pandas.", - category=FutureWarning, - stacklevel=find_stack_level(), - ) - return self._grouper - @final @property def groups(self) -> dict[Hashable, np.ndarray]: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 4b9cf5ab75525..f377c9d03d05a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -428,17 +428,6 @@ def obj(self): ) return self._obj_deprecated - @final - @property - def grouper(self): - warnings.warn( - f"{type(self).__name__}.grouper is deprecated and will be removed " - "in a future version. 
Use GroupBy.grouper instead.", - FutureWarning, - stacklevel=find_stack_level(), - ) - return self._grouper_deprecated - @final @property def groups(self): diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 841dd29edab10..ee1df1242442f 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -414,9 +414,7 @@ def test_grouper_getting_correct_binner(self): def test_grouper_iter(self, df): gb = df.groupby("A") - msg = "DataFrameGroupBy.grouper is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouper = gb.grouper + grouper = gb._grouper result = sorted(grouper) expected = ["bar", "foo"] assert result == expected @@ -428,9 +426,7 @@ def test_empty_groups(self, df): def test_groupby_grouper(self, df): grouped = df.groupby("A") - msg = "DataFrameGroupBy.grouper is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouper = grouped.grouper + grouper = grouped._grouper result = df.groupby(grouper).mean(numeric_only=True) expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -791,9 +787,7 @@ def test_groupby_empty(self): # check name gb = s.groupby(s) - msg = "SeriesGroupBy.grouper is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouper = gb.grouper + grouper = gb._grouper result = grouper.names expected = ["name"] assert result == expected @@ -1155,11 +1149,6 @@ def test_grouper_groups(): res = grper.groups assert res is gb.groups - msg = "Use GroupBy.grouper instead" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = grper.grouper - assert res is gb._grouper - msg = "Grouper.obj is deprecated and will be removed" with tm.assert_produces_warning(FutureWarning, match=msg): res = grper.obj From b51c38aed7f80807273c6facb8c8ab4a422aded8 Mon Sep 17 00:00:00 2001 From: "Christopher Horn, PhD" <44479110+chrish935@users.noreply.github.com> Date: Fri, 2 Feb 2024 
20:23:00 -0500 Subject: [PATCH 22/50] BUG: Fixing broken link in getting_started.md (#57218) Fixing broken link in getting_started.md --- web/pandas/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/pandas/getting_started.md b/web/pandas/getting_started.md index cb14e52edad2c..61ef26f984567 100644 --- a/web/pandas/getting_started.md +++ b/web/pandas/getting_started.md @@ -6,7 +6,7 @@ The next steps provides the easiest and recommended way to set up your environment to use pandas. Other installation options can be found in the [advanced installation page]({{ base_url}}docs/getting_started/install.html). -1. Download [Anaconda](https://www.anaconda.com/distribution/) for your operating system and +1. Download [Anaconda](https://www.anaconda.com/download/) for your operating system and the latest Python version, run the installer, and follow the steps. Please note: - It is not needed (and discouraged) to install Anaconda as root or administrator. From 8d1dd0783f709b4a32bdccdb2dba150a3b30b8cb Mon Sep 17 00:00:00 2001 From: jrmylow <33999325+jrmylow@users.noreply.github.com> Date: Sat, 3 Feb 2024 19:29:59 +0800 Subject: [PATCH 23/50] CI: Use sys.executable instead of "python" in subprocess (#57220) --- scripts/validate_docstrings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 682d64244bc1f..d54592252206e 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -188,7 +188,7 @@ def validate_pep8(self): file.write(content) file.flush() cmd = [ - "python", + sys.executable, "-m", "flake8", "--format=%(row)d\t%(col)d\t%(code)s\t%(text)s", From 3c403922a984f002616bfb731f426f3a383ee935 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 3 Feb 2024 08:00:15 -1000 Subject: [PATCH 24/50] TST: Reduce parameterization of test_str_find_e2e (#57221) --- 
pandas/tests/extension/test_arrow.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 3ce2b38bf8644..b49f107859159 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -23,7 +23,6 @@ BytesIO, StringIO, ) -from itertools import combinations import operator import pickle import re @@ -1993,17 +1992,9 @@ def test_str_find_large_start(): @pytest.mark.skipif( pa_version_under13p0, reason="https://github.com/apache/arrow/issues/36311" ) -@pytest.mark.parametrize("start", list(range(-15, 15)) + [None]) -@pytest.mark.parametrize("end", list(range(-15, 15)) + [None]) -@pytest.mark.parametrize( - "sub", - ["abcaadef"[x:y] for x, y in combinations(range(len("abcaadef") + 1), r=2)] - + [ - "", - "az", - "abce", - ], -) +@pytest.mark.parametrize("start", [-15, -3, 0, 1, 15, None]) +@pytest.mark.parametrize("end", [-15, -1, 0, 3, 15, None]) +@pytest.mark.parametrize("sub", ["", "az", "abce", "a", "caa"]) def test_str_find_e2e(start, end, sub): s = pd.Series( ["abcaadef", "abc", "abcdeddefgj8292", "ab", "a", ""], From cb16bb085d79c5eab3da1021e684d1d8e594d802 Mon Sep 17 00:00:00 2001 From: partev Date: Sat, 3 Feb 2024 15:01:01 -0500 Subject: [PATCH 25/50] Update style.ipynb http -> https (#57227) http://seaborn.pydata.org/ -> https://seaborn.pydata.org/ --- doc/source/user_guide/style.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index f22a506499cf4..2d4b0f6a7545e 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -1280,7 +1280,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](http://seaborn.pydata.org/) to get a nice colormap." 
+ "You can create \"heatmaps\" with the `background_gradient` and `text_gradient` methods. These require matplotlib, and we'll use [Seaborn](https://seaborn.pydata.org/) to get a nice colormap." ] }, { From e69a051c4b88b4697866f593c433f1a3135eceec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <6618166+twoertwein@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:24:50 -0500 Subject: [PATCH 26/50] TYP: misc IO return types (#57228) * TYP: misc IO return types * isort --- pandas/_typing.py | 3 +++ pandas/io/excel/_openpyxl.py | 3 ++- pandas/io/formats/excel.py | 4 ++- pandas/io/formats/format.py | 6 +++-- pandas/io/formats/info.py | 8 +++--- pandas/io/formats/style.py | 2 +- pandas/io/formats/style_render.py | 4 ++- pandas/io/html.py | 2 +- pandas/io/json/_normalize.py | 27 ++++++++++++++++++-- pandas/io/parquet.py | 2 +- pandas/io/parsers/base_parser.py | 36 +++++++++++++++------------ pandas/io/parsers/c_parser_wrapper.py | 14 ++++++++--- pandas/io/parsers/python_parser.py | 1 + pandas/io/parsers/readers.py | 2 +- pandas/io/sas/sas7bdat.py | 5 ++-- pandas/io/sql.py | 15 +++++------ pandas/io/stata.py | 2 +- 17 files changed, 91 insertions(+), 45 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index c704516f74300..1fec41463904c 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -529,3 +529,6 @@ def closed(self) -> bool: Callable[[HashableT], bool], None, ] + +# maintaine the sub-type of any hashable sequence +SequenceT = TypeVar("SequenceT", bound=Sequence[Hashable]) diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index c546443868a62..218a592c22b4a 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -26,6 +26,7 @@ if TYPE_CHECKING: from openpyxl import Workbook from openpyxl.descriptors.serialisable import Serialisable + from openpyxl.styles import Fill from pandas._typing import ( ExcelWriterIfSheetExists, @@ -244,7 +245,7 @@ def _convert_to_stop(cls, stop_seq): return 
map(cls._convert_to_color, stop_seq) @classmethod - def _convert_to_fill(cls, fill_dict: dict[str, Any]): + def _convert_to_fill(cls, fill_dict: dict[str, Any]) -> Fill: """ Convert ``fill_dict`` to an openpyxl v2 Fill object. diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 0c3a53eb1cfea..892f69e76359b 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -284,7 +284,9 @@ def build_border( for side in ["top", "right", "bottom", "left"] } - def _border_style(self, style: str | None, width: str | None, color: str | None): + def _border_style( + self, style: str | None, width: str | None, color: str | None + ) -> str | None: # convert styles and widths to openxml, one of: # 'dashDot' # 'dashDotDot' diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 00c7526edfa48..65124f97459cd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1346,7 +1346,9 @@ def get_result_as_array(self) -> np.ndarray: the parameters given at initialisation, as a numpy array """ - def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): + def format_with_na_rep( + values: ArrayLike, formatter: Callable, na_rep: str + ) -> np.ndarray: mask = isna(values) formatted = np.array( [ @@ -1358,7 +1360,7 @@ def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str): def format_complex_with_na_rep( values: ArrayLike, formatter: Callable, na_rep: str - ): + ) -> np.ndarray: real_values = np.real(values).ravel() # type: ignore[arg-type] imag_values = np.imag(values).ravel() # type: ignore[arg-type] real_mask, imag_mask = isna(real_values), isna(imag_values) diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 552affbd053f2..2d28b032ca49d 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -392,7 +392,7 @@ def dtype_counts(self) -> Mapping[str, int]: @property @abstractmethod - def non_null_counts(self) -> Sequence[int]: + def 
non_null_counts(self) -> list[int] | Series: """Sequence of non-null counts for all columns or column (if series).""" @property @@ -486,7 +486,7 @@ def col_count(self) -> int: return len(self.ids) @property - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> Series: """Sequence of non-null counts for all columns or column (if series).""" return self.data.count() @@ -546,7 +546,7 @@ def render( printer.to_buffer(buf) @property - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> list[int]: return [self.data.count()] @property @@ -750,7 +750,7 @@ def memory_usage_string(self) -> str: return self.info.memory_usage_string @property - def non_null_counts(self) -> Sequence[int]: + def non_null_counts(self) -> list[int] | Series: return self.info.non_null_counts def add_object_type_line(self) -> None: diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c85c6c3ef0ff7..0e67949709a22 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3827,7 +3827,7 @@ def _background_gradient( vmax: float | None = None, gmap: Sequence | np.ndarray | DataFrame | Series | None = None, text_only: bool = False, -): +) -> list[str] | DataFrame: """ Color background in a range according to the data or a gradient map """ diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 80df46bf2336a..4ba094ec614d0 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -2030,7 +2030,9 @@ def _class_styles(self): } ] - def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str): + def _pseudo_css( + self, uuid: str, name: str, row: int, col: int, text: str + ) -> list[CSSDict]: """ For every table data-cell that has a valid tooltip (not None, NaN or empty string) must create two pseudo CSS entries for the specific diff --git a/pandas/io/html.py b/pandas/io/html.py index 0f3704b698915..302f901aa0d16 100644 --- a/pandas/io/html.py +++ 
b/pandas/io/html.py @@ -469,7 +469,7 @@ def row_is_all_th(row): def _expand_colspan_rowspan( self, rows, section: Literal["header", "footer", "body"] - ): + ) -> list[list]: """ Given a list of s, return a list of text rows. diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 39fbce0b6901c..49f95430d9bb9 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -11,6 +11,7 @@ TYPE_CHECKING, Any, DefaultDict, + overload, ) import numpy as np @@ -42,13 +43,35 @@ def convert_to_line_delimits(s: str) -> str: return convert_json_to_lines(s) +@overload def nested_to_record( - ds, + ds: dict, + prefix: str = ..., + sep: str = ..., + level: int = ..., + max_level: int | None = ..., +) -> dict[str, Any]: + ... + + +@overload +def nested_to_record( + ds: list[dict], + prefix: str = ..., + sep: str = ..., + level: int = ..., + max_level: int | None = ..., +) -> list[dict[str, Any]]: + ... + + +def nested_to_record( + ds: dict | list[dict], prefix: str = "", sep: str = ".", level: int = 0, max_level: int | None = None, -): +) -> dict[str, Any] | list[dict[str, Any]]: """ A simplified json_normalize diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index e04a3acc829f1..a6d58d6cffb10 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -150,7 +150,7 @@ def validate_dataframe(df: DataFrame) -> None: if not isinstance(df, DataFrame): raise ValueError("to_parquet only supports IO with DataFrames") - def write(self, df: DataFrame, path, compression, **kwargs): + def write(self, df: DataFrame, path, compression, **kwargs) -> None: raise AbstractMethodError(self) def read(self, path, columns=None, **kwargs) -> DataFrame: diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 09f0f2af8e5c6..3aef0692d5f59 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -84,7 +84,6 @@ if TYPE_CHECKING: from collections.abc import ( - Hashable, Iterable, Mapping, 
Sequence, @@ -94,7 +93,10 @@ ArrayLike, DtypeArg, DtypeObj, + Hashable, + HashableT, Scalar, + SequenceT, ) @@ -350,13 +352,13 @@ def extract(r): @final def _maybe_make_multi_index_columns( self, - columns: Sequence[Hashable], + columns: SequenceT, col_names: Sequence[Hashable] | None = None, - ) -> Sequence[Hashable] | MultiIndex: + ) -> SequenceT | MultiIndex: # possibly create a column mi here if is_potential_multi_index(columns): - list_columns = cast(list[tuple], columns) - return MultiIndex.from_tuples(list_columns, names=col_names) + columns_mi = cast("Sequence[tuple[Hashable, ...]]", columns) + return MultiIndex.from_tuples(columns_mi, names=col_names) return columns @final @@ -520,7 +522,7 @@ def _convert_to_ndarrays( verbose: bool = False, converters=None, dtypes=None, - ): + ) -> dict[Any, np.ndarray]: result = {} for c, values in dct.items(): conv_f = None if converters is None else converters.get(c, None) @@ -923,23 +925,23 @@ def _check_data_length( @overload def _evaluate_usecols( self, - usecols: set[int] | Callable[[Hashable], object], - names: Sequence[Hashable], + usecols: Callable[[Hashable], object], + names: Iterable[Hashable], ) -> set[int]: ... @overload def _evaluate_usecols( - self, usecols: set[str], names: Sequence[Hashable] - ) -> set[str]: + self, usecols: SequenceT, names: Iterable[Hashable] + ) -> SequenceT: ... @final def _evaluate_usecols( self, - usecols: Callable[[Hashable], object] | set[str] | set[int], - names: Sequence[Hashable], - ) -> set[str] | set[int]: + usecols: Callable[[Hashable], object] | SequenceT, + names: Iterable[Hashable], + ) -> SequenceT | set[int]: """ Check whether or not the 'usecols' parameter is a callable. 
If so, enumerates the 'names' @@ -952,7 +954,7 @@ def _evaluate_usecols( return usecols @final - def _validate_usecols_names(self, usecols, names: Sequence): + def _validate_usecols_names(self, usecols: SequenceT, names: Sequence) -> SequenceT: """ Validates that all usecols are present in a given list of names. If not, raise a ValueError that @@ -1072,7 +1074,9 @@ def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, lis return index_names, columns, index_col @final - def _get_empty_meta(self, columns, dtype: DtypeArg | None = None): + def _get_empty_meta( + self, columns: Sequence[HashableT], dtype: DtypeArg | None = None + ) -> tuple[Index, list[HashableT], dict[HashableT, Series]]: columns = list(columns) index_col = self.index_col @@ -1275,7 +1279,7 @@ def _process_date_conversion( columns, keep_date_col: bool = False, dtype_backend=lib.no_default, -): +) -> tuple[dict, list]: def _isindex(colspec): return (isinstance(index_col, list) and colspec in index_col) or ( isinstance(index_names, list) and colspec in index_names diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 0cd788c5e5739..f24d7a628998e 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -41,10 +41,12 @@ ) from pandas._typing import ( + AnyArrayLike, ArrayLike, DtypeArg, DtypeObj, ReadCsvBuffer, + SequenceT, ) from pandas import ( @@ -225,7 +227,7 @@ def read( ) -> tuple[ Index | MultiIndex | None, Sequence[Hashable] | MultiIndex, - Mapping[Hashable, ArrayLike], + Mapping[Hashable, AnyArrayLike], ]: index: Index | MultiIndex | None column_names: Sequence[Hashable] | MultiIndex @@ -248,7 +250,11 @@ def read( names, dtype=self.dtype, ) - columns = self._maybe_make_multi_index_columns(columns, self.col_names) + # error: Incompatible types in assignment (expression has type + # "list[Hashable] | MultiIndex", variable has type "list[Hashable]") + columns = 
self._maybe_make_multi_index_columns( # type: ignore[assignment] + columns, self.col_names + ) if self.usecols is not None: columns = self._filter_usecols(columns) @@ -334,11 +340,11 @@ def read( return index, column_names, date_data - def _filter_usecols(self, names: Sequence[Hashable]) -> Sequence[Hashable]: + def _filter_usecols(self, names: SequenceT) -> SequenceT | list[Hashable]: # hackish usecols = self._evaluate_usecols(self.usecols, names) if usecols is not None and len(names) != len(usecols): - names = [ + return [ name for i, name in enumerate(names) if i in usecols or name in usecols ] return names diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index e830db559c5a5..dbda47172f6ac 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -266,6 +266,7 @@ def read( # done with first read, next time raise StopIteration self._first_chunk = False + index: Index | None columns: Sequence[Hashable] = list(self.orig_names) if not len(content): # pragma: no cover # DataFrame with the right metadata, even though it's length 0 diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index d35c153459bf8..71e1a31759a0c 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -2101,7 +2101,7 @@ def _floatify_na_values(na_values): return result -def _stringify_na_values(na_values, floatify: bool): +def _stringify_na_values(na_values, floatify: bool) -> set[str | float]: """return a stringified and numeric for these values""" result: list[str | float] = [] for x in na_values: diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 895079bc15588..275fad2a565bf 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -54,6 +54,7 @@ from pandas._typing import ( CompressionOptions, FilePath, + NaTType, ReadBuffer, ) @@ -62,7 +63,7 @@ _sas_origin = Timestamp("1960-01-01") -def _parse_datetime(sas_datetime: float, unit: str): +def 
_parse_datetime(sas_datetime: float, unit: str) -> datetime | NaTType: if isna(sas_datetime): return pd.NaT @@ -326,7 +327,7 @@ def __next__(self) -> DataFrame: return da # Read a single float of the given width (4 or 8). - def _read_float(self, offset: int, width: int): + def _read_float(self, offset: int, width: int) -> float: assert self._cached_page is not None if width == 4: return read_float_with_byteswap( diff --git a/pandas/io/sql.py b/pandas/io/sql.py index d9a5e6dfd0cf8..4e0ddd0f56ba8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -67,6 +67,7 @@ if TYPE_CHECKING: from collections.abc import ( + Generator, Iterator, Mapping, ) @@ -136,7 +137,7 @@ def _handle_date_column( return to_datetime(col, errors="coerce", format=format, utc=utc) -def _parse_date_columns(data_frame, parse_dates): +def _parse_date_columns(data_frame: DataFrame, parse_dates) -> DataFrame: """ Force non-datetime columns to be read as such. Supports both string formatted and integer timestamp columns. 
@@ -199,7 +200,7 @@ def _wrap_result( parse_dates=None, dtype: DtypeArg | None = None, dtype_backend: DtypeBackend | Literal["numpy"] = "numpy", -): +) -> DataFrame: """Wrap result set of a SQLAlchemy query in a DataFrame.""" frame = _convert_arrays_to_dataframe(data, columns, coerce_float, dtype_backend) @@ -1153,7 +1154,7 @@ def _query_iterator( coerce_float: bool = True, parse_dates=None, dtype_backend: DtypeBackend | Literal["numpy"] = "numpy", - ): + ) -> Generator[DataFrame, None, None]: """Return generator through chunked result set.""" has_read_data = False with exit_stack: @@ -1765,7 +1766,7 @@ def _query_iterator( parse_dates=None, dtype: DtypeArg | None = None, dtype_backend: DtypeBackend | Literal["numpy"] = "numpy", - ): + ) -> Generator[DataFrame, None, None]: """Return generator through chunked result set""" has_read_data = False with exit_stack: @@ -2466,7 +2467,7 @@ def _create_sql_schema( } -def _get_unicode_name(name: object): +def _get_unicode_name(name: object) -> str: try: uname = str(name).encode("utf-8", "strict").decode("utf-8") except UnicodeError as err: @@ -2474,7 +2475,7 @@ def _get_unicode_name(name: object): return uname -def _get_valid_sqlite_name(name: object): +def _get_valid_sqlite_name(name: object) -> str: # See https://stackoverflow.com/questions/6514274/how-do-you-escape-strings\ # -for-sqlite-table-column-names-in-python # Ensure the string can be encoded as UTF-8. 
@@ -2712,7 +2713,7 @@ def _query_iterator( parse_dates=None, dtype: DtypeArg | None = None, dtype_backend: DtypeBackend | Literal["numpy"] = "numpy", - ): + ) -> Generator[DataFrame, None, None]: """Return generator through chunked result set""" has_read_data = False while True: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index a2c15938c04bf..447c97d078e02 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -373,7 +373,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: def parse_dates_safe( dates: Series, delta: bool = False, year: bool = False, days: bool = False - ): + ) -> DataFrame: d = {} if lib.is_np_dtype(dates.dtype, "M"): if delta: From 94d575a724f6832c47c896356de201fbe4bfeae5 Mon Sep 17 00:00:00 2001 From: Steven Schaerer <53116297+stevenschaerer@users.noreply.github.com> Date: Sat, 3 Feb 2024 22:54:36 +0100 Subject: [PATCH 27/50] BUG: Raise if an aggregation function other than mean is used with ewm and times (#57225) * BUG: Raise if an aggregation function other than mean is used with ewm (#51695) * python 3.9 and mypy issue --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/window/ewm.py | 12 ++++++++++++ pandas/tests/window/test_ewm.py | 12 ++++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 25163a0f678b0..806a46c248e15 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -206,7 +206,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`) -- +- Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 3c07fc156aea1..01d1787d46ca0 100644 --- a/pandas/core/window/ewm.py +++ 
b/pandas/core/window/ewm.py @@ -587,6 +587,8 @@ def sum( ): if not self.adjust: raise NotImplementedError("sum is not implemented with adjust=False") + if self.times is not None: + raise NotImplementedError("sum is not implemented with times") if maybe_use_numba(engine): if self.method == "single": func = generate_numba_ewm_func @@ -658,6 +660,8 @@ def std(self, bias: bool = False, numeric_only: bool = False): raise NotImplementedError( f"{type(self).__name__}.std does not implement numeric_only" ) + if self.times is not None: + raise NotImplementedError("std is not implemented with times") return zsqrt(self.var(bias=bias, numeric_only=numeric_only)) @doc( @@ -691,6 +695,8 @@ def std(self, bias: bool = False, numeric_only: bool = False): agg_method="var", ) def var(self, bias: bool = False, numeric_only: bool = False): + if self.times is not None: + raise NotImplementedError("var is not implemented with times") window_func = window_aggregations.ewmcov wfunc = partial( window_func, @@ -753,6 +759,9 @@ def cov( bias: bool = False, numeric_only: bool = False, ): + if self.times is not None: + raise NotImplementedError("cov is not implemented with times") + from pandas import Series self._validate_numeric_only("cov", numeric_only) @@ -837,6 +846,9 @@ def corr( pairwise: bool | None = None, numeric_only: bool = False, ): + if self.times is not None: + raise NotImplementedError("corr is not implemented with times") + from pandas import Series self._validate_numeric_only("corr", numeric_only) diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 2e2cfa156019f..35c896dc0090b 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -173,6 +173,18 @@ def test_ewm_sum_adjust_false_notimplemented(): data.sum() +@pytest.mark.parametrize("method", ["sum", "std", "var", "cov", "corr"]) +def test_times_only_mean_implemented(frame_or_series, method): + # GH 51695 + halflife = "1 day" + times = date_range("2000", freq="D", 
periods=10) + ewm = frame_or_series(range(10)).ewm(halflife=halflife, times=times) + with pytest.raises( + NotImplementedError, match=f"{method} is not implemented with times" + ): + getattr(ewm, method)() + + @pytest.mark.parametrize( "expected_data, ignore", [[[10.0, 5.0, 2.5, 11.25], False], [[10.0, 5.0, 5.0, 12.5], True]], From 937335f63ba13c6a4856b150974beb61ee96da29 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sat, 3 Feb 2024 18:07:01 -0700 Subject: [PATCH 28/50] DOC: fix PR02 errors in docstring for pandas.io.formats.style.Styler.to_excel (#57230) --- ci/code_checks.sh | 1 - pandas/core/generic.py | 12 +++++++++--- pandas/io/formats/style.py | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index a09c4662a1fd9..2eb5b73d68964 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -71,7 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (PR02)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \ - pandas.io.formats.style.Styler.to_excel\ pandas.CategoricalIndex.rename_categories\ pandas.CategoricalIndex.reorder_categories\ pandas.CategoricalIndex.add_categories\ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 78fbb66635dd1..0a4488593495f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -220,6 +220,8 @@ from pandas.core.indexers.objects import BaseIndexer from pandas.core.resample import Resampler +import textwrap + # goal is to be able to define the docs close to function, while still being # able to share _shared_docs = {**_shared_docs} @@ -2240,6 +2242,12 @@ def _repr_data_resource_(self): klass="object", storage_options=_shared_docs["storage_options"], storage_options_versionadded="1.2.0", + extra_parameters=textwrap.dedent( + """\ + engine_kwargs : dict, optional + Arbitrary keyword arguments passed to 
excel engine. + """ + ), ) def to_excel( self, @@ -2315,9 +2323,7 @@ def to_excel( {storage_options} .. versionadded:: {storage_options_versionadded} - engine_kwargs : dict, optional - Arbitrary keyword arguments passed to excel engine. - + {extra_parameters} See Also -------- to_csv : Write DataFrame to a comma-separated values (csv) file. diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 0e67949709a22..3a6a44a8be253 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -516,6 +516,7 @@ def set_tooltips( klass="Styler", storage_options=_shared_docs["storage_options"], storage_options_versionadded="1.5.0", + extra_parameters="", ) def to_excel( self, From b3ea5f4bb535c8d4fe232fa3994bfa0ef79c5ef7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 01:11:11 +0000 Subject: [PATCH 29/50] Remove Copy-on-Write warning mode (#57234) --- pandas/_config/__init__.py | 5 -- pandas/core/frame.py | 15 +--- pandas/core/generic.py | 112 +------------------------ pandas/core/groupby/grouper.py | 7 +- pandas/core/indexing.py | 21 +---- pandas/core/internals/blocks.py | 130 +----------------------------- pandas/core/internals/managers.py | 79 ++---------------- pandas/core/series.py | 74 ++++------------- pandas/errors/cow.py | 20 ----- 9 files changed, 35 insertions(+), 428 deletions(-) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 0594d1c190a72..9784303fc0b87 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -15,7 +15,6 @@ "option_context", "options", "using_copy_on_write", - "warn_copy_on_write", ] from pandas._config import config from pandas._config import dates # pyright: ignore[reportUnusedImport] # noqa: F401 @@ -35,10 +34,6 @@ def using_copy_on_write() -> bool: return True -def warn_copy_on_write() -> bool: - return False - - def using_nullable_dtypes() -> bool: _mode_options = _global_config["mode"] return 
_mode_options["nullable_dtypes"] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 207e3e7635cac..b8b5df6e5145b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -42,7 +42,6 @@ from pandas._config import ( get_option, using_copy_on_write, - warn_copy_on_write, ) from pandas._libs import ( @@ -64,7 +63,6 @@ _chained_assignment_method_msg, _chained_assignment_msg, _chained_assignment_warning_method_msg, - _chained_assignment_warning_msg, ) from pandas.util._decorators import ( Appender, @@ -4199,17 +4197,6 @@ def __setitem__(self, key, value) -> None: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) - elif not PYPY and not using_copy_on_write(): - if sys.getrefcount(self) <= 3 and ( - warn_copy_on_write() - or ( - not warn_copy_on_write() - and any(b.refs.has_reference() for b in self._mgr.blocks) - ) - ): - warnings.warn( - _chained_assignment_warning_msg, FutureWarning, stacklevel=2 - ) key = com.apply_if_callable(key, self) @@ -4550,7 +4537,7 @@ def _clear_item_cache(self) -> None: def _get_item_cache(self, item: Hashable) -> Series: """Return the cached item, item represents a label indexer.""" - if using_copy_on_write() or warn_copy_on_write(): + if using_copy_on_write(): loc = self.columns.get_loc(item) return self._ixs(loc, axis=1) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0a4488593495f..490a47d16871c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -30,7 +30,6 @@ from pandas._config import ( config, using_copy_on_write, - warn_copy_on_write, ) from pandas._libs import lib @@ -105,7 +104,6 @@ from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_warning_method_msg, - _check_cacher, ) from pandas.util._decorators import ( deprecate_nonkeyword_arguments, @@ -4407,7 +4405,7 @@ def _check_setitem_copy(self, t: str = "setting", force: bool_t = False) -> None df.iloc[0:5]['group'] = 'a' """ - if using_copy_on_write() or 
warn_copy_on_write(): + if using_copy_on_write(): return # return early if the check is not needed @@ -7256,22 +7254,6 @@ def fillna( ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and _check_cacher(self): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) value, method = validate_fillna_kwargs(value, method) if method is not None: @@ -7559,22 +7541,6 @@ def ffill( ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and _check_cacher(self): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) return self._pad_or_backfill( "ffill", @@ -7763,22 +7729,6 @@ def bfill( ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and _check_cacher(self): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) return self._pad_or_backfill( "bfill", @@ -7934,26 +7884,6 @@ def replace( ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, 
ABCSeries) and _check_cacher(self): - # in non-CoW mode, chained Series access will populate the - # `_item_cache` which results in an increased ref count not below - # the threshold, while we still need to warn. We detect this case - # of a Series derived from a DataFrame through the presence of - # checking the `_cacher` - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) if not is_bool(regex) and to_replace is not None: raise ValueError("'to_replace' must be 'None' if 'regex' is not a bool") @@ -8384,22 +8314,6 @@ def interpolate( ChainedAssignmentError, stacklevel=2, ) - elif ( - not PYPY - and not using_copy_on_write() - and self._is_view_after_cow_rules() - ): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if isinstance(self, ABCSeries) and _check_cacher(self): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) axis = self._get_axis_number(axis) @@ -10569,7 +10483,6 @@ def _where( inplace: bool_t = False, axis: Axis | None = None, level=None, - warn: bool_t = True, ): """ Equivalent to public method `where`, except that `other` is not @@ -10700,7 +10613,7 @@ def _where( # we may have different type blocks come out of putmask, so # reconstruct the block manager - new_data = self._mgr.putmask(mask=cond, new=other, align=align, warn=warn) + new_data = self._mgr.putmask(mask=cond, new=other, align=align) result = self._constructor_from_mgr(new_data, axes=new_data.axes) return self._update_inplace(result) @@ -12566,29 +12479,8 @@ def _inplace_method(self, other, op) -> Self: """ Wrap arithmetic method to operate inplace. """ - warn = True - if not PYPY and warn_copy_on_write(): - if sys.getrefcount(self) <= REF_COUNT + 2: - # we are probably in an inplace setitem context (e.g. 
df['a'] += 1) - warn = False - result = op(self, other) - if ( - self.ndim == 1 - and result._indexed_same(self) - and result.dtype == self.dtype - and not using_copy_on_write() - and not (warn_copy_on_write() and not warn) - ): - # GH#36498 this inplace op can _actually_ be inplace. - # Item "BlockManager" of "Union[BlockManager, SingleBlockManager]" has - # no attribute "setitem_inplace" - self._mgr.setitem_inplace( # type: ignore[union-attr] - slice(None), result._values, warn=warn - ) - return self - # Delete cacher self._reset_cacher() diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index f377c9d03d05a..1e6658e5dfd39 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -12,10 +12,7 @@ import numpy as np -from pandas._config import ( - using_copy_on_write, - warn_copy_on_write, -) +from pandas._config import using_copy_on_write from pandas._libs.tslibs import OutOfBoundsDatetime from pandas.errors import InvalidIndexError @@ -962,7 +959,7 @@ def is_in_axis(key) -> bool: def is_in_obj(gpr) -> bool: if not hasattr(gpr, "name"): return False - if using_copy_on_write() or warn_copy_on_write(): + if using_copy_on_write(): # For the CoW case, we check the references to determine if the # series is part of the object try: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 24f3ff4279a84..b58c3179dec09 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,10 +13,7 @@ import numpy as np -from pandas._config import ( - using_copy_on_write, - warn_copy_on_write, -) +from pandas._config import using_copy_on_write from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim @@ -28,11 +25,7 @@ InvalidIndexError, LossySetitemError, ) -from pandas.errors.cow import ( - _chained_assignment_msg, - _chained_assignment_warning_msg, - _check_cacher, -) +from pandas.errors.cow import _chained_assignment_msg from pandas.util._decorators import doc 
from pandas.util._exceptions import find_stack_level @@ -889,16 +882,6 @@ def __setitem__(self, key, value) -> None: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) - elif not PYPY and not using_copy_on_write(): - ctr = sys.getrefcount(self.obj) - ref_count = 2 - if not warn_copy_on_write() and _check_cacher(self.obj): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_msg, FutureWarning, stacklevel=2 - ) check_dict_or_set_indexers(key) if isinstance(key, tuple): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1237c5b86d298..bb65e7a4d0838 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -19,7 +19,6 @@ from pandas._config import ( get_option, using_copy_on_write, - warn_copy_on_write, ) from pandas._libs import ( @@ -834,7 +833,6 @@ def replace( # mask may be pre-computed if we're called from replace_list mask: npt.NDArray[np.bool_] | None = None, using_cow: bool = False, - already_warned=None, ) -> list[Block]: """ replace the to_replace value with value, possible to create new @@ -879,19 +877,6 @@ def replace( # and rest? blk = self._maybe_copy(using_cow, inplace) putmask_inplace(blk.values, mask, value) - if ( - inplace - and warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True if not (self.is_object and value is None): # if the user *explicitly* gave None, we keep None, otherwise @@ -953,7 +938,6 @@ def _replace_regex( inplace: bool = False, mask=None, using_cow: bool = False, - already_warned=None, ) -> list[Block]: """ Replace elements by the given value. 
@@ -988,20 +972,6 @@ def _replace_regex( replace_regex(block.values, rx, value, mask) - if ( - inplace - and warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True - nbs = block.convert(copy=False, using_cow=using_cow) opt = get_option("future.no_silent_downcasting") if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt: @@ -1026,7 +996,6 @@ def replace_list( inplace: bool = False, regex: bool = False, using_cow: bool = False, - already_warned=None, ) -> list[Block]: """ See BlockManager.replace_list docstring. @@ -1083,20 +1052,6 @@ def replace_list( else: rb = [self if inplace else self.copy()] - if ( - inplace - and warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True - opt = get_option("future.no_silent_downcasting") for i, ((src, dest), mask) in enumerate(zip(pairs, masks)): convert = i == src_len # only convert once at the end @@ -1428,9 +1383,7 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: values[indexer] = casted return self - def putmask( - self, mask, new, using_cow: bool = False, already_warned=None - ) -> list[Block]: + def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1463,19 +1416,6 @@ def putmask( return [self.copy(deep=False)] return [self] - if ( - warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - 
already_warned.warned_already = True - try: casted = np_can_hold_element(values.dtype, new) @@ -1640,7 +1580,6 @@ def fillna( inplace: bool = False, downcast=None, using_cow: bool = False, - already_warned=None, ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to @@ -1676,9 +1615,7 @@ def fillna( mask[mask.cumsum(self.ndim - 1) > limit] = False if inplace: - nbs = self.putmask( - mask.T, value, using_cow=using_cow, already_warned=already_warned - ) + nbs = self.putmask(mask.T, value, using_cow=using_cow) else: # without _downcast, we would break # test_fillna_dtype_conversion_equiv_replace @@ -1706,7 +1643,6 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, - already_warned=None, ) -> list[Block]: if not self._can_hold_na: # If there are no NAs, then interpolate is a no-op @@ -1727,19 +1663,6 @@ def pad_or_backfill( limit_area=limit_area, copy=copy, ) - if ( - not copy - and warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True if axis == 1: new_values = new_values.T @@ -1760,7 +1683,6 @@ def interpolate( limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, - already_warned=None, **kwargs, ) -> list[Block]: inplace = validate_bool_kwarg(inplace, "inplace") @@ -1799,20 +1721,6 @@ def interpolate( ) data = extract_array(new_values, extract_numpy=True) - if ( - not copy - and warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True - nb = 
self.make_block_same_class(data, refs=refs) return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") @@ -2141,9 +2049,7 @@ def where( return [nb] @final - def putmask( - self, mask, new, using_cow: bool = False, already_warned=None - ) -> list[Block]: + def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -2161,19 +2067,6 @@ def putmask( return [self.copy(deep=False)] return [self] - if ( - warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True - self = self._maybe_copy(using_cow, inplace=True) values = self.values if values.ndim == 2: @@ -2257,7 +2150,6 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, using_cow: bool = False, - already_warned=None, ) -> list[Block]: values = self.values @@ -2300,7 +2192,6 @@ def fillna( inplace: bool = False, downcast=None, using_cow: bool = False, - already_warned=None, ) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Block.fillna handles coercion (test_fillna_interval) @@ -2310,7 +2201,6 @@ def fillna( inplace=inplace, downcast=downcast, using_cow=using_cow, - already_warned=already_warned, ) if using_cow and self._can_hold_na and not self.values._hasna: refs = self.refs @@ -2338,20 +2228,6 @@ def fillna( DeprecationWarning, stacklevel=find_stack_level(), ) - else: - if ( - not copy - and warn_copy_on_write() - and already_warned is not None - and not already_warned.warned_already - ): - if self.refs.has_reference(): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - already_warned.warned_already = True nb = self.make_block_same_class(new_values, refs=refs) return nb._maybe_downcast([nb], downcast, 
using_cow=using_cow, caller="fillna") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e1a18cb79a1d6..fa54fde2ece84 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -18,10 +18,7 @@ import numpy as np -from pandas._config import ( - using_copy_on_write, - warn_copy_on_write, -) +from pandas._config import using_copy_on_write from pandas._libs import ( algos as libalgos, @@ -66,11 +63,7 @@ ) import pandas.core.algorithms as algos -from pandas.core.arrays import ( - ArrowExtensionArray, - ArrowStringArray, - DatetimeArray, -) +from pandas.core.arrays import DatetimeArray from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.base import PandasObject from pandas.core.construction import ( @@ -84,8 +77,6 @@ ensure_index, ) from pandas.core.internals.blocks import ( - COW_WARNING_GENERAL_MSG, - COW_WARNING_SETITEM_MSG, Block, NumpyBlock, ensure_block_shape, @@ -146,16 +137,6 @@ def ensure_np_dtype(dtype: DtypeObj) -> np.dtype: return dtype -class _AlreadyWarned: - def __init__(self) -> None: - # This class is used on the manager level to the block level to - # ensure that we warn only once. The block method can update the - # warned_already option without returning a value to keep the - # interface consistent. This is only a temporary solution for - # CoW warnings. - self.warned_already = False - - class BaseBlockManager(PandasObject): """ Core internal data structure to implement DataFrame, Series, etc. 
@@ -490,7 +471,6 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: inplace=inplace, downcast=downcast, using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), ) @final @@ -510,26 +490,19 @@ def where(self, other, cond, align: bool) -> Self: ) @final - def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self: + def putmask(self, mask, new, align: bool = True) -> Self: if align: align_keys = ["new", "mask"] else: align_keys = ["mask"] new = extract_array(new, extract_numpy=True) - already_warned = None - if warn_copy_on_write(): - already_warned = _AlreadyWarned() - if not warn: - already_warned.warned_already = True - return self.apply( "putmask", align_keys=align_keys, mask=mask, new=new, using_cow=using_copy_on_write(), - already_warned=already_warned, ) @final @@ -552,7 +525,6 @@ def replace(self, to_replace, value, inplace: bool) -> Self: value=value, inplace=inplace, using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), ) @final @@ -561,7 +533,6 @@ def replace_regex(self, **kwargs) -> Self: "_replace_regex", **kwargs, using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), ) @final @@ -582,7 +553,6 @@ def replace_list( inplace=inplace, regex=regex, using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), ) bm._consolidate_inplace() return bm @@ -593,7 +563,6 @@ def interpolate(self, inplace: bool, **kwargs) -> Self: inplace=inplace, **kwargs, using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), ) def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: @@ -602,7 +571,6 @@ def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: inplace=inplace, **kwargs, using_cow=using_copy_on_write(), - already_warned=_AlreadyWarned(), ) def shift(self, periods: int, fill_value) -> Self: @@ -611,7 +579,7 @@ def shift(self, periods: int, fill_value) -> Self: return self.apply("shift", periods=periods, fill_value=fill_value) - def setitem(self, indexer, value, 
warn: bool = True) -> Self: + def setitem(self, indexer, value) -> Self: """ Set values with indexer. @@ -620,14 +588,7 @@ def setitem(self, indexer, value, warn: bool = True) -> Self: if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: raise ValueError(f"Cannot set values with ndim > {self.ndim}") - if warn and warn_copy_on_write() and not self._has_no_reference(0): - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - - elif using_copy_on_write() and not self._has_no_reference(0): + if using_copy_on_write() and not self._has_no_reference(0): # this method is only called if there is a single block -> hardcoded 0 # Split blocks to only copy the columns we want to modify if self.ndim == 2 and isinstance(indexer, tuple): @@ -1576,17 +1537,7 @@ def column_setitem( This is a method on the BlockManager level, to avoid creating an intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) """ - needs_to_warn = False - if warn_copy_on_write() and not self._has_no_reference(loc): - if not isinstance( - self.blocks[self.blknos[loc]].values, - (ArrowExtensionArray, ArrowStringArray), - ): - # We might raise if we are in an expansion case, so defer - # warning till we actually updated - needs_to_warn = True - - elif using_copy_on_write() and not self._has_no_reference(loc): + if using_copy_on_write() and not self._has_no_reference(loc): blkno = self.blknos[loc] # Split blocks to only copy the column we want to modify blk_loc = self.blklocs[loc] @@ -1609,13 +1560,6 @@ def column_setitem( new_mgr = col_mgr.setitem((idx,), value) self.iset(loc, new_mgr._block.values, inplace=True) - if needs_to_warn: - warnings.warn( - COW_WARNING_GENERAL_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) - def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: """ Insert item at selected position. 
@@ -2298,7 +2242,7 @@ def get_numeric_data(self) -> Self: def _can_hold_na(self) -> bool: return self._block._can_hold_na - def setitem_inplace(self, indexer, value, warn: bool = True) -> None: + def setitem_inplace(self, indexer, value) -> None: """ Set values with indexer. @@ -2309,17 +2253,10 @@ def setitem_inplace(self, indexer, value, warn: bool = True) -> None: the dtype. """ using_cow = using_copy_on_write() - warn_cow = warn_copy_on_write() - if (using_cow or warn_cow) and not self._has_no_reference(0): + if using_cow and not self._has_no_reference(0): if using_cow: self.blocks = (self._block.copy(),) self._cache.clear() - elif warn_cow and warn: - warnings.warn( - COW_WARNING_SETITEM_MSG, - FutureWarning, - stacklevel=find_stack_level(), - ) arr = self.array diff --git a/pandas/core/series.py b/pandas/core/series.py index 657b384c57235..94be7bdbaca16 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -26,10 +26,7 @@ import numpy as np -from pandas._config import ( - using_copy_on_write, - warn_copy_on_write, -) +from pandas._config import using_copy_on_write from pandas._libs import ( lib, @@ -48,9 +45,6 @@ from pandas.errors.cow import ( _chained_assignment_method_msg, _chained_assignment_msg, - _chained_assignment_warning_method_msg, - _chained_assignment_warning_msg, - _check_cacher, ) from pandas.util._decorators import ( Appender, @@ -1075,7 +1069,7 @@ def __getitem__(self, key): key = com.apply_if_callable(key, self) if key is Ellipsis: - if using_copy_on_write() or warn_copy_on_write(): + if using_copy_on_write(): return self.copy(deep=False) return self @@ -1237,29 +1231,11 @@ def _get_value(self, label, takeable: bool = False): return self.iloc[loc] def __setitem__(self, key, value) -> None: - warn = True if not PYPY and using_copy_on_write(): if sys.getrefcount(self) <= 3: warnings.warn( _chained_assignment_msg, ChainedAssignmentError, stacklevel=2 ) - elif not PYPY and not using_copy_on_write(): - ctr = sys.getrefcount(self) - 
ref_count = 3 - if not warn_copy_on_write() and _check_cacher(self): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count and ( - warn_copy_on_write() - or ( - not warn_copy_on_write() - and self._mgr.blocks[0].refs.has_reference() - ) - ): - warn = False - warnings.warn( - _chained_assignment_warning_msg, FutureWarning, stacklevel=2 - ) check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) @@ -1270,10 +1246,10 @@ def __setitem__(self, key, value) -> None: if isinstance(key, slice): indexer = self.index._convert_slice_indexer(key, kind="getitem") - return self._set_values(indexer, value, warn=warn) + return self._set_values(indexer, value) try: - self._set_with_engine(key, value, warn=warn) + self._set_with_engine(key, value) except KeyError: # We have a scalar (or for MultiIndex or object-dtype, scalar-like) # key that is not present in self.index. @@ -1332,25 +1308,25 @@ def __setitem__(self, key, value) -> None: # otherwise with listlike other we interpret series[mask] = other # as series[mask] = other[mask] try: - self._where(~key, value, inplace=True, warn=warn) + self._where(~key, value, inplace=True) except InvalidIndexError: # test_where_dups self.iloc[key] = value return else: - self._set_with(key, value, warn=warn) + self._set_with(key, value) if cacher_needs_updating: self._maybe_update_cacher(inplace=True) - def _set_with_engine(self, key, value, warn: bool = True) -> None: + def _set_with_engine(self, key, value) -> None: loc = self.index.get_loc(key) # this is equivalent to self._values[key] = value - self._mgr.setitem_inplace(loc, value, warn=warn) + self._mgr.setitem_inplace(loc, value) - def _set_with(self, key, value, warn: bool = True) -> None: + def _set_with(self, key, value) -> None: # We got here via exception-handling off of InvalidIndexError, so # key should always be listlike at this point. 
assert not isinstance(key, tuple) @@ -1361,7 +1337,7 @@ def _set_with(self, key, value, warn: bool = True) -> None: if not self.index._should_fallback_to_positional: # Regardless of the key type, we're treating it as labels - self._set_labels(key, value, warn=warn) + self._set_labels(key, value) else: # Note: key_type == "boolean" should not occur because that @@ -1378,23 +1354,23 @@ def _set_with(self, key, value, warn: bool = True) -> None: FutureWarning, stacklevel=find_stack_level(), ) - self._set_values(key, value, warn=warn) + self._set_values(key, value) else: - self._set_labels(key, value, warn=warn) + self._set_labels(key, value) - def _set_labels(self, key, value, warn: bool = True) -> None: + def _set_labels(self, key, value) -> None: key = com.asarray_tuplesafe(key) indexer: np.ndarray = self.index.get_indexer(key) mask = indexer == -1 if mask.any(): raise KeyError(f"{key[mask]} not in index") - self._set_values(indexer, value, warn=warn) + self._set_values(indexer, value) - def _set_values(self, key, value, warn: bool = True) -> None: + def _set_values(self, key, value) -> None: if isinstance(key, (Index, Series)): key = key._values - self._mgr = self._mgr.setitem(indexer=key, value=value, warn=warn) + self._mgr = self._mgr.setitem(indexer=key, value=value) self._maybe_update_cacher() def _set_value(self, label, value, takeable: bool = False) -> None: @@ -3594,18 +3570,6 @@ def update(self, other: Series | Sequence | Mapping) -> None: ChainedAssignmentError, stacklevel=2, ) - elif not PYPY and not using_copy_on_write() and self._is_view_after_cow_rules(): - ctr = sys.getrefcount(self) - ref_count = REF_COUNT - if _check_cacher(self): - # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221 - ref_count += 1 - if ctr <= ref_count: - warnings.warn( - _chained_assignment_warning_method_msg, - FutureWarning, - stacklevel=2, - ) if not isinstance(other, Series): other = Series(other) @@ -4755,11 +4719,7 @@ def transform( ) -> DataFrame 
| Series: # Validate axis argument self._get_axis_number(axis) - ser = ( - self.copy(deep=False) - if using_copy_on_write() or warn_copy_on_write() - else self - ) + ser = self.copy(deep=False) if using_copy_on_write() else self result = SeriesApply(ser, func=func, args=args, kwargs=kwargs).transform() return result diff --git a/pandas/errors/cow.py b/pandas/errors/cow.py index 2215ec2148757..9a3f6f4cc8efc 100644 --- a/pandas/errors/cow.py +++ b/pandas/errors/cow.py @@ -1,5 +1,3 @@ -from typing import Any - _chained_assignment_msg = ( "A value is trying to be set on a copy of a DataFrame or Series " "through chained assignment.\n" @@ -54,21 +52,3 @@ "df[col] = df[col].method(value) instead, to perform " "the operation inplace on the original object.\n\n" ) - - -def _check_cacher(obj: Any) -> bool: - # This is a mess, selection paths that return a view set the _cacher attribute - # on the Series; most of them also set _item_cache which adds 1 to our relevant - # reference count, but iloc does not, so we have to check if we are actually - # in the item cache - if hasattr(obj, "_cacher"): - parent = obj._cacher[1]() - # parent could be dead - if parent is None: - return False - if hasattr(parent, "_item_cache"): - if obj._cacher[0] in parent._item_cache: - # Check if we are actually the item from item_cache, iloc creates a - # new object - return obj is parent._item_cache[obj._cacher[0]] - return False From cc07895e36aa75693efc9bb58c91e546b32ff264 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 01:14:13 +0000 Subject: [PATCH 30/50] Remove Copy-on-Write warning mode from tests (#57237) --- pandas/conftest.py | 8 - pandas/tests/apply/test_frame_apply.py | 5 +- pandas/tests/computation/test_eval.py | 7 +- pandas/tests/copy_view/index/test_index.py | 30 ++- .../test_chained_assignment_deprecation.py | 109 +--------- pandas/tests/copy_view/test_clip.py | 8 +- pandas/tests/copy_view/test_constructors.py | 40 
++-- .../copy_view/test_core_functionalities.py | 12 +- pandas/tests/copy_view/test_indexing.py | 187 ++++++------------ pandas/tests/copy_view/test_interp_fillna.py | 85 +++----- pandas/tests/copy_view/test_methods.py | 179 ++++++----------- pandas/tests/copy_view/test_replace.py | 55 ++---- pandas/tests/copy_view/test_setitem.py | 5 +- pandas/tests/frame/indexing/test_getitem.py | 5 +- pandas/tests/frame/indexing/test_indexing.py | 23 +-- pandas/tests/frame/indexing/test_setitem.py | 9 +- pandas/tests/frame/indexing/test_xs.py | 19 +- pandas/tests/frame/methods/test_cov_corr.py | 5 +- pandas/tests/frame/methods/test_fillna.py | 15 +- pandas/tests/frame/methods/test_pop.py | 5 +- pandas/tests/frame/methods/test_rename.py | 5 +- .../frame/methods/test_to_dict_of_blocks.py | 7 +- pandas/tests/frame/methods/test_update.py | 15 +- pandas/tests/frame/test_api.py | 7 +- pandas/tests/frame/test_arithmetic.py | 5 +- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/frame/test_constructors.py | 13 +- pandas/tests/generic/test_duplicate_labels.py | 6 +- pandas/tests/groupby/test_apply_mutate.py | 6 +- pandas/tests/groupby/test_reductions.py | 2 +- .../indexes/period/test_partial_slicing.py | 7 +- .../multiindex/test_chaining_and_caching.py | 7 +- .../tests/indexing/multiindex/test_partial.py | 1 - .../tests/indexing/multiindex/test_setitem.py | 33 ++-- .../indexing/test_chaining_and_caching.py | 79 ++------ pandas/tests/indexing/test_iat.py | 11 +- pandas/tests/indexing/test_iloc.py | 26 +-- pandas/tests/indexing/test_loc.py | 22 +-- .../series/accessors/test_dt_accessor.py | 7 +- pandas/tests/series/indexing/test_indexing.py | 10 +- pandas/tests/series/methods/test_copy.py | 5 +- .../series/methods/test_get_numeric_data.py | 7 +- pandas/tests/series/methods/test_rename.py | 5 +- pandas/tests/series/test_constructors.py | 6 +- 44 files changed, 317 insertions(+), 788 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 
db251a07aeb5d..54d7122cd73de 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1966,14 +1966,6 @@ def using_copy_on_write() -> bool: return True -@pytest.fixture -def warn_copy_on_write() -> bool: - """ - Fixture to check if Copy-on-Write is in warning mode. - """ - return False - - @pytest.fixture def using_infer_string() -> bool: """ diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 66a43f2ba4bcd..c35f9bf13200f 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1487,7 +1487,7 @@ def test_apply_dtype(col): tm.assert_series_equal(result, expected) -def test_apply_mutating(using_copy_on_write, warn_copy_on_write): +def test_apply_mutating(using_copy_on_write): # GH#35462 case where applied func pins a new BlockManager to a row df = DataFrame({"a": range(100), "b": range(100, 200)}) df_orig = df.copy() @@ -1501,8 +1501,7 @@ def func(row): expected = df.copy() expected["a"] += 1 - with tm.assert_cow_warning(warn_copy_on_write): - result = df.apply(func, axis=1) + result = df.apply(func, axis=1) tm.assert_frame_equal(result, expected) if using_copy_on_write: diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index b69fb573987f9..7b1f8b22301a1 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -1290,7 +1290,7 @@ def test_assignment_not_inplace(self): expected["c"] = expected["a"] + expected["b"] tm.assert_frame_equal(df, expected) - def test_multi_line_expression(self, warn_copy_on_write): + def test_multi_line_expression(self): # GH 11149 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) expected = df.copy() @@ -1964,15 +1964,14 @@ def test_eval_no_support_column_name(request, column): tm.assert_frame_equal(result, expected) -def test_set_inplace(using_copy_on_write, warn_copy_on_write): +def test_set_inplace(using_copy_on_write): # 
https://github.com/pandas-dev/pandas/issues/47449 # Ensure we don't only update the DataFrame inplace, but also the actual # column values, such that references to this column also get updated df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) result_view = df[:] ser = df["A"] - with tm.assert_cow_warning(warn_copy_on_write): - df.eval("A = B + C", inplace=True) + df.eval("A = B + C", inplace=True) expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]}) tm.assert_frame_equal(df, expected) if not using_copy_on_write: diff --git a/pandas/tests/copy_view/index/test_index.py b/pandas/tests/copy_view/index/test_index.py index 596379a3266fb..9a788c5fd4193 100644 --- a/pandas/tests/copy_view/index/test_index.py +++ b/pandas/tests/copy_view/index/test_index.py @@ -19,12 +19,11 @@ def index_view(index_data): return idx, view -def test_set_index_update_column(using_copy_on_write, warn_copy_on_write): +def test_set_index_update_column(using_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1}) df = df.set_index("a", drop=False) expected = df.index.copy(deep=True) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 100 + df.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: @@ -40,53 +39,49 @@ def test_set_index_drop_update_column(using_copy_on_write): tm.assert_index_equal(df.index, expected) -def test_set_index_series(using_copy_on_write, warn_copy_on_write): +def test_set_index_series(using_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) df = df.set_index(ser) expected = df.index.copy(deep=True) - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 100 + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: tm.assert_index_equal(df.index, Index([100, 11])) -def test_assign_index_as_series(using_copy_on_write, warn_copy_on_write): +def test_assign_index_as_series(using_copy_on_write): df = DataFrame({"a": [1, 2], 
"b": 1.5}) ser = Series([10, 11]) df.index = ser expected = df.index.copy(deep=True) - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 100 + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: tm.assert_index_equal(df.index, Index([100, 11])) -def test_assign_index_as_index(using_copy_on_write, warn_copy_on_write): +def test_assign_index_as_index(using_copy_on_write): df = DataFrame({"a": [1, 2], "b": 1.5}) ser = Series([10, 11]) rhs_index = Index(ser) df.index = rhs_index rhs_index = None # overwrite to clear reference expected = df.index.copy(deep=True) - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 100 + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(df.index, expected) else: tm.assert_index_equal(df.index, Index([100, 11])) -def test_index_from_series(using_copy_on_write, warn_copy_on_write): +def test_index_from_series(using_copy_on_write): ser = Series([1, 2]) idx = Index(ser) expected = idx.copy(deep=True) - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 100 + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(idx, expected) else: @@ -101,13 +96,12 @@ def test_index_from_series_copy(using_copy_on_write): assert np.shares_memory(get_array(ser), arr) -def test_index_from_index(using_copy_on_write, warn_copy_on_write): +def test_index_from_index(using_copy_on_write): ser = Series([1, 2]) idx = Index(ser) idx = Index(idx) expected = idx.copy(deep=True) - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 100 + ser.iloc[0] = 100 if using_copy_on_write: tm.assert_index_equal(idx, expected) else: diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index 0a37f6b813e55..cfa9cf64357b6 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -1,7 +1,6 @@ import 
numpy as np import pytest -from pandas.compat import PY311 from pandas.errors import ( ChainedAssignmentError, SettingWithCopyWarning, @@ -33,117 +32,11 @@ def test_methods_iloc_warn(using_copy_on_write): df.iloc[:, 0].bfill(inplace=True) -@pytest.mark.parametrize( - "func, args", - [ - ("replace", (4, 5)), - ("fillna", (1,)), - ("interpolate", ()), - ("bfill", ()), - ("ffill", ()), - ], -) -def test_methods_iloc_getitem_item_cache( - func, args, using_copy_on_write, warn_copy_on_write -): - # ensure we don't incorrectly raise chained assignment warning because - # of the item cache / iloc not setting the item cache - df_orig = DataFrame({"a": [1, 2, 3], "b": 1}) - - df = df_orig.copy() - ser = df.iloc[:, 0] - getattr(ser, func)(*args, inplace=True) - - # parent that holds item_cache is dead, so don't increase ref count - df = df_orig.copy() - ser = df.copy()["a"] - getattr(ser, func)(*args, inplace=True) - - df = df_orig.copy() - df["a"] # populate the item_cache - ser = df.iloc[:, 0] # iloc creates a new object - getattr(ser, func)(*args, inplace=True) - - df = df_orig.copy() - df["a"] # populate the item_cache - ser = df["a"] - getattr(ser, func)(*args, inplace=True) - - df = df_orig.copy() - df["a"] # populate the item_cache - # TODO(CoW-warn) because of the usage of *args, this doesn't warn on Py3.11+ - if using_copy_on_write: - with tm.raises_chained_assignment_error(not PY311): - getattr(df["a"], func)(*args, inplace=True) - else: - with tm.assert_cow_warning(not PY311, match="A value"): - getattr(df["a"], func)(*args, inplace=True) - - df = df_orig.copy() - ser = df["a"] # populate the item_cache and keep ref - if using_copy_on_write: - with tm.raises_chained_assignment_error(not PY311): - getattr(df["a"], func)(*args, inplace=True) - else: - # ideally also warns on the default mode, but the ser' _cacher - # messes up the refcount + even in warning mode this doesn't trigger - # the warning of Py3.1+ (see above) - with 
tm.assert_cow_warning(warn_copy_on_write and not PY311, match="A value"): - getattr(df["a"], func)(*args, inplace=True) - - -def test_methods_iloc_getitem_item_cache_fillna( - using_copy_on_write, warn_copy_on_write -): - # ensure we don't incorrectly raise chained assignment warning because - # of the item cache / iloc not setting the item cache - df_orig = DataFrame({"a": [1, 2, 3], "b": 1}) - - df = df_orig.copy() - ser = df.iloc[:, 0] - ser.fillna(1, inplace=True) - - # parent that holds item_cache is dead, so don't increase ref count - df = df_orig.copy() - ser = df.copy()["a"] - ser.fillna(1, inplace=True) - - df = df_orig.copy() - df["a"] # populate the item_cache - ser = df.iloc[:, 0] # iloc creates a new object - ser.fillna(1, inplace=True) - - df = df_orig.copy() - df["a"] # populate the item_cache - ser = df["a"] - ser.fillna(1, inplace=True) - - df = df_orig.copy() - df["a"] # populate the item_cache - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].fillna(1, inplace=True) - else: - with tm.assert_cow_warning(match="A value"): - df["a"].fillna(1, inplace=True) - - df = df_orig.copy() - ser = df["a"] # populate the item_cache and keep ref - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].fillna(1, inplace=True) - else: - # TODO(CoW-warn) ideally also warns on the default mode, but the ser' _cacher - # messes up the refcount - with tm.assert_cow_warning(warn_copy_on_write, match="A value"): - df["a"].fillna(1, inplace=True) - - # TODO(CoW-warn) expand the cases @pytest.mark.parametrize( "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])] ) -def test_series_setitem(indexer, using_copy_on_write, warn_copy_on_write): +def test_series_setitem(indexer, using_copy_on_write): # ensure we only get a single warning for those typical cases of chained # assignment df = DataFrame({"a": [1, 2, 3], "b": 1}) diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py 
index 7c87646424e2f..9be9ba6f144c4 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -8,16 +8,12 @@ from pandas.tests.copy_view.util import get_array -def test_clip_inplace_reference(using_copy_on_write, warn_copy_on_write): +def test_clip_inplace_reference(using_copy_on_write): df = DataFrame({"a": [1.5, 2, 3]}) df_copy = df.copy() arr_a = get_array(df, "a") view = df[:] - if warn_copy_on_write: - with tm.assert_cow_warning(): - df.clip(lower=2, inplace=True) - else: - df.clip(lower=2, inplace=True) + df.clip(lower=2, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) diff --git a/pandas/tests/copy_view/test_constructors.py b/pandas/tests/copy_view/test_constructors.py index cbd0e6899bfc9..5f095d3d74c54 100644 --- a/pandas/tests/copy_view/test_constructors.py +++ b/pandas/tests/copy_view/test_constructors.py @@ -21,7 +21,7 @@ @pytest.mark.parametrize("dtype", [None, "int64"]) -def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write): +def test_series_from_series(dtype, using_copy_on_write): # Case: constructing a Series from another Series object follows CoW rules: # a new object is returned and thus mutations are not propagated ser = Series([1, 2, 3], name="name") @@ -43,8 +43,7 @@ def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write): assert not np.shares_memory(get_array(ser), get_array(result)) else: # mutating shallow copy does mutate original - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[0] = 0 + result.iloc[0] = 0 assert ser.iloc[0] == 0 # and still shares memory assert np.shares_memory(get_array(ser), get_array(result)) @@ -58,12 +57,11 @@ def test_series_from_series(dtype, using_copy_on_write, warn_copy_on_write): assert result.iloc[0] == 1 else: # mutating original does mutate shallow copy - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 0 + ser.iloc[0] = 0 assert result.iloc[0] == 0 -def 
test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write): +def test_series_from_series_with_reindex(using_copy_on_write): # Case: constructing a Series from another Series with specifying an index # that potentially requires a reindex of the values ser = Series([1, 2, 3], name="name") @@ -78,8 +76,7 @@ def test_series_from_series_with_reindex(using_copy_on_write, warn_copy_on_write ]: result = Series(ser, index=index) assert np.shares_memory(ser.values, result.values) - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[0] = 0 + result.iloc[0] = 0 if using_copy_on_write: assert ser.iloc[0] == 1 else: @@ -190,9 +187,7 @@ def test_series_from_block_manager_different_dtype(using_copy_on_write): @pytest.mark.parametrize("use_mgr", [True, False]) @pytest.mark.parametrize("columns", [None, ["a"]]) -def test_dataframe_constructor_mgr_or_df( - using_copy_on_write, warn_copy_on_write, columns, use_mgr -): +def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, use_mgr): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() @@ -207,8 +202,7 @@ def test_dataframe_constructor_mgr_or_df( new_df = DataFrame(data) assert np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) - with tm.assert_cow_warning(warn_copy_on_write and not use_mgr): - new_df.iloc[0] = 100 + new_df.iloc[0] = 100 if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), get_array(new_df, "a")) @@ -221,9 +215,7 @@ def test_dataframe_constructor_mgr_or_df( @pytest.mark.parametrize("dtype", [None, "int64", "Int64"]) @pytest.mark.parametrize("index", [None, [0, 1, 2]]) @pytest.mark.parametrize("columns", [None, ["a", "b"], ["a", "b", "c"]]) -def test_dataframe_from_dict_of_series( - request, using_copy_on_write, warn_copy_on_write, columns, index, dtype -): +def test_dataframe_from_dict_of_series(using_copy_on_write, columns, index, dtype): # Case: constructing a DataFrame from Series objects with copy=False # has to do a lazy following CoW rules 
# (the default for DataFrame(dict) is still to copy to ensure consolidation) @@ -242,8 +234,7 @@ def test_dataframe_from_dict_of_series( assert np.shares_memory(get_array(result, "a"), get_array(s1)) # mutating the new dataframe doesn't mutate original - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[0, 0] = 10 + result.iloc[0, 0] = 10 if using_copy_on_write: assert not np.shares_memory(get_array(result, "a"), get_array(s1)) tm.assert_series_equal(s1, s1_orig) @@ -256,8 +247,7 @@ def test_dataframe_from_dict_of_series( result = DataFrame( {"a": s1, "b": s2}, index=index, columns=columns, dtype=dtype, copy=False ) - with tm.assert_cow_warning(warn_copy_on_write): - s1.iloc[0] = 10 + s1.iloc[0] = 10 if using_copy_on_write: assert not np.shares_memory(get_array(result, "a"), get_array(s1)) tm.assert_frame_equal(result, expected) @@ -287,7 +277,7 @@ def test_dataframe_from_dict_of_series_with_reindex(dtype): "data, dtype", [([1, 2], None), ([1, 2], "int64"), (["a", "b"], None)] ) def test_dataframe_from_series_or_index( - using_copy_on_write, warn_copy_on_write, data, dtype, index_or_series + using_copy_on_write, data, dtype, index_or_series ): obj = index_or_series(data, dtype=dtype) obj_orig = obj.copy() @@ -296,8 +286,7 @@ def test_dataframe_from_series_or_index( if using_copy_on_write: assert not df._mgr._has_no_reference(0) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = data[-1] + df.iloc[0, 0] = data[-1] if using_copy_on_write: tm.assert_equal(obj, obj_orig) @@ -349,7 +338,7 @@ def test_frame_from_numpy_array(using_copy_on_write, copy): assert np.shares_memory(get_array(df, 0), arr) -def test_dataframe_from_records_with_dataframe(using_copy_on_write, warn_copy_on_write): +def test_dataframe_from_records_with_dataframe(using_copy_on_write): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() with tm.assert_produces_warning(FutureWarning): @@ -357,8 +346,7 @@ def test_dataframe_from_records_with_dataframe(using_copy_on_write, 
warn_copy_on if using_copy_on_write: assert not df._mgr._has_no_reference(0) assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) - with tm.assert_cow_warning(warn_copy_on_write): - df2.iloc[0, 0] = 100 + df2.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: diff --git a/pandas/tests/copy_view/test_core_functionalities.py b/pandas/tests/copy_view/test_core_functionalities.py index 8dc80c5cc0e0e..b37e1a3718ac1 100644 --- a/pandas/tests/copy_view/test_core_functionalities.py +++ b/pandas/tests/copy_view/test_core_functionalities.py @@ -28,23 +28,20 @@ def test_setitem_dont_track_unnecessary_references(using_copy_on_write): assert np.shares_memory(arr, get_array(df, "a")) -def test_setitem_with_view_copies(using_copy_on_write, warn_copy_on_write): +def test_setitem_with_view_copies(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] expected = df.copy() df["b"] = 100 arr = get_array(df, "a") - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 100 # Check that we correctly track reference + df.iloc[0, 0] = 100 # Check that we correctly track reference if using_copy_on_write: assert not np.shares_memory(arr, get_array(df, "a")) tm.assert_frame_equal(view, expected) -def test_setitem_with_view_invalidated_does_not_copy( - using_copy_on_write, warn_copy_on_write, request -): +def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request): df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1}) view = df[:] @@ -53,8 +50,7 @@ def test_setitem_with_view_invalidated_does_not_copy( view = None # noqa: F841 # TODO(CoW-warn) false positive? -> block gets split because of `df["b"] = 100` # which introduces additional refs, even when those of `view` go out of scopes - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 100 + df.iloc[0, 0] = 100 if using_copy_on_write: # Setitem split the block. 
Since the old block shared data with view # all the new blocks are referencing view and each other. When view diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index c35a0b89585c3..69fb8fe2c6f63 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -101,7 +101,7 @@ def test_subset_column_selection_modify_parent(backend, using_copy_on_write): tm.assert_frame_equal(subset, expected) -def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write): +def test_subset_row_slice(backend, using_copy_on_write): # Case: taking a subset of the rows of a DataFrame using a slice # + afterwards modifying the subset _, DataFrame, _ = backend @@ -121,8 +121,7 @@ def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write): # INFO this no longer raise warning since pandas 1.4 # with pd.option_context("chained_assignment", "warn"): # with tm.assert_produces_warning(SettingWithCopyWarning): - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 subset._mgr._verify_integrity() @@ -140,7 +139,7 @@ def test_subset_row_slice(backend, using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_column_slice(backend, using_copy_on_write, warn_copy_on_write, dtype): +def test_subset_column_slice(backend, using_copy_on_write, dtype): # Case: taking a subset of the columns of a DataFrame using a slice # + afterwards modifying the subset dtype_backend, DataFrame, _ = backend @@ -158,9 +157,6 @@ def test_subset_column_slice(backend, using_copy_on_write, warn_copy_on_write, d subset.iloc[0, 0] = 0 assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) - elif warn_copy_on_write: - with tm.assert_cow_warning(single_block): - subset.iloc[0, 0] = 0 else: # we only get a warning in case of a single block warn = 
SettingWithCopyWarning if single_block else None @@ -198,7 +194,6 @@ def test_subset_loc_rows_columns( row_indexer, column_indexer, using_copy_on_write, - warn_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .loc # + afterwards modifying the subset @@ -223,8 +218,7 @@ def test_subset_loc_rows_columns( ) # modifying the subset never modifies the parent - with tm.assert_cow_warning(warn_copy_on_write and mutate_parent): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 expected = DataFrame( {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) @@ -254,7 +248,6 @@ def test_subset_iloc_rows_columns( row_indexer, column_indexer, using_copy_on_write, - warn_copy_on_write, ): # Case: taking a subset of the rows+columns of a DataFrame using .iloc # + afterwards modifying the subset @@ -279,8 +272,7 @@ def test_subset_iloc_rows_columns( ) # modifying the subset never modifies the parent - with tm.assert_cow_warning(warn_copy_on_write and mutate_parent): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 expected = DataFrame( {"b": [0, 6], "c": np.array([8, 9], dtype=dtype)}, index=range(1, 3) @@ -296,9 +288,7 @@ def test_subset_iloc_rows_columns( [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], ids=["slice", "mask", "array"], ) -def test_subset_set_with_row_indexer( - backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write -): +def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on_write): # Case: setting values with a row indexer on a viewing subset # subset[indexer] = value and subset.iloc[indexer] = value _, DataFrame, _ = backend @@ -315,9 +305,6 @@ def test_subset_set_with_row_indexer( if using_copy_on_write: indexer_si(subset)[indexer] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - indexer_si(subset)[indexer] = 0 else: # INFO iloc no longer raises warning since pandas 1.4 warn = SettingWithCopyWarning if indexer_si is tm.setitem else None @@ -338,7 
+325,7 @@ def test_subset_set_with_row_indexer( tm.assert_frame_equal(df, df_orig) -def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): +def test_subset_set_with_mask(backend, using_copy_on_write): # Case: setting values with a mask on a viewing subset: subset[mask] = value _, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}) @@ -349,9 +336,6 @@ def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): if using_copy_on_write: subset[mask] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - subset[mask] = 0 else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): @@ -371,7 +355,7 @@ def test_subset_set_with_mask(backend, using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): +def test_subset_set_column(backend, using_copy_on_write): # Case: setting a single column on a viewing subset -> subset[col] = value dtype_backend, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -383,7 +367,7 @@ def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): else: arr = pd.array([10, 11], dtype="Int64") - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: subset["a"] = arr else: with pd.option_context("chained_assignment", "warn"): @@ -401,9 +385,7 @@ def test_subset_set_column(backend, using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_set_column_with_loc( - backend, using_copy_on_write, warn_copy_on_write, dtype -): +def test_subset_set_column_with_loc(backend, using_copy_on_write, dtype): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value _, DataFrame, _ = backend @@ -415,9 
+397,6 @@ def test_subset_set_column_with_loc( if using_copy_on_write: subset.loc[:, "a"] = np.array([10, 11], dtype="int64") - elif warn_copy_on_write: - with tm.assert_cow_warning(): - subset.loc[:, "a"] = np.array([10, 11], dtype="int64") else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(None): @@ -438,7 +417,7 @@ def test_subset_set_column_with_loc( tm.assert_frame_equal(df, df_orig) -def test_subset_set_column_with_loc2(backend, using_copy_on_write, warn_copy_on_write): +def test_subset_set_column_with_loc2(backend, using_copy_on_write): # Case: setting a single column with loc on a viewing subset # -> subset.loc[:, col] = value # separate test for case of DataFrame of a single column -> takes a separate @@ -450,9 +429,6 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, warn_copy_on_ if using_copy_on_write: subset.loc[:, "a"] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - subset.loc[:, "a"] = 0 else: with pd.option_context("chained_assignment", "warn"): with tm.assert_produces_warning(None): @@ -473,7 +449,7 @@ def test_subset_set_column_with_loc2(backend, using_copy_on_write, warn_copy_on_ @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dtype): +def test_subset_set_columns(backend, using_copy_on_write, dtype): # Case: setting multiple columns on a viewing subset # -> subset[[col1, col2]] = value dtype_backend, DataFrame, _ = backend @@ -483,7 +459,7 @@ def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dt df_orig = df.copy() subset = df[1:3] - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: subset[["a", "c"]] = 0 else: with pd.option_context("chained_assignment", "warn"): @@ -510,9 +486,7 @@ def test_subset_set_columns(backend, using_copy_on_write, warn_copy_on_write, dt [slice("a", "b"), np.array([True, 
True, False]), ["a", "b"]], ids=["slice", "mask", "array"], ) -def test_subset_set_with_column_indexer( - backend, indexer, using_copy_on_write, warn_copy_on_write -): +def test_subset_set_with_column_indexer(backend, indexer, using_copy_on_write): # Case: setting multiple columns with a column indexer on a viewing subset # -> subset.loc[:, [col1, col2]] = value _, DataFrame, _ = backend @@ -522,9 +496,6 @@ def test_subset_set_with_column_indexer( if using_copy_on_write: subset.loc[:, indexer] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - subset.loc[:, indexer] = 0 else: with pd.option_context("chained_assignment", "warn"): # As of 2.0, this setitem attempts (successfully) to set values @@ -572,7 +543,6 @@ def test_subset_chained_getitem( method, dtype, using_copy_on_write, - warn_copy_on_write, ): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour @@ -593,8 +563,7 @@ def test_subset_chained_getitem( # modify subset -> don't modify parent subset = method(df) - with tm.assert_cow_warning(warn_copy_on_write and subset_is_view): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 if using_copy_on_write or (not subset_is_view): tm.assert_frame_equal(df, df_orig) else: @@ -602,8 +571,7 @@ def test_subset_chained_getitem( # modify parent -> don't modify subset subset = method(df) - with tm.assert_cow_warning(warn_copy_on_write and subset_is_view): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 expected = DataFrame({"a": [1, 2], "b": [4, 5]}) if using_copy_on_write or not subset_is_view: tm.assert_frame_equal(subset, expected) @@ -614,9 +582,7 @@ def test_subset_chained_getitem( @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_chained_getitem_column( - backend, dtype, using_copy_on_write, warn_copy_on_write -): +def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): # Case: creating a subset using multiple, 
chained getitem calls using views # still needs to guarantee proper CoW behaviour dtype_backend, DataFrame, Series = backend @@ -628,8 +594,7 @@ def test_subset_chained_getitem_column( # modify subset -> don't modify parent subset = df[:]["a"][0:2] df._clear_item_cache() - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0] = 0 + subset.iloc[0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: @@ -638,8 +603,7 @@ def test_subset_chained_getitem_column( # modify parent -> don't modify subset subset = df[:]["a"][0:2] df._clear_item_cache() - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") if using_copy_on_write: tm.assert_series_equal(subset, expected) @@ -661,9 +625,7 @@ def test_subset_chained_getitem_column( ], ids=["getitem", "iloc", "loc", "long-chain"], ) -def test_subset_chained_getitem_series( - backend, method, using_copy_on_write, warn_copy_on_write -): +def test_subset_chained_getitem_series(backend, method, using_copy_on_write): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour _, _, Series = backend @@ -672,8 +634,7 @@ def test_subset_chained_getitem_series( # modify subset -> don't modify parent subset = method(s) - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0] = 0 + subset.iloc[0] = 0 if using_copy_on_write: tm.assert_series_equal(s, s_orig) else: @@ -681,8 +642,7 @@ def test_subset_chained_getitem_series( # modify parent -> don't modify subset subset = s.iloc[0:3].iloc[0:2] - with tm.assert_cow_warning(warn_copy_on_write): - s.iloc[0] = 0 + s.iloc[0] = 0 expected = Series([1, 2], index=["a", "b"]) if using_copy_on_write: tm.assert_series_equal(subset, expected) @@ -690,15 +650,14 @@ def test_subset_chained_getitem_series( assert subset.iloc[0] == 0 -def test_subset_chained_single_block_row(using_copy_on_write, warn_copy_on_write): +def 
test_subset_chained_single_block_row(using_copy_on_write): # not parametrizing this for dtype backend, since this explicitly tests single block df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() # modify subset -> don't modify parent subset = df[:].iloc[0].iloc[0:2] - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0] = 0 + subset.iloc[0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: @@ -706,8 +665,7 @@ def test_subset_chained_single_block_row(using_copy_on_write, warn_copy_on_write # modify parent -> don't modify subset subset = df[:].iloc[0].iloc[0:2] - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 expected = Series([1, 4], index=["a", "b"], name=0) if using_copy_on_write: tm.assert_series_equal(subset, expected) @@ -726,7 +684,7 @@ def test_subset_chained_single_block_row(using_copy_on_write, warn_copy_on_write ], ids=["getitem", "loc", "loc-rows", "iloc", "iloc-rows"], ) -def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write): +def test_null_slice(backend, method, using_copy_on_write): # Case: also all variants of indexing with a null slice (:) should return # new objects to ensure we correctly use CoW for the results dtype_backend, DataFrame, _ = backend @@ -739,8 +697,7 @@ def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write): assert df2 is not df # and those trigger CoW when mutated - with tm.assert_cow_warning(warn_copy_on_write): - df2.iloc[0, 0] = 0 + df2.iloc[0, 0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: @@ -756,7 +713,7 @@ def test_null_slice(backend, method, using_copy_on_write, warn_copy_on_write): ], ids=["getitem", "loc", "iloc"], ) -def test_null_slice_series(backend, method, using_copy_on_write, warn_copy_on_write): +def test_null_slice_series(backend, method, using_copy_on_write): _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) s_orig = 
s.copy() @@ -767,8 +724,7 @@ def test_null_slice_series(backend, method, using_copy_on_write, warn_copy_on_wr assert s2 is not s # and those trigger CoW when mutated - with tm.assert_cow_warning(warn_copy_on_write): - s2.iloc[0] = 0 + s2.iloc[0] = 0 if using_copy_on_write: tm.assert_series_equal(s, s_orig) else: @@ -782,7 +738,7 @@ def test_null_slice_series(backend, method, using_copy_on_write, warn_copy_on_wr # Series -- Indexing operations taking subset + modifying the subset/parent -def test_series_getitem_slice(backend, using_copy_on_write, warn_copy_on_write): +def test_series_getitem_slice(backend, using_copy_on_write): # Case: taking a slice of a Series + afterwards modifying the subset _, _, Series = backend s = Series([1, 2, 3], index=["a", "b", "c"]) @@ -791,8 +747,7 @@ def test_series_getitem_slice(backend, using_copy_on_write, warn_copy_on_write): subset = s[:] assert np.shares_memory(get_array(subset), get_array(s)) - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0] = 0 + subset.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(subset), get_array(s)) @@ -808,7 +763,7 @@ def test_series_getitem_slice(backend, using_copy_on_write, warn_copy_on_write): assert s.iloc[0] == 0 -def test_series_getitem_ellipsis(using_copy_on_write, warn_copy_on_write): +def test_series_getitem_ellipsis(using_copy_on_write): # Case: taking a view of a Series using Ellipsis + afterwards modifying the subset s = Series([1, 2, 3]) s_orig = s.copy() @@ -816,8 +771,7 @@ def test_series_getitem_ellipsis(using_copy_on_write, warn_copy_on_write): subset = s[...] 
assert np.shares_memory(get_array(subset), get_array(s)) - with tm.assert_cow_warning(warn_copy_on_write): - subset.iloc[0] = 0 + subset.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(subset), get_array(s)) @@ -839,7 +793,7 @@ def test_series_getitem_ellipsis(using_copy_on_write, warn_copy_on_write): ids=["slice", "mask", "array"], ) def test_series_subset_set_with_indexer( - backend, indexer_si, indexer, using_copy_on_write, warn_copy_on_write + backend, indexer_si, indexer, using_copy_on_write ): # Case: setting values in a viewing Series with an indexer _, _, Series = backend @@ -855,12 +809,8 @@ def test_series_subset_set_with_indexer( and indexer.dtype.kind == "i" ): warn = FutureWarning - if warn_copy_on_write: - with tm.assert_cow_warning(raise_on_extra_warnings=warn is not None): - indexer_si(subset)[indexer] = 0 - else: - with tm.assert_produces_warning(warn, match=msg): - indexer_si(subset)[indexer] = 0 + with tm.assert_produces_warning(warn, match=msg): + indexer_si(subset)[indexer] = 0 expected = Series([0, 0, 3], index=["a", "b", "c"]) tm.assert_series_equal(subset, expected) @@ -874,7 +824,7 @@ def test_series_subset_set_with_indexer( # del operator -def test_del_frame(backend, using_copy_on_write, warn_copy_on_write): +def test_del_frame(backend, using_copy_on_write): # Case: deleting a column with `del` on a viewing child dataframe should # not modify parent + update the references dtype_backend, DataFrame, _ = backend @@ -891,13 +841,11 @@ def test_del_frame(backend, using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df2, df_orig[["a", "c"]]) df2._mgr._verify_integrity() - with tm.assert_cow_warning(warn_copy_on_write and dtype_backend == "numpy"): - df.loc[0, "b"] = 200 + df.loc[0, "b"] = 200 assert np.shares_memory(get_array(df, "a"), get_array(df2, "a")) df_orig = df.copy() - with tm.assert_cow_warning(warn_copy_on_write): - df2.loc[0, "a"] = 100 + df2.loc[0, "a"] = 100 if using_copy_on_write: # modifying 
child after deleting a column still doesn't update parent tm.assert_frame_equal(df, df_orig) @@ -929,7 +877,7 @@ def test_del_series(backend): # Accessing column as Series -def test_column_as_series(backend, using_copy_on_write, warn_copy_on_write): +def test_column_as_series(backend, using_copy_on_write): # Case: selecting a single column now also uses Copy-on-Write dtype_backend, DataFrame, Series = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -941,9 +889,6 @@ def test_column_as_series(backend, using_copy_on_write, warn_copy_on_write): if using_copy_on_write: s[0] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - s[0] = 0 else: warn = SettingWithCopyWarning if dtype_backend == "numpy" else None with pd.option_context("chained_assignment", "warn"): @@ -962,9 +907,7 @@ def test_column_as_series(backend, using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_column_as_series_set_with_upcast( - backend, using_copy_on_write, warn_copy_on_write -): +def test_column_as_series_set_with_upcast(backend, using_copy_on_write): # Case: selecting a single column now also uses Copy-on-Write -> when # setting a value causes an upcast, we don't need to update the parent # DataFrame through the cache mechanism @@ -974,12 +917,10 @@ def test_column_as_series_set_with_upcast( s = df["a"] if dtype_backend == "nullable": - with tm.assert_cow_warning(warn_copy_on_write): - with pytest.raises(TypeError, match="Invalid value"): - s[0] = "foo" + with pytest.raises(TypeError, match="Invalid value"): + s[0] = "foo" expected = Series([1, 2, 3], name="a") - elif using_copy_on_write or warn_copy_on_write: - # TODO(CoW-warn) assert the FutureWarning for CoW is also raised + elif using_copy_on_write: with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") @@ -1021,7 +962,6 @@ def test_column_as_series_no_item_cache( backend, 
method, using_copy_on_write, - warn_copy_on_write, ): # Case: selecting a single column (which now also uses Copy-on-Write to protect # the view) should always give a new object (i.e. not make use of a cache) @@ -1033,16 +973,13 @@ def test_column_as_series_no_item_cache( s2 = method(df) is_iloc = "iloc" in request.node.name - if using_copy_on_write or warn_copy_on_write or is_iloc: + if using_copy_on_write or is_iloc: assert s1 is not s2 else: assert s1 is s2 if using_copy_on_write: s1.iloc[0] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(): - s1.iloc[0] = 0 else: warn = SettingWithCopyWarning if dtype_backend == "numpy" else None with pd.option_context("chained_assignment", "warn"): @@ -1094,7 +1031,7 @@ def test_dataframe_add_column_from_series(backend, using_copy_on_write): "col", [[0.1, 0.2, 0.3], [7, 8, 9]], ids=["mixed-block", "single-block"] ) def test_set_value_copy_only_necessary_column( - using_copy_on_write, warn_copy_on_write, indexer_func, indexer, val, col + using_copy_on_write, indexer_func, indexer, val, col ): # When setting inplace, only copy column that is modified instead of the whole # block (by splitting the block) @@ -1102,19 +1039,13 @@ def test_set_value_copy_only_necessary_column( df_orig = df.copy() view = df[:] - if val == "a" and not warn_copy_on_write: + if val == "a": with tm.assert_produces_warning( FutureWarning, match="Setting an item of incompatible dtype is deprecated" ): indexer_func(df)[indexer] = val - if val == "a" and warn_copy_on_write: - with tm.assert_produces_warning( - FutureWarning, match="incompatible dtype|Setting a value on a view" - ): - indexer_func(df)[indexer] = val - else: - with tm.assert_cow_warning(warn_copy_on_write and val == 100): - indexer_func(df)[indexer] = val + + indexer_func(df)[indexer] = val if using_copy_on_write: assert np.shares_memory(get_array(df, "b"), get_array(view, "b")) @@ -1128,13 +1059,12 @@ def test_set_value_copy_only_necessary_column( assert 
np.shares_memory(get_array(df, "a"), get_array(view, "a")) -def test_series_midx_slice(using_copy_on_write, warn_copy_on_write): +def test_series_midx_slice(using_copy_on_write): ser = Series([1, 2, 3], index=pd.MultiIndex.from_arrays([[1, 1, 2], [3, 4, 5]])) ser_orig = ser.copy() result = ser[1] assert np.shares_memory(get_array(ser), get_array(result)) - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[0] = 100 + result.iloc[0] = 100 if using_copy_on_write: tm.assert_series_equal(ser, ser_orig) else: @@ -1144,7 +1074,7 @@ def test_series_midx_slice(using_copy_on_write, warn_copy_on_write): tm.assert_series_equal(ser, expected) -def test_getitem_midx_slice(using_copy_on_write, warn_copy_on_write): +def test_getitem_midx_slice(using_copy_on_write): df = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2}) df_orig = df.copy() new_df = df[("a",)] @@ -1157,25 +1087,20 @@ def test_getitem_midx_slice(using_copy_on_write, warn_copy_on_write): new_df.iloc[0, 0] = 100 tm.assert_frame_equal(df_orig, df) else: - if warn_copy_on_write: - with tm.assert_cow_warning(): + with pd.option_context("chained_assignment", "warn"): + with tm.assert_produces_warning(SettingWithCopyWarning): new_df.iloc[0, 0] = 100 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - new_df.iloc[0, 0] = 100 assert df.iloc[0, 0] == 100 -def test_series_midx_tuples_slice(using_copy_on_write, warn_copy_on_write): +def test_series_midx_tuples_slice(using_copy_on_write): ser = Series( [1, 2, 3], index=pd.MultiIndex.from_tuples([((1, 2), 3), ((1, 2), 4), ((2, 3), 4)]), ) result = ser[(1, 2)] assert np.shares_memory(get_array(ser), get_array(result)) - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[0] = 100 + result.iloc[0] = 100 if using_copy_on_write: expected = Series( [1, 2, 3], diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index 
ddc5879a56d54..d72600956a6d6 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -10,7 +10,6 @@ Series, Timestamp, interval_range, - option_context, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -91,13 +90,12 @@ def test_interpolate_inplace_no_reference_no_copy(using_copy_on_write, vals): @pytest.mark.parametrize( "vals", [[1, np.nan, 2], [Timestamp("2019-12-31"), NaT, Timestamp("2020-12-31")]] ) -def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_write): +def test_interpolate_inplace_with_refs(using_copy_on_write, vals): df = DataFrame({"a": [1, np.nan, 2]}) df_orig = df.copy() arr = get_array(df, "a") view = df[:] - with tm.assert_cow_warning(warn_copy_on_write): - df.interpolate(method="linear", inplace=True) + df.interpolate(method="linear", inplace=True) if using_copy_on_write: # Check that copy was triggered in interpolate and that we don't @@ -112,17 +110,14 @@ def test_interpolate_inplace_with_refs(using_copy_on_write, vals, warn_copy_on_w @pytest.mark.parametrize("func", ["ffill", "bfill"]) @pytest.mark.parametrize("dtype", ["float64", "Float64"]) -def test_interp_fill_functions_inplace( - using_copy_on_write, func, warn_copy_on_write, dtype -): +def test_interp_fill_functions_inplace(using_copy_on_write, func, dtype): # Check that these takes the same code paths as interpolate df = DataFrame({"a": [1, np.nan, 2]}, dtype=dtype) df_orig = df.copy() arr = get_array(df, "a") view = df[:] - with tm.assert_cow_warning(warn_copy_on_write and dtype == "float64"): - getattr(df, func)(inplace=True) + getattr(df, func)(inplace=True) if using_copy_on_write: # Check that copy was triggered in interpolate and that we don't @@ -255,15 +250,14 @@ def test_fillna_inplace(using_copy_on_write, downcast): assert df._mgr._has_no_reference(1) -def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write): +def 
test_fillna_inplace_reference(using_copy_on_write): df = DataFrame({"a": [1.5, np.nan], "b": 1}) df_orig = df.copy() arr_a = get_array(df, "a") arr_b = get_array(df, "b") view = df[:] - with tm.assert_cow_warning(warn_copy_on_write): - df.fillna(5.5, inplace=True) + df.fillna(5.5, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) assert np.shares_memory(get_array(df, "b"), arr_b) @@ -277,7 +271,7 @@ def test_fillna_inplace_reference(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df, expected) -def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_write): +def test_fillna_interval_inplace_reference(using_copy_on_write): # Set dtype explicitly to avoid implicit cast when setting nan ser = Series( interval_range(start=0, end=5), name="a", dtype="interval[float64, right]" @@ -286,8 +280,7 @@ def test_fillna_interval_inplace_reference(using_copy_on_write, warn_copy_on_wri ser_orig = ser.copy() view = ser[:] - with tm.assert_cow_warning(warn_copy_on_write): - ser.fillna(value=Interval(left=0, right=5), inplace=True) + ser.fillna(value=Interval(left=0, right=5), inplace=True) if using_copy_on_write: assert not np.shares_memory( @@ -353,13 +346,12 @@ def test_fillna_ea_noop_shares_memory( def test_fillna_inplace_ea_noop_shares_memory( - using_copy_on_write, warn_copy_on_write, any_numeric_ea_and_arrow_dtype + using_copy_on_write, any_numeric_ea_and_arrow_dtype ): df = DataFrame({"a": [1, NA, 3], "b": 1}, dtype=any_numeric_ea_and_arrow_dtype) df_orig = df.copy() view = df[:] - with tm.assert_cow_warning(warn_copy_on_write): - df.fillna(100, inplace=True) + df.fillna(100, inplace=True) if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), get_array(view, "a")) @@ -372,10 +364,7 @@ def test_fillna_inplace_ea_noop_shares_memory( assert not df._mgr._has_no_reference(1) assert not view._mgr._has_no_reference(1) - with 
tm.assert_cow_warning( - warn_copy_on_write and "pyarrow" not in any_numeric_ea_and_arrow_dtype - ): - df.iloc[0, 1] = 100 + df.iloc[0, 1] = 100 if isinstance(df["a"].dtype, ArrowDtype) or using_copy_on_write: tm.assert_frame_equal(df_orig, view) else: @@ -383,50 +372,26 @@ def test_fillna_inplace_ea_noop_shares_memory( tm.assert_frame_equal(df, view) -def test_fillna_chained_assignment(using_copy_on_write): +def test_fillna_chained_assignment(): df = DataFrame({"a": [1, np.nan, 2], "b": 1}) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].fillna(100, inplace=True) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - df[["a"]].fillna(100, inplace=True) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].fillna(100, inplace=True) - - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df.a > 5].fillna(100, inplace=True) + with tm.raises_chained_assignment_error(): + df["a"].fillna(100, inplace=True) + tm.assert_frame_equal(df, df_orig) - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["a"].fillna(100, inplace=True) + with tm.raises_chained_assignment_error(): + df[["a"]].fillna(100, inplace=True) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("func", ["interpolate", "ffill", "bfill"]) -def test_interpolate_chained_assignment(using_copy_on_write, func): +def test_interpolate_chained_assignment(func): df = DataFrame({"a": [1, np.nan, 2], "b": 1}) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - getattr(df["a"], func)(inplace=True) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - getattr(df[["a"]], func)(inplace=True) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(FutureWarning, 
match="inplace method"): - getattr(df["a"], func)(inplace=True) - - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[["a"]], func)(inplace=True) + with tm.raises_chained_assignment_error(): + getattr(df["a"], func)(inplace=True) + tm.assert_frame_equal(df, df_orig) - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[df["a"] > 1], func)(inplace=True) + with tm.raises_chained_assignment_error(): + getattr(df[["a"]], func)(inplace=True) + tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 590829b6dc759..b3bd63e1c7e4c 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -40,7 +40,7 @@ def test_copy(using_copy_on_write): assert df.iloc[0, 0] == 1 -def test_copy_shallow(using_copy_on_write, warn_copy_on_write): +def test_copy_shallow(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_copy = df.copy(deep=False) @@ -70,8 +70,7 @@ def test_copy_shallow(using_copy_on_write, warn_copy_on_write): assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c")) else: # mutating shallow copy does mutate original - with tm.assert_cow_warning(warn_copy_on_write): - df_copy.iloc[0, 0] = 0 + df_copy.iloc[0, 0] = 0 assert df.iloc[0, 0] == 0 # and still shares memory assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a")) @@ -525,15 +524,14 @@ def test_shift_rows_freq(using_copy_on_write): tm.assert_frame_equal(df2, df_orig) -def test_shift_columns(using_copy_on_write, warn_copy_on_write): +def test_shift_columns(using_copy_on_write): df = DataFrame( [[1, 2], [3, 4], [5, 6]], columns=date_range("2020-01-01", "2020-01-02") ) df2 = df.shift(periods=1, axis=1) assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")) - with 
tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory( get_array(df2, "2020-01-02"), get_array(df, "2020-01-01") @@ -545,7 +543,7 @@ def test_shift_columns(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(df2, expected) -def test_pop(using_copy_on_write, warn_copy_on_write): +def test_pop(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() view_original = df[:] @@ -557,8 +555,7 @@ def test_pop(using_copy_on_write, warn_copy_on_write): if using_copy_on_write: result.iloc[0] = 0 assert not np.shares_memory(result.values, get_array(view_original, "a")) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b")) tm.assert_frame_equal(view_original, df_orig) @@ -649,7 +646,7 @@ def test_align_with_series_copy_false(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) # Original is unchanged -def test_to_frame(using_copy_on_write, warn_copy_on_write): +def test_to_frame(using_copy_on_write): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -659,8 +656,7 @@ def test_to_frame(using_copy_on_write, warn_copy_on_write): # currently this always returns a "view" assert np.shares_memory(ser.values, get_array(df, 0)) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 if using_copy_on_write: # mutating df triggers a copy-on-write for that column @@ -674,8 +670,7 @@ def test_to_frame(using_copy_on_write, warn_copy_on_write): # modify original series -> don't modify dataframe df = ser[:].to_frame() - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 0 + ser.iloc[0] = 0 if using_copy_on_write: tm.assert_frame_equal(df, ser_orig.to_frame()) @@ -744,7 +739,7 @@ def 
test_swapaxes_read_only_array(): ], ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], ) -def test_chained_methods(request, method, idx, using_copy_on_write, warn_copy_on_write): +def test_chained_methods(request, method, idx, using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) df_orig = df.copy() @@ -753,15 +748,13 @@ def test_chained_methods(request, method, idx, using_copy_on_write, warn_copy_on # modify df2 -> don't modify df df2 = method(df) - with tm.assert_cow_warning(warn_copy_on_write and df2_is_view): - df2.iloc[0, idx] = 0 + df2.iloc[0, idx] = 0 if not df2_is_view: tm.assert_frame_equal(df, df_orig) # modify df -> don't modify df2 df2 = method(df) - with tm.assert_cow_warning(warn_copy_on_write and df2_is_view): - df.iloc[0, 0] = 0 + df.iloc[0, 0] = 0 if not df2_is_view: tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) @@ -910,7 +903,7 @@ def test_dropna_series(using_copy_on_write, val): lambda df: df.tail(3), ], ) -def test_head_tail(method, using_copy_on_write, warn_copy_on_write): +def test_head_tail(method, using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) df_orig = df.copy() df2 = method(df) @@ -923,16 +916,14 @@ def test_head_tail(method, using_copy_on_write, warn_copy_on_write): assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) # modify df2 to trigger CoW for that block - with tm.assert_cow_warning(warn_copy_on_write): - df2.iloc[0, 0] = 0 + df2.iloc[0, 0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b")) assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) else: # without CoW enabled, head and tail return views. Mutating df2 also mutates df. 
assert np.shares_memory(get_array(df2, "b"), get_array(df, "b")) - with tm.assert_cow_warning(warn_copy_on_write): - df2.iloc[0, 0] = 1 + df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) @@ -1146,7 +1137,7 @@ def test_sort_values(using_copy_on_write, obj, kwargs): "obj, kwargs", [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})], ) -def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_write): +def test_sort_values_inplace(using_copy_on_write, obj, kwargs): obj_orig = obj.copy() view = obj[:] obj.sort_values(inplace=True, **kwargs) @@ -1154,8 +1145,7 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ assert np.shares_memory(get_array(obj, "a"), get_array(view, "a")) # mutating obj triggers a copy-on-write for the column / block - with tm.assert_cow_warning(warn_copy_on_write): - obj.iloc[0] = 0 + obj.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a")) tm.assert_equal(view, obj_orig) @@ -1164,7 +1154,7 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ @pytest.mark.parametrize("decimals", [-1, 0, 1]) -def test_round(using_copy_on_write, warn_copy_on_write, decimals): +def test_round(using_copy_on_write, decimals): df = DataFrame({"a": [1, 2], "b": "c"}) df_orig = df.copy() df2 = df.round(decimals=decimals) @@ -1279,7 +1269,7 @@ def test_series_set_axis(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) -def test_set_flags(using_copy_on_write, warn_copy_on_write): +def test_set_flags(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() ser2 = ser.set_flags(allows_duplicate_labels=False) @@ -1287,8 +1277,7 @@ def test_set_flags(using_copy_on_write, warn_copy_on_write): assert np.shares_memory(ser, ser2) # mutating ser triggers a copy-on-write for the column / block - with tm.assert_cow_warning(warn_copy_on_write): - ser2.iloc[0] = 0 + ser2.iloc[0] = 0 if 
using_copy_on_write: assert not np.shares_memory(ser2, ser) tm.assert_series_equal(ser, ser_orig) @@ -1361,7 +1350,7 @@ def test_droplevel(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_squeeze(using_copy_on_write, warn_copy_on_write): +def test_squeeze(using_copy_on_write): df = DataFrame({"a": [1, 2, 3]}) df_orig = df.copy() series = df.squeeze() @@ -1370,8 +1359,7 @@ def test_squeeze(using_copy_on_write, warn_copy_on_write): assert np.shares_memory(series.values, get_array(df, "a")) # mutating squeezed df triggers a copy-on-write for that column/block - with tm.assert_cow_warning(warn_copy_on_write): - series.iloc[0] = 0 + series.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(series.values, get_array(df, "a")) tm.assert_frame_equal(df, df_orig) @@ -1381,7 +1369,7 @@ def test_squeeze(using_copy_on_write, warn_copy_on_write): assert df.loc[0, "a"] == 0 -def test_items(using_copy_on_write, warn_copy_on_write): +def test_items(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df_orig = df.copy() @@ -1392,8 +1380,7 @@ def test_items(using_copy_on_write, warn_copy_on_write): assert np.shares_memory(get_array(ser, name), get_array(df, name)) # mutating df triggers a copy-on-write for that column / block - with tm.assert_cow_warning(warn_copy_on_write): - ser.iloc[0] = 0 + ser.iloc[0] = 0 if using_copy_on_write: assert not np.shares_memory(get_array(ser, name), get_array(df, name)) @@ -1404,12 +1391,11 @@ def test_items(using_copy_on_write, warn_copy_on_write): @pytest.mark.parametrize("dtype", ["int64", "Int64"]) -def test_putmask(using_copy_on_write, dtype, warn_copy_on_write): +def test_putmask(using_copy_on_write, dtype): df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype) view = df[:] df_orig = df.copy() - with tm.assert_cow_warning(warn_copy_on_write): - df[df == df] = 5 + df[df == df] = 5 if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ 
-1443,21 +1429,15 @@ def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype): @pytest.mark.parametrize( "val, exp, warn", [(5.5, True, FutureWarning), (5, False, None)] ) -def test_putmask_dont_copy_some_blocks( - using_copy_on_write, val, exp, warn, warn_copy_on_write -): +def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp, warn): df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5}) view = df[:] df_orig = df.copy() indexer = DataFrame( [[True, False, False], [True, False, False]], columns=list("abc") ) - if warn_copy_on_write: - with tm.assert_cow_warning(): - df[indexer] = val - else: - with tm.assert_produces_warning(warn, match="incompatible dtype"): - df[indexer] = val + with tm.assert_produces_warning(warn, match="incompatible dtype"): + df[indexer] = val if using_copy_on_write: assert not np.shares_memory(get_array(view, "a"), get_array(df, "a")) @@ -1598,16 +1578,14 @@ def test_iterrows(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_interpolate_creates_copy(using_copy_on_write, warn_copy_on_write): +def test_interpolate_creates_copy(using_copy_on_write): # GH#51126 df = DataFrame({"a": [1.5, np.nan, 3]}) view = df[:] expected = df.copy() - with tm.assert_cow_warning(warn_copy_on_write): - df.ffill(inplace=True) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 100.5 + df.ffill(inplace=True) + df.iloc[0, 0] = 100.5 if using_copy_on_write: tm.assert_frame_equal(view, expected) @@ -1683,7 +1661,7 @@ def test_isetitem_frame(using_copy_on_write): @pytest.mark.parametrize("key", ["a", ["a"]]) -def test_get(using_copy_on_write, warn_copy_on_write, key): +def test_get(using_copy_on_write, key): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() @@ -1697,10 +1675,7 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): else: # for non-CoW it depends on whether we got a Series or DataFrame if it # is a view or copy or triggers a warning or not - if warn_copy_on_write: - warn = 
FutureWarning if isinstance(key, str) else None - else: - warn = SettingWithCopyWarning if isinstance(key, list) else None + warn = SettingWithCopyWarning if isinstance(key, list) else None with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(warn): result.iloc[0] = 0 @@ -1715,7 +1690,7 @@ def test_get(using_copy_on_write, warn_copy_on_write, key): @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_xs(using_copy_on_write, warn_copy_on_write, axis, key, dtype): +def test_xs(using_copy_on_write, axis, key, dtype): single_block = dtype == "int64" df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} @@ -1729,11 +1704,8 @@ def test_xs(using_copy_on_write, warn_copy_on_write, axis, key, dtype): elif using_copy_on_write: assert result._mgr._has_no_reference(0) - if using_copy_on_write or (single_block and not warn_copy_on_write): + if using_copy_on_write or single_block: result.iloc[0] = 0 - elif warn_copy_on_write: - with tm.assert_cow_warning(single_block or axis == 1): - result.iloc[0] = 0 else: with option_context("chained_assignment", "warn"): with tm.assert_produces_warning(SettingWithCopyWarning): @@ -1747,7 +1719,7 @@ def test_xs(using_copy_on_write, warn_copy_on_write, axis, key, dtype): @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)]) -def test_xs_multiindex(using_copy_on_write, warn_copy_on_write, key, level, axis): +def test_xs_multiindex(using_copy_on_write, key, level, axis): arr = np.arange(18).reshape(6, 3) index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"]) df = DataFrame(arr, index=index, columns=list("abc")) @@ -1762,9 +1734,7 @@ def test_xs_multiindex(using_copy_on_write, warn_copy_on_write, key, level, axis get_array(df, df.columns[0]), get_array(result, result.columns[0]) ) - if warn_copy_on_write: - warn = FutureWarning if level == 0 else None - elif 
not using_copy_on_write: + if not using_copy_on_write: warn = SettingWithCopyWarning else: warn = None @@ -1775,15 +1745,12 @@ def test_xs_multiindex(using_copy_on_write, warn_copy_on_write, key, level, axis tm.assert_frame_equal(df, df_orig) -def test_update_frame(using_copy_on_write, warn_copy_on_write): +def test_update_frame(using_copy_on_write): df1 = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]}) df2 = DataFrame({"b": [100.0]}, index=[1]) df1_orig = df1.copy() view = df1[:] - - # TODO(CoW) better warning message? - with tm.assert_cow_warning(warn_copy_on_write): - df1.update(df2) + df1.update(df2) expected = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 100.0, 6.0]}) tm.assert_frame_equal(df1, expected) @@ -1796,17 +1763,13 @@ def test_update_frame(using_copy_on_write, warn_copy_on_write): tm.assert_frame_equal(view, expected) -def test_update_series(using_copy_on_write, warn_copy_on_write): +def test_update_series(using_copy_on_write): ser1 = Series([1.0, 2.0, 3.0]) ser2 = Series([100.0], index=[1]) ser1_orig = ser1.copy() view = ser1[:] - if warn_copy_on_write: - with tm.assert_cow_warning(): - ser1.update(ser2) - else: - ser1.update(ser2) + ser1.update(ser2) expected = Series([1.0, 100.0, 3.0]) tm.assert_series_equal(ser1, expected) @@ -1817,29 +1780,17 @@ def test_update_series(using_copy_on_write, warn_copy_on_write): tm.assert_series_equal(view, expected) -def test_update_chained_assignment(using_copy_on_write): +def test_update_chained_assignment(): df = DataFrame({"a": [1, 2, 3]}) ser2 = Series([100.0], index=[1]) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].update(ser2) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - df[["a"]].update(ser2.to_frame()) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["a"].update(ser2) - - with tm.assert_produces_warning(None): - with 
option_context("mode.chained_assignment", None): - df[["a"]].update(ser2.to_frame()) + with tm.raises_chained_assignment_error(): + df["a"].update(ser2) + tm.assert_frame_equal(df, df_orig) - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df["a"] > 1].update(ser2.to_frame()) + with tm.raises_chained_assignment_error(): + df[["a"]].update(ser2.to_frame()) + tm.assert_frame_equal(df, df_orig) def test_inplace_arithmetic_series(using_copy_on_write): @@ -1860,14 +1811,11 @@ def test_inplace_arithmetic_series(using_copy_on_write): tm.assert_numpy_array_equal(data, get_array(ser)) -def test_inplace_arithmetic_series_with_reference( - using_copy_on_write, warn_copy_on_write -): +def test_inplace_arithmetic_series_with_reference(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() view = ser[:] - with tm.assert_cow_warning(warn_copy_on_write): - ser *= 2 + ser *= 2 if using_copy_on_write: assert not np.shares_memory(get_array(ser), get_array(view)) tm.assert_series_equal(ser_orig, view) @@ -1909,7 +1857,7 @@ def test_transpose_ea_single_column(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(result, 0)) -def test_transform_frame(using_copy_on_write, warn_copy_on_write): +def test_transform_frame(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() @@ -1917,13 +1865,12 @@ def func(ser): ser.iloc[0] = 100 return ser - with tm.assert_cow_warning(warn_copy_on_write): - df.transform(func) + df.transform(func) if using_copy_on_write: tm.assert_frame_equal(df, df_orig) -def test_transform_series(using_copy_on_write, warn_copy_on_write): +def test_transform_series(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -1931,8 +1878,7 @@ def func(ser): ser.iloc[0] = 100 return ser - with tm.assert_cow_warning(warn_copy_on_write): - ser.transform(func) + ser.transform(func) if using_copy_on_write: tm.assert_series_equal(ser, ser_orig) @@ 
-1945,7 +1891,7 @@ def test_count_read_only_array(): tm.assert_series_equal(result, expected) -def test_series_view(using_copy_on_write, warn_copy_on_write): +def test_series_view(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() @@ -1955,8 +1901,7 @@ def test_series_view(using_copy_on_write, warn_copy_on_write): if using_copy_on_write: assert not ser2._mgr._has_no_reference(0) - with tm.assert_cow_warning(warn_copy_on_write): - ser2.iloc[0] = 100 + ser2.iloc[0] = 100 if using_copy_on_write: tm.assert_series_equal(ser_orig, ser) @@ -1994,7 +1939,7 @@ def test_eval(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_eval_inplace(using_copy_on_write, warn_copy_on_write): +def test_eval_inplace(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": 1}) df_orig = df.copy() df_view = df[:] @@ -2002,13 +1947,12 @@ def test_eval_inplace(using_copy_on_write, warn_copy_on_write): df.eval("c = a+b", inplace=True) assert np.shares_memory(get_array(df, "a"), get_array(df_view, "a")) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 100 + df.iloc[0, 0] = 100 if using_copy_on_write: tm.assert_frame_equal(df_view, df_orig) -def test_apply_modify_row(using_copy_on_write, warn_copy_on_write): +def test_apply_modify_row(using_copy_on_write): # Case: applying a function on each row as a Series object, where the # function mutates the row object (which needs to trigger CoW if row is a view) df = DataFrame({"A": [1, 2], "B": [3, 4]}) @@ -2018,8 +1962,7 @@ def transform(row): row["B"] = 100 return row - with tm.assert_cow_warning(warn_copy_on_write): - df.apply(transform, axis=1) + df.apply(transform, axis=1) if using_copy_on_write: tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index 1a0a77b332743..f2ee26c0b9009 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -4,7 +4,6 @@ from pandas import ( 
Categorical, DataFrame, - option_context, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -48,13 +47,12 @@ def test_replace(using_copy_on_write, replace_kwargs): tm.assert_frame_equal(df, df_orig) -def test_replace_regex_inplace_refs(using_copy_on_write, warn_copy_on_write): +def test_replace_regex_inplace_refs(using_copy_on_write): df = DataFrame({"a": ["aaa", "bbb"]}) df_orig = df.copy() view = df[:] arr = get_array(df, "a") - with tm.assert_cow_warning(warn_copy_on_write): - df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) + df.replace(to_replace=r"^a.*$", value="new", inplace=True, regex=True) if using_copy_on_write: assert not np.shares_memory(arr, get_array(df, "a")) assert df._mgr._has_no_reference(0) @@ -214,12 +212,11 @@ def test_replace_inplace(using_copy_on_write, to_replace): @pytest.mark.parametrize("to_replace", [1.5, [1.5]]) -def test_replace_inplace_reference(using_copy_on_write, to_replace, warn_copy_on_write): +def test_replace_inplace_reference(using_copy_on_write, to_replace): df = DataFrame({"a": [1.5, 2, 3]}) arr_a = get_array(df, "a") view = df[:] - with tm.assert_cow_warning(warn_copy_on_write): - df.replace(to_replace=to_replace, value=15.5, inplace=True) + df.replace(to_replace=to_replace, value=15.5, inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) @@ -310,18 +307,14 @@ def test_replace_categorical(using_copy_on_write, val): @pytest.mark.parametrize("method", ["where", "mask"]) -def test_masking_inplace(using_copy_on_write, method, warn_copy_on_write): +def test_masking_inplace(using_copy_on_write, method): df = DataFrame({"a": [1.5, 2, 3]}) df_orig = df.copy() arr_a = get_array(df, "a") view = df[:] method = getattr(df, method) - if warn_copy_on_write: - with tm.assert_cow_warning(): - method(df["a"] > 1.6, -1, inplace=True) - else: - method(df["a"] > 1.6, -1, inplace=True) + method(df["a"] > 1.6, -1, inplace=True) if 
using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr_a) @@ -385,13 +378,12 @@ def test_replace_list_none(using_copy_on_write): assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a")) -def test_replace_list_none_inplace_refs(using_copy_on_write, warn_copy_on_write): +def test_replace_list_none_inplace_refs(using_copy_on_write): df = DataFrame({"a": ["a", "b", "c"]}) arr = get_array(df, "a") df_orig = df.copy() view = df[:] - with tm.assert_cow_warning(warn_copy_on_write): - df.replace(["a"], value=None, inplace=True) + df.replace(["a"], value=None, inplace=True) if using_copy_on_write: assert df._mgr._has_no_reference(0) assert not np.shares_memory(arr, get_array(df, "a")) @@ -421,28 +413,16 @@ def test_replace_columnwise_no_op(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_chained_assignment(using_copy_on_write): +def test_replace_chained_assignment(): df = DataFrame({"a": [1, np.nan, 2], "b": 1}) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].replace(1, 100, inplace=True) - tm.assert_frame_equal(df, df_orig) - - with tm.raises_chained_assignment_error(): - df[["a"]].replace(1, 100, inplace=True) - tm.assert_frame_equal(df, df_orig) - else: - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].replace(1, 100, inplace=True) - - with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df.a > 5].replace(1, 100, inplace=True) + with tm.raises_chained_assignment_error(): + df["a"].replace(1, 100, inplace=True) + tm.assert_frame_equal(df, df_orig) - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["a"].replace(1, 100, inplace=True) + with tm.raises_chained_assignment_error(): + df[["a"]].replace(1, 100, inplace=True) + tm.assert_frame_equal(df, df_orig) def test_replace_listlike(using_copy_on_write): @@ -463,7 +443,7 @@ def 
test_replace_listlike(using_copy_on_write): tm.assert_frame_equal(df, df_orig) -def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write): +def test_replace_listlike_inplace(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) arr = get_array(df, "a") df.replace([200, 2], [10, 11], inplace=True) @@ -471,8 +451,7 @@ def test_replace_listlike_inplace(using_copy_on_write, warn_copy_on_write): view = df[:] df_orig = df.copy() - with tm.assert_cow_warning(warn_copy_on_write): - df.replace([200, 3], [10, 11], inplace=True) + df.replace([200, 3], [10, 11], inplace=True) if using_copy_on_write: assert not np.shares_memory(get_array(df, "a"), arr) tm.assert_frame_equal(view, df_orig) diff --git a/pandas/tests/copy_view/test_setitem.py b/pandas/tests/copy_view/test_setitem.py index bc3b939734534..6104699cbc51b 100644 --- a/pandas/tests/copy_view/test_setitem.py +++ b/pandas/tests/copy_view/test_setitem.py @@ -142,7 +142,7 @@ def test_setitem_series_column_midx_broadcasting(using_copy_on_write): assert df._mgr._has_no_reference(0) -def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_write): +def test_set_column_with_inplace_operator(using_copy_on_write): df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) # this should not raise any warning @@ -152,5 +152,4 @@ def test_set_column_with_inplace_operator(using_copy_on_write, warn_copy_on_writ # when it is not in a chain, then it should produce a warning df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) ser = df["a"] - with tm.assert_cow_warning(warn_copy_on_write): - ser += 1 + ser += 1 diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index a36b0c0e850b3..73683922bcc92 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -392,14 +392,13 @@ def test_getitem_empty_frame_with_boolean(self): tm.assert_frame_equal(df, df2) def 
test_getitem_returns_view_when_column_is_unique_in_df( - self, using_copy_on_write, warn_copy_on_write + self, using_copy_on_write ): # GH#45316 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) df_orig = df.copy() view = df["b"] - with tm.assert_cow_warning(warn_copy_on_write): - view.loc[:] = 100 + view.loc[:] = 100 if using_copy_on_write: expected = df_orig else: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 0373c15d15272..c8787ac0b364e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -287,9 +287,7 @@ def test_setattr_column(self): df.foobar = 5 assert (df.foobar == 5).all() - def test_setitem( - self, float_frame, using_copy_on_write, warn_copy_on_write, using_infer_string - ): + def test_setitem(self, float_frame, using_copy_on_write, using_infer_string): # not sure what else to do here series = float_frame["A"][::2] float_frame["col5"] = series @@ -325,7 +323,7 @@ def test_setitem( smaller = float_frame[:2] msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: # With CoW, adding a new column doesn't raise a warning smaller["col10"] = ["1", "2"] else: @@ -574,7 +572,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self): df2.loc[3:11] = 0 def test_fancy_getitem_slice_mixed( - self, float_frame, float_string_frame, using_copy_on_write, warn_copy_on_write + self, float_frame, float_string_frame, using_copy_on_write ): sliced = float_string_frame.iloc[:, -3:] assert sliced["D"].dtype == np.float64 @@ -586,8 +584,7 @@ def test_fancy_getitem_slice_mixed( assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) - with tm.assert_cow_warning(warn_copy_on_write): - sliced.loc[:, "C"] = 4.0 + sliced.loc[:, "C"] = 4.0 if not using_copy_on_write: assert (float_frame["C"] == 4).all() @@ -1062,7 +1059,7 @@ def 
test_iloc_row(self): expected = df.reindex(df.index[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_row_slice_view(self, using_copy_on_write, warn_copy_on_write): + def test_iloc_row_slice_view(self, using_copy_on_write): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2) ) @@ -1075,8 +1072,7 @@ def test_iloc_row_slice_view(self, using_copy_on_write, warn_copy_on_write): assert np.shares_memory(df[2], subset[2]) exp_col = original[2].copy() - with tm.assert_cow_warning(warn_copy_on_write): - subset.loc[:, 2] = 0.0 + subset.loc[:, 2] = 0.0 if not using_copy_on_write: exp_col._values[4:8] = 0.0 @@ -1107,7 +1103,7 @@ def test_iloc_col(self): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_col_slice_view(self, using_copy_on_write, warn_copy_on_write): + def test_iloc_col_slice_view(self, using_copy_on_write): df = DataFrame( np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) @@ -1118,8 +1114,7 @@ def test_iloc_col_slice_view(self, using_copy_on_write, warn_copy_on_write): # verify slice is view assert np.shares_memory(df[8]._values, subset[8]._values) - with tm.assert_cow_warning(warn_copy_on_write): - subset.loc[:, 8] = 0.0 + subset.loc[:, 8] = 0.0 assert (df[8] == 0).all() @@ -1401,7 +1396,7 @@ def test_loc_setitem_rhs_frame(self, idxr, val): expected = DataFrame({"a": [np.nan, val]}) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_enlarge_no_warning(self, warn_copy_on_write): + def test_iloc_setitem_enlarge_no_warning(self): # GH#47381 df = DataFrame(columns=["a", "b"]) expected = df.copy() diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 72cd98ba78122..2df01b2cdb721 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -844,7 +844,7 @@ def test_setitem_object_array_of_tzaware_datetimes(self, 
idx, expected): class TestDataFrameSetItemWithExpansion: - def test_setitem_listlike_views(self, using_copy_on_write, warn_copy_on_write): + def test_setitem_listlike_views(self, using_copy_on_write): # GH#38148 df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) @@ -855,8 +855,7 @@ def test_setitem_listlike_views(self, using_copy_on_write, warn_copy_on_write): df[["c", "d"]] = np.array([[0.1, 0.2], [0.3, 0.4], [0.4, 0.5]]) # edit in place the first column to check view semantics - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 100 + df.iloc[0, 0] = 100 if using_copy_on_write: expected = Series([1, 2, 3], name="a") @@ -1299,9 +1298,7 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer): df[indexer] = set_value tm.assert_frame_equal(view, expected) - def test_setitem_column_update_inplace( - self, using_copy_on_write, warn_copy_on_write - ): + def test_setitem_column_update_inplace(self, using_copy_on_write): # https://github.com/pandas-dev/pandas/issues/47172 labels = [f"c{i}" for i in range(10)] diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 80b4635b94d3b..dc2f0b61e3ba0 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -60,7 +60,7 @@ def test_xs_dt_error(self, datetime_frame): ): datetime_frame.xs(datetime_frame.index[0] - BDay()) - def test_xs_other(self, float_frame, using_copy_on_write, warn_copy_on_write): + def test_xs_other(self, float_frame, using_copy_on_write): float_frame_orig = float_frame.copy() # xs get column series = float_frame.xs("A", axis=1) @@ -69,8 +69,7 @@ def test_xs_other(self, float_frame, using_copy_on_write, warn_copy_on_write): # view is returned if possible series = float_frame.xs("A", axis=1) - with tm.assert_cow_warning(warn_copy_on_write): - series[:] = 5 + series[:] = 5 if using_copy_on_write: # but with CoW the view shouldn't propagate mutations tm.assert_series_equal(float_frame["A"], 
float_frame_orig["A"]) @@ -123,7 +122,7 @@ def test_xs_keep_level(self): result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False) tm.assert_frame_equal(result, expected) - def test_xs_view(self, using_copy_on_write, warn_copy_on_write): + def test_xs_view(self, using_copy_on_write): # in 0.14 this will return a view if possible a copy otherwise, but # this is numpy dependent @@ -199,14 +198,13 @@ def test_xs_setting_with_copy_error( self, multiindex_dataframe_random_data, using_copy_on_write, - warn_copy_on_write, ): # this is a copy in 0.14 df = multiindex_dataframe_random_data df_orig = df.copy() result = df.xs("two", level="second") - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: result[:] = 10 else: # setting this will give a SettingWithCopyError @@ -217,14 +215,14 @@ def test_xs_setting_with_copy_error( tm.assert_frame_equal(df, df_orig) def test_xs_setting_with_copy_error_multiple( - self, four_level_index_dataframe, using_copy_on_write, warn_copy_on_write + self, four_level_index_dataframe, using_copy_on_write ): # this is a copy in 0.14 df = four_level_index_dataframe df_orig = df.copy() result = df.xs(("a", 4), level=["one", "four"]) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: result[:] = 10 else: # setting this will give a SettingWithCopyError @@ -392,15 +390,14 @@ def test_xs_droplevel_false(self): expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - def test_xs_droplevel_false_view(self, using_copy_on_write, warn_copy_on_write): + def test_xs_droplevel_false_view(self, using_copy_on_write): # GH#37832 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) # check that result still views the same data as df assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values) - with tm.assert_cow_warning(warn_copy_on_write): - df.iloc[0, 0] = 2 + df.iloc[0, 0] = 2 if using_copy_on_write: # with copy on write 
the subset is never modified expected = DataFrame({"a": [1]}) diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 04a08c8b9bc52..2a50137c2d6ef 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -207,7 +207,7 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method): expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) - def test_corr_item_cache(self, using_copy_on_write, warn_copy_on_write): + def test_corr_item_cache(self, using_copy_on_write): # Check that corr does not lead to incorrect entries in item_cache df = DataFrame({"A": range(10)}) @@ -225,8 +225,7 @@ def test_corr_item_cache(self, using_copy_on_write, warn_copy_on_write): # Check that the corr didn't break link between ser and df ser.values[0] = 99 assert df.loc[0, "A"] == 99 - if not warn_copy_on_write: - assert df["A"] is ser + assert df["A"] is ser assert df.values[0, 0] == 99 @pytest.mark.parametrize("length", [2, 20, 200, 2000]) diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 4f661b14ef201..df38ddc6c3116 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -20,18 +20,14 @@ class TestFillNA: - def test_fillna_dict_inplace_nonunique_columns( - self, using_copy_on_write, warn_copy_on_write - ): + def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): df = DataFrame( {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} ) df.columns = ["A", "A", "A"] orig = df[:] - # TODO(CoW-warn) better warning message - with tm.assert_cow_warning(warn_copy_on_write): - df.fillna({"A": 2}, inplace=True) + df.fillna({"A": 2}, inplace=True) # The first and third columns can be set inplace, while the second cannot. 
expected = DataFrame( @@ -750,15 +746,12 @@ def test_fillna_inplace_with_columns_limit_and_value(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) - def test_inplace_dict_update_view( - self, val, using_copy_on_write, warn_copy_on_write - ): + def test_inplace_dict_update_view(self, val, using_copy_on_write): # GH#47188 df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) df_orig = df.copy() result_view = df[:] - with tm.assert_cow_warning(warn_copy_on_write): - df.fillna(val, inplace=True) + df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected) if using_copy_on_write: diff --git a/pandas/tests/frame/methods/test_pop.py b/pandas/tests/frame/methods/test_pop.py index 3eb058015cd3d..617f0c3a27885 100644 --- a/pandas/tests/frame/methods/test_pop.py +++ b/pandas/tests/frame/methods/test_pop.py @@ -9,7 +9,7 @@ class TestDataFramePop: - def test_pop(self, float_frame, warn_copy_on_write): + def test_pop(self, float_frame): float_frame.columns.name = "baz" float_frame.pop("A") @@ -23,8 +23,7 @@ def test_pop(self, float_frame, warn_copy_on_write): # gh-10912: inplace ops cause caching issue a = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"], index=["X", "Y"]) b = a.pop("B") - with tm.assert_cow_warning(warn_copy_on_write): - b += 1 + b += 1 # original frame expected = DataFrame([[1, 3], [4, 6]], columns=["A", "C"], index=["X", "Y"]) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index c3bc96b44c807..b965a5d973fb6 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -164,13 +164,12 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, float_frame, using_copy_on_write, warn_copy_on_write): + def 
test_rename_nocopy(self, float_frame, using_copy_on_write): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) - with tm.assert_cow_warning(warn_copy_on_write): - renamed.loc[:, "foo"] = 1.0 + renamed.loc[:, "foo"] = 1.0 if using_copy_on_write: assert not (float_frame["C"] == 1.0).all() else: diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 217010ab2e7ee..19001f10e37e4 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -31,7 +31,7 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write): assert _last_df is not None and not _last_df[column].equals(df[column]) -def test_to_dict_of_blocks_item_cache(using_copy_on_write, warn_copy_on_write): +def test_to_dict_of_blocks_item_cache(using_copy_on_write): # Calling to_dict_of_blocks should not poison item_cache df = DataFrame({"a": [1, 2, 3, 4], "b": ["a", "b", "c", "d"]}) df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object)) @@ -45,11 +45,6 @@ def test_to_dict_of_blocks_item_cache(using_copy_on_write, warn_copy_on_write): if using_copy_on_write: with pytest.raises(ValueError, match="read-only"): ser.values[0] = "foo" - elif warn_copy_on_write: - ser.values[0] = "foo" - assert df.loc[0, "b"] == "foo" - # with warning mode, the item cache is disabled - assert df["b"] is not ser else: # Check that the to_dict_of_blocks didn't break link between ser and df ser.values[0] = "foo" diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 565619005d9f0..7ff8508c3b799 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -138,15 +138,12 @@ def test_update_datetime_tz(self): expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) - 
def test_update_datetime_tz_in_place(self, using_copy_on_write, warn_copy_on_write): + def test_update_datetime_tz_in_place(self, using_copy_on_write): # https://github.com/pandas-dev/pandas/issues/56227 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) orig = result.copy() view = result[:] - with tm.assert_produces_warning( - FutureWarning if warn_copy_on_write else None, match="Setting a value" - ): - result.update(result + pd.Timedelta(days=1)) + result.update(result + pd.Timedelta(days=1)) expected = DataFrame([pd.Timestamp("2019-01-02", tz="UTC")]) tm.assert_frame_equal(result, expected) if not using_copy_on_write: @@ -170,17 +167,13 @@ def test_update_with_different_dtype(self, using_copy_on_write): ) tm.assert_frame_equal(df, expected) - def test_update_modify_view( - self, using_copy_on_write, warn_copy_on_write, using_infer_string - ): + def test_update_modify_view(self, using_copy_on_write, using_infer_string): # GH#47188 df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) df2_orig = df2.copy() result_view = df2[:] - # TODO(CoW-warn) better warning message - with tm.assert_cow_warning(warn_copy_on_write): - df2.update(df) + df2.update(df) expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) tm.assert_frame_equal(df2, expected) if using_copy_on_write or using_infer_string: diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index c7b444045a0f2..0112e0093c102 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -326,7 +326,6 @@ def test_set_flags( allows_duplicate_labels, frame_or_series, using_copy_on_write, - warn_copy_on_write, ): obj = DataFrame({"A": [1, 2]}) key = (0, 0) @@ -354,15 +353,13 @@ def test_set_flags( else: assert np.may_share_memory(obj["A"].values, result["A"].values) - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[key] = 0 + result.iloc[key] = 0 if using_copy_on_write: assert obj.iloc[key] == 1 
else: assert obj.iloc[key] == 0 # set back to 1 for test below - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[key] = 1 + result.iloc[key] = 1 # Now we do copy. result = obj.set_flags( diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d33a7cdcf21c3..4fb0bbafc6879 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2006,15 +2006,14 @@ def test_arith_list_of_arraylike_raise(to_add): to_add + df -def test_inplace_arithmetic_series_update(using_copy_on_write, warn_copy_on_write): +def test_inplace_arithmetic_series_update(using_copy_on_write): # https://github.com/pandas-dev/pandas/issues/36373 df = DataFrame({"A": [1, 2, 3]}) df_orig = df.copy() series = df["A"] vals = series._values - with tm.assert_cow_warning(warn_copy_on_write): - series += 1 + series += 1 if using_copy_on_write: assert series._values is not vals tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 22fff2116510a..36013e1ac949f 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -332,7 +332,7 @@ def test_is_mixed_type(self, float_frame, float_string_frame): assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type - def test_stale_cached_series_bug_473(self, using_copy_on_write, warn_copy_on_write): + def test_stale_cached_series_bug_473(self, using_copy_on_write): # this is chained, but ok with option_context("chained_assignment", None): Y = DataFrame( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 9ff2b52bd35ff..20f147e94c514 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -287,27 +287,20 @@ def test_constructor_dtype_copy(self): new_df["col1"] = 200.0 assert orig_df["col1"][0] == 1.0 - def 
test_constructor_dtype_nocast_view_dataframe( - self, using_copy_on_write, warn_copy_on_write - ): + def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) if using_copy_on_write: should_be_view.iloc[0, 0] = 99 assert df.values[0, 0] == 1 else: - with tm.assert_cow_warning(warn_copy_on_write): - should_be_view.iloc[0, 0] = 99 + should_be_view.iloc[0, 0] = 99 assert df.values[0, 0] == 99 - def test_constructor_dtype_nocast_view_2d_array( - self, using_copy_on_write, warn_copy_on_write - ): + def test_constructor_dtype_nocast_view_2d_array(self, using_copy_on_write): df = DataFrame([[1, 2], [3, 4]], dtype="int64") if not using_copy_on_write: should_be_view = DataFrame(df.values, dtype=df[0].dtype) - # TODO(CoW-warn) this should warn - # with tm.assert_cow_warning(warn_copy_on_write): should_be_view.iloc[0, 0] = 97 assert df.values[0, 0] == 97 else: diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 07f76810cbfc8..43d1c74d76db2 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -89,10 +89,8 @@ def test_preserve_getitem(self): assert df.loc[[0]].flags.allows_duplicate_labels is False assert df.loc[0, ["A"]].flags.allows_duplicate_labels is False - def test_ndframe_getitem_caching_issue( - self, request, using_copy_on_write, warn_copy_on_write - ): - if not (using_copy_on_write or warn_copy_on_write): + def test_ndframe_getitem_caching_issue(self, request, using_copy_on_write): + if not using_copy_on_write: request.applymarker(pytest.mark.xfail(reason="Unclear behavior.")) # NDFrame.__getitem__ will cache the first df['A']. May need to # invalidate that cache? Update the cached entries? 
diff --git a/pandas/tests/groupby/test_apply_mutate.py b/pandas/tests/groupby/test_apply_mutate.py index 29d82cce44807..e5028884e992b 100644 --- a/pandas/tests/groupby/test_apply_mutate.py +++ b/pandas/tests/groupby/test_apply_mutate.py @@ -75,7 +75,7 @@ def test_no_mutate_but_looks_like(): tm.assert_series_equal(result1, result2) -def test_apply_function_with_indexing(warn_copy_on_write): +def test_apply_function_with_indexing(): # GH: 33058 df = pd.DataFrame( {"col1": ["A", "A", "A", "B", "B", "B"], "col2": [1, 2, 3, 4, 5, 6]} @@ -86,9 +86,7 @@ def fn(x): return x.col2 msg = "DataFrameGroupBy.apply operated on the grouping columns" - with tm.assert_produces_warning( - DeprecationWarning, match=msg, raise_on_extra_warnings=not warn_copy_on_write - ): + with tm.assert_produces_warning(DeprecationWarning, match=msg): result = df.groupby(["col1"], as_index=False).apply(fn) expected = pd.Series( [1, 2, 0, 4, 5, 0], diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 50103011693bc..1a32dcefed91a 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -1196,7 +1196,7 @@ def test_groupby_prod_with_int64_dtype(): tm.assert_frame_equal(result, expected) -def test_groupby_std_datetimelike(warn_copy_on_write): +def test_groupby_std_datetimelike(): # GH#48481 tdi = pd.timedelta_range("1 Day", periods=10000) ser = Series(tdi) diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 4fab12f195dc0..a7873594ecade 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -12,9 +12,7 @@ class TestPeriodIndex: - def test_getitem_periodindex_duplicates_string_slice( - self, using_copy_on_write, warn_copy_on_write - ): + def test_getitem_periodindex_duplicates_string_slice(self, using_copy_on_write): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 
2009], freq="Y-JUN") ts = Series(np.random.default_rng(2).standard_normal(len(idx)), index=idx) @@ -23,8 +21,7 @@ def test_getitem_periodindex_duplicates_string_slice( result = ts["2007"] expected = ts[1:3] tm.assert_series_equal(result, expected) - with tm.assert_cow_warning(warn_copy_on_write): - result[:] = 1 + result[:] = 1 if using_copy_on_write: tm.assert_series_equal(ts, original) else: diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 014ba6fc12b72..24a111e283365 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -12,7 +12,7 @@ import pandas._testing as tm -def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write): +def test_detect_chained_assignment(using_copy_on_write): # Inplace ops, originally from: # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug a = [12, 23] @@ -32,9 +32,6 @@ def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write): if using_copy_on_write: with tm.raises_chained_assignment_error(): zed["eyes"]["right"].fillna(value=555, inplace=True) - elif warn_copy_on_write: - with tm.assert_produces_warning(None): - zed["eyes"]["right"].fillna(value=555, inplace=True) else: msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(SettingWithCopyError, match=msg): @@ -42,7 +39,7 @@ def test_detect_chained_assignment(using_copy_on_write, warn_copy_on_write): zed["eyes"]["right"].fillna(value=555, inplace=True) -def test_cache_updating(using_copy_on_write, warn_copy_on_write): +def test_cache_updating(using_copy_on_write): # 5216 # make sure that we don't try to set a dead cache a = np.random.default_rng(2).random((10, 3)) diff --git a/pandas/tests/indexing/multiindex/test_partial.py 
b/pandas/tests/indexing/multiindex/test_partial.py index 830c187a205a8..b68ab18fbc9b2 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -120,7 +120,6 @@ def test_partial_set( self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write, - warn_copy_on_write, ): # GH #397 ymd = multiindex_year_month_day_dataframe_random_data diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 22a0a49762097..17b00244c70f5 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -196,9 +196,7 @@ def test_multiindex_assignment(self): df.loc[4, "d"] = arr tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) - def test_multiindex_assignment_single_dtype( - self, using_copy_on_write, warn_copy_on_write - ): + def test_multiindex_assignment_single_dtype(self, using_copy_on_write): # GH3777 part 2b # single dtype arr = np.array([0.0, 1.0]) @@ -233,8 +231,7 @@ def test_multiindex_assignment_single_dtype( tm.assert_series_equal(result, exp) # scalar ok - with tm.assert_cow_warning(warn_copy_on_write): - df.loc[4, "c"] = 10 + df.loc[4, "c"] = 10 exp = Series(10, index=[8, 10], name="c", dtype="float64") tm.assert_series_equal(df.loc[4, "c"], exp) @@ -248,8 +245,7 @@ def test_multiindex_assignment_single_dtype( # But with a length-1 listlike column indexer this behaves like # `df.loc[4, "c"] = 0 - with tm.assert_cow_warning(warn_copy_on_write): - df.loc[4, ["c"]] = [0] + df.loc[4, ["c"]] = [0] assert (df.loc[4, "c"] == 0).all() def test_groupby_example(self): @@ -274,20 +270,16 @@ def test_groupby_example(self): new_vals = np.arange(df2.shape[0]) df.loc[name, "new_col"] = new_vals - def test_series_setitem( - self, multiindex_year_month_day_dataframe_random_data, warn_copy_on_write - ): + def test_series_setitem(self, 
multiindex_year_month_day_dataframe_random_data): ymd = multiindex_year_month_day_dataframe_random_data s = ymd["A"] - with tm.assert_cow_warning(warn_copy_on_write): - s[2000, 3] = np.nan + s[2000, 3] = np.nan assert isna(s.values[42:65]).all() assert notna(s.values[:42]).all() assert notna(s.values[65:]).all() - with tm.assert_cow_warning(warn_copy_on_write): - s[2000, 3, 10] = np.nan + s[2000, 3, 10] = np.nan assert isna(s.iloc[49]) with pytest.raises(KeyError, match="49"): @@ -423,7 +415,7 @@ def test_setitem_change_dtype(self, multiindex_dataframe_random_data): tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) def test_set_column_scalar_with_loc( - self, multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write + self, multiindex_dataframe_random_data, using_copy_on_write ): frame = multiindex_dataframe_random_data subset = frame.index[[1, 4, 5]] @@ -433,8 +425,7 @@ def test_set_column_scalar_with_loc( frame_original = frame.copy() col = frame["B"] - with tm.assert_cow_warning(warn_copy_on_write): - col[subset] = 97 + col[subset] = 97 if using_copy_on_write: # chained setitem doesn't work with CoW tm.assert_frame_equal(frame, frame_original) @@ -532,11 +523,11 @@ def test_frame_setitem_view_direct( def test_frame_setitem_copy_raises( - multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write + multiindex_dataframe_random_data, using_copy_on_write ): # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 else: @@ -547,12 +538,12 @@ def test_frame_setitem_copy_raises( def test_frame_setitem_copy_no_write( - multiindex_dataframe_random_data, using_copy_on_write, warn_copy_on_write + multiindex_dataframe_random_data, using_copy_on_write ): frame = multiindex_dataframe_random_data.T expected = frame df = frame.copy() - if using_copy_on_write or 
warn_copy_on_write: + if using_copy_on_write: with tm.raises_chained_assignment_error(): df["foo"]["one"] = 2 else: diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 5acfb72c4a666..6dbe4f2b3ed3a 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -70,9 +70,7 @@ def test_setitem_cache_updating(self, do_ref): assert df.loc[0, "c"] == 0.0 assert df.loc[7, "c"] == 1.0 - def test_setitem_cache_updating_slices( - self, using_copy_on_write, warn_copy_on_write - ): + def test_setitem_cache_updating_slices(self, using_copy_on_write): # GH 7084 # not updating cache on series setting with slices expected = DataFrame( @@ -96,9 +94,7 @@ def test_setitem_cache_updating_slices( out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - with tm.raises_chained_assignment_error( - (ix == 0) or warn_copy_on_write or using_copy_on_write - ): + with tm.raises_chained_assignment_error((ix == 0) or using_copy_on_write): out[row["C"]][six:eix] = v if not using_copy_on_write: @@ -115,14 +111,12 @@ def test_setitem_cache_updating_slices( tm.assert_frame_equal(out, expected) tm.assert_series_equal(out["A"], expected["A"]) - def test_altering_series_clears_parent_cache( - self, using_copy_on_write, warn_copy_on_write - ): + def test_altering_series_clears_parent_cache(self, using_copy_on_write): # GH #33675 df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) ser = df["A"] - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: assert "A" not in df._item_cache else: assert "A" in df._item_cache @@ -210,9 +204,7 @@ def test_detect_chained_assignment(self, using_copy_on_write): tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow - def test_detect_chained_assignment_raises( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_raises(self, 
using_copy_on_write): # test with the chaining df = DataFrame( { @@ -229,11 +221,6 @@ def test_detect_chained_assignment_raises( with tm.raises_chained_assignment_error(): df["A"][1] = -6 tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - with tm.raises_chained_assignment_error(): - df["A"][1] = np.nan else: with pytest.raises(SettingWithCopyError, match=msg): with tm.raises_chained_assignment_error(): @@ -246,9 +233,7 @@ def test_detect_chained_assignment_raises( assert df["A"]._is_copy is None @pytest.mark.arm_slow - def test_detect_chained_assignment_fails( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_fails(self, using_copy_on_write): # Using a copy (the chain), fails df = DataFrame( { @@ -257,7 +242,7 @@ def test_detect_chained_assignment_fails( } ) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: with tm.raises_chained_assignment_error(): df.loc[0]["A"] = -5 else: @@ -265,9 +250,7 @@ def test_detect_chained_assignment_fails( df.loc[0]["A"] = -5 @pytest.mark.arm_slow - def test_detect_chained_assignment_doc_example( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_doc_example(self, using_copy_on_write): # Doc example df = DataFrame( { @@ -278,7 +261,7 @@ def test_detect_chained_assignment_doc_example( assert df._is_copy is None indexer = df.a.str.startswith("o") - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: with tm.raises_chained_assignment_error(): df[indexer]["c"] = 42 else: @@ -286,16 +269,14 @@ def test_detect_chained_assignment_doc_example( df[indexer]["c"] = 42 @pytest.mark.arm_slow - def test_detect_chained_assignment_object_dtype( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_object_dtype(self, using_copy_on_write): expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) df = DataFrame( 
{"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]} ) df_original = df.copy() - if not using_copy_on_write and not warn_copy_on_write: + if not using_copy_on_write: with pytest.raises(SettingWithCopyError, match=msg): df.loc[0]["A"] = 111 @@ -303,10 +284,6 @@ def test_detect_chained_assignment_object_dtype( with tm.raises_chained_assignment_error(): df["A"][0] = 111 tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = 111 - tm.assert_frame_equal(df, expected) else: with pytest.raises(SettingWithCopyError, match=msg): with tm.raises_chained_assignment_error(): @@ -358,10 +335,8 @@ def test_detect_chained_assignment_implicit_take(self): df["letters"] = df["letters"].apply(str.lower) @pytest.mark.arm_slow - def test_detect_chained_assignment_implicit_take2( - self, using_copy_on_write, warn_copy_on_write - ): - if using_copy_on_write or warn_copy_on_write: + def test_detect_chained_assignment_implicit_take2(self, using_copy_on_write): + if using_copy_on_write: pytest.skip("_is_copy is not always set for CoW") # Implicitly take 2 df = random_text(100000) @@ -415,9 +390,7 @@ def test_detect_chained_assignment_false_positives(self): str(df) @pytest.mark.arm_slow - def test_detect_chained_assignment_undefined_column( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): # from SO: # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc df = DataFrame(np.arange(0, 9), columns=["count"]) @@ -428,18 +401,13 @@ def test_detect_chained_assignment_undefined_column( with tm.raises_chained_assignment_error(): df.iloc[0:5]["group"] = "a" tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df.iloc[0:5]["group"] = "a" else: with pytest.raises(SettingWithCopyError, match=msg): with 
tm.raises_chained_assignment_error(): df.iloc[0:5]["group"] = "a" @pytest.mark.arm_slow - def test_detect_chained_assignment_changing_dtype( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_changing_dtype(self, using_copy_on_write): # Mixed type setting but same dtype & changing dtype df = DataFrame( { @@ -451,7 +419,7 @@ def test_detect_chained_assignment_changing_dtype( ) df_original = df.copy() - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: with tm.raises_chained_assignment_error(): df.loc[2]["D"] = "foo" with tm.raises_chained_assignment_error(): @@ -474,7 +442,7 @@ def test_detect_chained_assignment_changing_dtype( with tm.raises_chained_assignment_error(): df["C"][2] = "foo" - def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): + def test_setting_with_copy_bug(self, using_copy_on_write): # operating on a copy df = DataFrame( {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} @@ -486,9 +454,6 @@ def test_setting_with_copy_bug(self, using_copy_on_write, warn_copy_on_write): with tm.raises_chained_assignment_error(): df[["c"]][mask] = df[["b"]][mask] tm.assert_frame_equal(df, df_original) - elif warn_copy_on_write: - with tm.raises_chained_assignment_error(): - df[["c"]][mask] = df[["b"]][mask] else: with pytest.raises(SettingWithCopyError, match=msg): df[["c"]][mask] = df[["b"]][mask] @@ -502,11 +467,9 @@ def test_setting_with_copy_bug_no_warning(self): # this should not raise df2["y"] = ["g", "h", "i"] - def test_detect_chained_assignment_warnings_errors( - self, using_copy_on_write, warn_copy_on_write - ): + def test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: with tm.raises_chained_assignment_error(): df.loc[0]["A"] = 111 return @@ -521,14 +484,14 @@ def 
test_detect_chained_assignment_warnings_errors( @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) def test_detect_chained_assignment_warning_stacklevel( - self, rhs, using_copy_on_write, warn_copy_on_write + self, rhs, using_copy_on_write ): # GH#42570 df = DataFrame(np.arange(25).reshape(5, 5)) df_original = df.copy() chained = df.loc[:3] with option_context("chained_assignment", "warn"): - if not using_copy_on_write and not warn_copy_on_write: + if not using_copy_on_write: with tm.assert_produces_warning(SettingWithCopyWarning) as t: chained[2] = rhs assert t[0].filename == __file__ diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index 5b8c4f2d4b9b9..4497c16efdfda 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -5,7 +5,6 @@ Series, period_range, ) -import pandas._testing as tm def test_iat(float_frame): @@ -31,9 +30,7 @@ def test_iat_getitem_series_with_period_index(): assert expected == result -def test_iat_setitem_item_cache_cleared( - indexer_ial, using_copy_on_write, warn_copy_on_write -): +def test_iat_setitem_item_cache_cleared(indexer_ial, using_copy_on_write): # GH#45684 data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)} df = DataFrame(data).copy() @@ -41,11 +38,9 @@ def test_iat_setitem_item_cache_cleared( # previously this iat setting would split the block and fail to clear # the item_cache. 
- with tm.assert_cow_warning(warn_copy_on_write): - indexer_ial(df)[7, 0] = 9999 + indexer_ial(df)[7, 0] = 9999 - with tm.assert_cow_warning(warn_copy_on_write): - indexer_ial(df)[7, 1] = 1234 + indexer_ial(df)[7, 1] = 1234 assert df.iat[7, 1] == 1234 if not using_copy_on_write: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 7b2a9dd99d925..5453c8be0e832 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -428,7 +428,7 @@ def test_iloc_getitem_slice_dups(self): tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) - def test_iloc_setitem(self, warn_copy_on_write): + def test_iloc_setitem(self): df = DataFrame( np.random.default_rng(2).standard_normal((4, 4)), index=np.arange(0, 8, 2), @@ -843,9 +843,7 @@ def test_iloc_empty_list_indexer_is_ok(self): df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object( - self, using_copy_on_write, warn_copy_on_write - ): + def test_identity_slice_returns_new_object(self, using_copy_on_write): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.iloc[:] @@ -856,8 +854,7 @@ def test_identity_slice_returns_new_object( # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig # depending on CoW - with tm.assert_cow_warning(warn_copy_on_write): - original_df.loc[:, "a"] = [4, 4, 4] + original_df.loc[:, "a"] = [4, 4, 4] if using_copy_on_write: assert (sliced_df["a"] == [1, 2, 3]).all() else: @@ -868,8 +865,7 @@ def test_identity_slice_returns_new_object( assert sliced_series is not original_series # should also be a shallow copy - with tm.assert_cow_warning(warn_copy_on_write): - original_series[:3] = [7, 8, 9] + original_series[:3] = [7, 8, 9] if using_copy_on_write: # shallow copy not updated (CoW) assert all(sliced_series[:3] == [1, 2, 3]) @@ -1233,9 +1229,7 @@ def test_iloc_setitem_multicolumn_to_datetime(self): 
class TestILocErrors: # NB: this test should work for _any_ Series we can pass as # series_with_simple_index - def test_iloc_float_raises( - self, series_with_simple_index, frame_or_series, warn_copy_on_write - ): + def test_iloc_float_raises(self, series_with_simple_index, frame_or_series): # GH#4892 # float_indexers should raise exceptions # on appropriate Index types & accessors @@ -1252,10 +1246,7 @@ def test_iloc_float_raises( obj.iloc[3.0] with pytest.raises(IndexError, match=_slice_iloc_msg): - with tm.assert_cow_warning( - warn_copy_on_write and frame_or_series is DataFrame - ): - obj.iloc[3.0] = 0 + obj.iloc[3.0] = 0 def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): with pytest.raises(IndexingError, match="Too many indexers"): @@ -1423,7 +1414,7 @@ def test_frame_iloc_setitem_callable(self): class TestILocSeries: - def test_iloc(self, using_copy_on_write, warn_copy_on_write): + def test_iloc(self, using_copy_on_write): ser = Series( np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2)) ) @@ -1442,8 +1433,7 @@ def test_iloc(self, using_copy_on_write, warn_copy_on_write): # test slice is a view with tm.assert_produces_warning(None): # GH#45324 make sure we aren't giving a spurious FutureWarning - with tm.assert_cow_warning(warn_copy_on_write): - result[:] = 0 + result[:] = 0 if using_copy_on_write: tm.assert_series_equal(ser, ser_original) else: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b155c3aabd287..193c296115479 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -832,8 +832,7 @@ def test_loc_setitem_frame_mixed_labels(self): df.loc[0, [1, 2]] = [5, 6] tm.assert_frame_equal(df, expected) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_loc_setitem_frame_multiples(self, warn_copy_on_write): + def test_loc_setitem_frame_multiples(self): # multiple setting df = DataFrame( {"A": ["foo", "bar", 
"baz"], "B": Series(range(3), dtype=np.int64)} @@ -1090,9 +1089,7 @@ def test_loc_empty_list_indexer_is_ok(self): df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object( - self, using_copy_on_write, warn_copy_on_write - ): + def test_identity_slice_returns_new_object(self, using_copy_on_write): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) @@ -1106,8 +1103,7 @@ def test_identity_slice_returns_new_object( # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig # depending on CoW - with tm.assert_cow_warning(warn_copy_on_write): - original_df.loc[:, "a"] = [4, 4, 4] + original_df.loc[:, "a"] = [4, 4, 4] if using_copy_on_write: assert (sliced_df["a"] == [1, 2, 3]).all() else: @@ -1115,7 +1111,7 @@ def test_identity_slice_returns_new_object( # These should not return copies df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) - if using_copy_on_write or warn_copy_on_write: + if using_copy_on_write: assert df[0] is not df.loc[:, 0] else: assert df[0] is df.loc[:, 0] @@ -1126,8 +1122,7 @@ def test_identity_slice_returns_new_object( assert sliced_series is not original_series assert original_series[:] is not original_series - with tm.assert_cow_warning(warn_copy_on_write): - original_series[:3] = [7, 8, 9] + original_series[:3] = [7, 8, 9] if using_copy_on_write: assert all(sliced_series[:3] == [1, 2, 3]) else: @@ -2651,9 +2646,7 @@ def test_loc_setitem_boolean_and_column(self, float_frame): expected = DataFrame(values, index=expected.index, columns=expected.columns) tm.assert_frame_equal(float_frame, expected) - def test_loc_setitem_ndframe_values_alignment( - self, using_copy_on_write, warn_copy_on_write - ): + def test_loc_setitem_ndframe_values_alignment(self, using_copy_on_write): # GH#45501 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df.loc[[False, False, True], ["a"]] = DataFrame( @@ -2676,8 +2669,7 @@ def test_loc_setitem_ndframe_values_alignment( df = 
DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df_orig = df.copy() ser = df["a"] - with tm.assert_cow_warning(warn_copy_on_write): - ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0]) + ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0]) if using_copy_on_write: tm.assert_frame_equal(df, df_orig) else: diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 2365ff62b1680..9de14b3a7c112 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -281,7 +281,7 @@ def test_dt_accessor_ambiguous_freq_conversions(self): expected = Series(exp_values, name="xxx") tm.assert_series_equal(ser, expected) - def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write): + def test_dt_accessor_not_writeable(self, using_copy_on_write): # no setting allowed ser = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): @@ -293,11 +293,6 @@ def test_dt_accessor_not_writeable(self, using_copy_on_write, warn_copy_on_write if using_copy_on_write: with tm.raises_chained_assignment_error(): ser.dt.hour[0] = 5 - elif warn_copy_on_write: - with tm.assert_produces_warning( - FutureWarning, match="ChainedAssignmentError" - ): - ser.dt.hour[0] = 5 else: with pytest.raises(SettingWithCopyError, match=msg): ser.dt.hour[0] = 5 diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 191104b3f330f..50c167f3f3a28 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -101,14 +101,13 @@ def test_basic_getitem_dt64tz_values(): assert result == expected -def test_getitem_setitem_ellipsis(using_copy_on_write, warn_copy_on_write): +def test_getitem_setitem_ellipsis(using_copy_on_write): s = Series(np.random.default_rng(2).standard_normal(10)) result = 
s[...] tm.assert_series_equal(result, s) - with tm.assert_cow_warning(warn_copy_on_write): - s[...] = 5 + s[...] = 5 if not using_copy_on_write: assert (result == 5).all() @@ -243,7 +242,7 @@ def test_basic_getitem_setitem_corner(datetime_series): datetime_series[[5, [None, None]]] = 2 -def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_write): +def test_slice(string_series, object_series, using_copy_on_write): original = string_series.copy() numSlice = string_series[10:20] numSliceEnd = string_series[-10:] @@ -260,8 +259,7 @@ def test_slice(string_series, object_series, using_copy_on_write, warn_copy_on_w # Test return view. sl = string_series[10:20] - with tm.assert_cow_warning(warn_copy_on_write): - sl[:] = 0 + sl[:] = 0 if using_copy_on_write: # Doesn't modify parent (CoW) diff --git a/pandas/tests/series/methods/test_copy.py b/pandas/tests/series/methods/test_copy.py index 23dbe85075916..ea439fb5a3263 100644 --- a/pandas/tests/series/methods/test_copy.py +++ b/pandas/tests/series/methods/test_copy.py @@ -10,7 +10,7 @@ class TestCopy: @pytest.mark.parametrize("deep", ["default", None, False, True]) - def test_copy(self, deep, using_copy_on_write, warn_copy_on_write): + def test_copy(self, deep, using_copy_on_write): ser = Series(np.arange(10), dtype="float64") # default deep is True @@ -27,8 +27,7 @@ def test_copy(self, deep, using_copy_on_write, warn_copy_on_write): else: assert not np.may_share_memory(ser.values, ser2.values) - with tm.assert_cow_warning(warn_copy_on_write and deep is False): - ser2[::2] = np.nan + ser2[::2] = np.nan if deep is not False or using_copy_on_write: # Did not modify original Series diff --git a/pandas/tests/series/methods/test_get_numeric_data.py b/pandas/tests/series/methods/test_get_numeric_data.py index 8325cc884ebcb..11dc6d5c57162 100644 --- a/pandas/tests/series/methods/test_get_numeric_data.py +++ b/pandas/tests/series/methods/test_get_numeric_data.py @@ -7,17 +7,14 @@ class TestGetNumericData: - 
def test_get_numeric_data_preserve_dtype( - self, using_copy_on_write, warn_copy_on_write - ): + def test_get_numeric_data_preserve_dtype(self, using_copy_on_write): # get the numeric data obj = Series([1, 2, 3]) result = obj._get_numeric_data() tm.assert_series_equal(result, obj) # returned object is a shallow copy - with tm.assert_cow_warning(warn_copy_on_write): - result.iloc[0] = 0 + result.iloc[0] = 0 if using_copy_on_write: assert obj.iloc[0] == 1 else: diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py index 119654bd19b3f..e59389ab069d3 100644 --- a/pandas/tests/series/methods/test_rename.py +++ b/pandas/tests/series/methods/test_rename.py @@ -169,13 +169,12 @@ def test_rename_error_arg(self): with pytest.raises(KeyError, match=match): ser.rename({2: 9}, errors="raise") - def test_rename_copy_false(self, using_copy_on_write, warn_copy_on_write): + def test_rename_copy_false(self, using_copy_on_write): # GH 46889 ser = Series(["foo", "bar"]) ser_orig = ser.copy() shallow_copy = ser.rename({1: 9}, copy=False) - with tm.assert_cow_warning(warn_copy_on_write): - ser[0] = "foobar" + ser[0] = "foobar" if using_copy_on_write: assert ser_orig[0] == shallow_copy[0] assert ser_orig[1] == shallow_copy[9] diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 55ca1f98f6d6c..a1e08f484ebba 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -891,14 +891,12 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp with pytest.raises(IntCastingNaNError, match=msg): Series(np.array(vals), dtype=any_int_numpy_dtype) - def test_constructor_dtype_no_cast(self, using_copy_on_write, warn_copy_on_write): + def test_constructor_dtype_no_cast(self, using_copy_on_write): # see gh-1572 s = Series([1, 2, 3]) s2 = Series(s, dtype=np.int64) - warn = FutureWarning if warn_copy_on_write else None - with 
tm.assert_produces_warning(warn): - s2[1] = 5 + s2[1] = 5 if using_copy_on_write: assert s[1] == 2 else: From a7cd9b53206d3c90c2a54248a3fe5b3f280ab4ab Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 01:17:41 +0000 Subject: [PATCH 31/50] Remove Copy-on-Write builds (#57238) --- .github/workflows/unit-tests.yml | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 0fffedcb6ae88..533b81013a264 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -57,22 +57,6 @@ jobs: # Also install zh_CN (its encoding is gb2312) but do not activate it. # It will be temporarily activated during tests with locale.setlocale extra_loc: "zh_CN" - - name: "Copy-on-Write 3.9" - env_file: actions-39.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "1" - - name: "Copy-on-Write 3.10" - env_file: actions-310.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "1" - - name: "Copy-on-Write 3.11" - env_file: actions-311.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "1" - - name: "Copy-on-Write 3.12" - env_file: actions-312.yaml - pattern: "not slow and not network and not single_cpu" - pandas_copy_on_write: "1" - name: "Pypy" env_file: actions-pypy-39.yaml pattern: "not slow and not network and not single_cpu" @@ -101,7 +85,6 @@ jobs: PATTERN: ${{ matrix.pattern }} LANG: ${{ matrix.lang || 'C.UTF-8' }} LC_ALL: ${{ matrix.lc_all || '' }} - PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ matrix.pytest_workers || 'auto' }} @@ -111,7 +94,7 @@ jobs: QT_QPA_PLATFORM: offscreen concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ 
github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }} + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}} cancel-in-progress: true services: From ce50a85e2a072754601da4183a93432892defced Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 01:20:05 +0000 Subject: [PATCH 32/50] REGR: to_json converting nullable ints to floats (#57232) * REGR: to_json converting nullable ints to floats * Add skip --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/core/arrays/arrow/array.py | 5 +++++ pandas/core/arrays/masked.py | 3 +++ pandas/tests/io/json/test_pandas.py | 16 ++++++++++++++++ 4 files changed, 25 insertions(+) diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 13d5024b5a131..3cc11974b14e5 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.loc` raising ``IndexError`` for non-unique, masked dtype indexes where result has more than 10,000 rows (:issue:`57027`) - Fixed regression in :meth:`DataFrame.sort_index` not producing a stable sort for a index with duplicates (:issue:`57151`) - Fixed regression in :meth:`DataFrame.to_dict` with ``orient='list'`` and datetime or timedelta types returning integers (:issue:`54824`) +- Fixed regression in :meth:`DataFrame.to_json` converting nullable integers to floats (:issue:`57224`) - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmax` ignoring the ``skipna`` argument (:issue:`57040`) - Fixed regression in :meth:`DataFrameGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmin`, 
:meth:`SeriesGroupBy.idxmax` where values containing the minimum or maximum value for the dtype could produce incorrect results (:issue:`57040`) - Fixed regression in :meth:`Index.join` raising ``TypeError`` when joining an empty index to a non-empty index containing mixed dtype values (:issue:`57048`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 7bab8c9395ac6..32044d1fc233a 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1364,6 +1364,11 @@ def _to_timedeltaarray(self) -> TimedeltaArray: np_array = np_array.astype(np_dtype) return TimedeltaArray._simple_new(np_array, dtype=np_dtype) + def _values_for_json(self) -> np.ndarray: + if is_numeric_dtype(self.dtype): + return np.asarray(self, dtype=object) + return super()._values_for_json() + @doc(ExtensionArray.to_numpy) def to_numpy( self, diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index d5ae6a6025029..f04c50251f19e 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -431,6 +431,9 @@ def __abs__(self) -> Self: # ------------------------------------------------------------------ + def _values_for_json(self) -> np.ndarray: + return np.asarray(self, dtype=object) + def to_numpy( self, dtype: npt.DTypeLike | None = None, diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 848e1d5bc47a6..a22d4666e3b2d 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -2160,3 +2160,19 @@ def test_json_pos_args_deprecation(): with tm.assert_produces_warning(FutureWarning, match=msg): buf = BytesIO() df.to_json(buf, "split") + + +@td.skip_if_no("pyarrow") +def test_to_json_ea_null(): + # GH#57224 + df = DataFrame( + { + "a": Series([1, NA], dtype="int64[pyarrow]"), + "b": Series([2, NA], dtype="Int64"), + } + ) + result = df.to_json(orient="records", lines=True) + expected = """{"a":1,"b":2} +{"a":null,"b":null} +""" 
+ assert result == expected From 1bb48398b46232deb8aac3eba778e474b111307d Mon Sep 17 00:00:00 2001 From: Thad Guidry Date: Sun, 4 Feb 2024 13:21:06 +0800 Subject: [PATCH 33/50] DOC: Fix typo in merging.rst (#57242) Fix typo in merging.rst --- doc/source/user_guide/merging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/merging.rst b/doc/source/user_guide/merging.rst index c9c8478a719f0..1edf3908936db 100644 --- a/doc/source/user_guide/merging.rst +++ b/doc/source/user_guide/merging.rst @@ -249,7 +249,7 @@ a :class:`MultiIndex`) associate specific keys with each original :class:`DataFr p.plot(frames, result, labels=["df1", "df2", "df3"], vertical=True) plt.close("all"); -The ``keys`` argument cane override the column names +The ``keys`` argument can override the column names when creating a new :class:`DataFrame` based on existing :class:`Series`. .. ipython:: python From 36d454ae9412f8c3ac28e13765b821c0581db9e6 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:17:30 +0000 Subject: [PATCH 34/50] DEPR: Remove first and last from DataFrame (#57246) --- doc/source/reference/frame.rst | 2 - doc/source/reference/series.rst | 2 - doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/generic.py | 164 ------------------ .../frame/methods/test_first_and_last.py | 139 --------------- pandas/tests/generic/test_finalize.py | 45 ----- 6 files changed, 1 insertion(+), 353 deletions(-) delete mode 100644 pandas/tests/frame/methods/test_first_and_last.py diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 1d9019ff22c23..1ade30faa123b 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -186,11 +186,9 @@ Reindexing / selection / label manipulation DataFrame.duplicated DataFrame.equals DataFrame.filter - DataFrame.first DataFrame.head DataFrame.idxmax DataFrame.idxmin - DataFrame.last DataFrame.reindex 
DataFrame.reindex_like DataFrame.rename diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index a4ea0ec396ceb..28e7cf82b3478 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -183,12 +183,10 @@ Reindexing / selection / label manipulation Series.drop_duplicates Series.duplicated Series.equals - Series.first Series.head Series.idxmax Series.idxmin Series.isin - Series.last Series.reindex Series.reindex_like Series.rename diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 806a46c248e15..768bb9e99407a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -102,13 +102,13 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +- Removed :meth:`DataFrame.first` and :meth:`DataFrame.last` (:issue:`53710`) - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) - Removed ``axis`` argument from :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.resample`, and :meth:`Series.resample` (:issue:`51203`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) - Removed the ``ArrayManager`` (:issue:`55043`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_300.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 490a47d16871c..676b3741f9843 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -36,7 +36,6 @@ from pandas._libs.lib import is_range_indexer from pandas._libs.tslibs import ( Period, - Tick, Timestamp, to_offset, ) @@ -9646,169 +9645,6 @@ def resample( group_keys=group_keys, ) - @final - def first(self, offset) -> Self: - """ - Select initial periods of time series data based on a date offset. - - .. deprecated:: 2.1 - :meth:`.first` is deprecated and will be removed in a future version. - Please create a mask and filter using `.loc` instead. - - For a DataFrame with a sorted DatetimeIndex, this function can - select the first few rows based on a date offset. - - Parameters - ---------- - offset : str, DateOffset or dateutil.relativedelta - The offset length of the data that will be selected. For instance, - '1ME' will display all the rows having their index within the first month. - - Returns - ------- - Series or DataFrame - A subset of the caller. - - Raises - ------ - TypeError - If the index is not a :class:`DatetimeIndex` - - See Also - -------- - last : Select final periods of time series based on a date offset. - at_time : Select values at a particular time of the day. - between_time : Select values between particular times of the day. - - Examples - -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') - >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) - >>> ts - A - 2018-04-09 1 - 2018-04-11 2 - 2018-04-13 3 - 2018-04-15 4 - - Get the rows for the first 3 days: - - >>> ts.first('3D') - A - 2018-04-09 1 - 2018-04-11 2 - - Notice the data for 3 first calendar days were returned, not the first - 3 days observed in the dataset, and therefore data for 2018-04-13 was - not returned. - """ - warnings.warn( - "first is deprecated and will be removed in a future version. 
" - "Please create a mask and filter using `.loc` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - if not isinstance(self.index, DatetimeIndex): - raise TypeError("'first' only supports a DatetimeIndex index") - - if len(self.index) == 0: - return self.copy(deep=False) - - offset = to_offset(offset) - if not isinstance(offset, Tick) and offset.is_on_offset(self.index[0]): - # GH#29623 if first value is end of period, remove offset with n = 1 - # before adding the real offset - end_date = end = self.index[0] - offset.base + offset - else: - end_date = end = self.index[0] + offset - - # Tick-like, e.g. 3 weeks - if isinstance(offset, Tick) and end_date in self.index: - end = self.index.searchsorted(end_date, side="left") - return self.iloc[:end] - - return self.loc[:end] - - @final - def last(self, offset) -> Self: - """ - Select final periods of time series data based on a date offset. - - .. deprecated:: 2.1 - :meth:`.last` is deprecated and will be removed in a future version. - Please create a mask and filter using `.loc` instead. - - For a DataFrame with a sorted DatetimeIndex, this function - selects the last few rows based on a date offset. - - Parameters - ---------- - offset : str, DateOffset, dateutil.relativedelta - The offset length of the data that will be selected. For instance, - '3D' will display all the rows having their index within the last 3 days. - - Returns - ------- - Series or DataFrame - A subset of the caller. - - Raises - ------ - TypeError - If the index is not a :class:`DatetimeIndex` - - See Also - -------- - first : Select initial periods of time series based on a date offset. - at_time : Select values at a particular time of the day. - between_time : Select values between particular times of the day. - - Notes - ----- - .. 
deprecated:: 2.1.0 - Please create a mask and filter using `.loc` instead - - Examples - -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='2D') - >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) - >>> ts - A - 2018-04-09 1 - 2018-04-11 2 - 2018-04-13 3 - 2018-04-15 4 - - Get the rows for the last 3 days: - - >>> ts.last('3D') # doctest: +SKIP - A - 2018-04-13 3 - 2018-04-15 4 - - Notice the data for 3 last calendar days were returned, not the last - 3 observed days in the dataset, and therefore data for 2018-04-11 was - not returned. - """ - warnings.warn( - "last is deprecated and will be removed in a future version. " - "Please create a mask and filter using `.loc` instead", - FutureWarning, - stacklevel=find_stack_level(), - ) - - if not isinstance(self.index, DatetimeIndex): - raise TypeError("'last' only supports a DatetimeIndex index") - - if len(self.index) == 0: - return self.copy(deep=False) - - offset = to_offset(offset) - - start_date = self.index[-1] - offset - start = self.index.searchsorted(start_date, side="right") - return self.iloc[start:] - @final def rank( self, diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py deleted file mode 100644 index 2170cf254fbe6..0000000000000 --- a/pandas/tests/frame/methods/test_first_and_last.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -Note: includes tests for `last` -""" -import numpy as np -import pytest - -import pandas as pd -from pandas import ( - DataFrame, - Index, - bdate_range, - date_range, -) -import pandas._testing as tm - -deprecated_msg = "first is deprecated" -last_deprecated_msg = "last is deprecated" - - -class TestFirst: - def test_first_subset(self, frame_or_series): - ts = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="12h"), - ) - ts = tm.get_obj(ts, frame_or_series) - with 
tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("10d") - assert len(result) == 20 - - ts = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="D"), - ) - ts = tm.get_obj(ts, frame_or_series) - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("10d") - assert len(result) == 10 - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("3ME") - expected = ts[:"3/31/2000"] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts.first("21D") - expected = ts[:21] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = ts[:0].first("3ME") - tm.assert_equal(result, ts[:0]) - - def test_first_last_raises(self, frame_or_series): - # GH#20725 - obj = DataFrame([[1, 2, 3], [4, 5, 6]]) - obj = tm.get_obj(obj, frame_or_series) - - msg = "'first' only supports a DatetimeIndex index" - with tm.assert_produces_warning( - FutureWarning, match=deprecated_msg - ), pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - obj.first("1D") - - msg = "'last' only supports a DatetimeIndex index" - with tm.assert_produces_warning( - FutureWarning, match=last_deprecated_msg - ), pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex - obj.last("1D") - - def test_last_subset(self, frame_or_series): - ts = DataFrame( - np.random.default_rng(2).standard_normal((100, 4)), - columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=100, freq="12h"), - ) - ts = tm.get_obj(ts, frame_or_series) - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("10d") - assert len(result) == 20 - - ts = DataFrame( - np.random.default_rng(2).standard_normal((30, 4)), - 
columns=Index(list("ABCD"), dtype=object), - index=date_range("2000-01-01", periods=30, freq="D"), - ) - ts = tm.get_obj(ts, frame_or_series) - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("10d") - assert len(result) == 10 - - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("21D") - expected = ts["2000-01-10":] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts.last("21D") - expected = ts[-21:] - tm.assert_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = ts[:0].last("3ME") - tm.assert_equal(result, ts[:0]) - - @pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)]) - def test_first_with_first_day_last_of_month(self, frame_or_series, start, periods): - # GH#29623 - x = frame_or_series([1] * 100, index=bdate_range(start, periods=100)) - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = x.first("1ME") - expected = frame_or_series( - [1] * periods, index=bdate_range(start, periods=periods) - ) - tm.assert_equal(result, expected) - - def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series): - # GH#29623 - x = frame_or_series([1] * 100, index=bdate_range("2010-03-31", periods=100)) - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = x.first("2ME") - expected = frame_or_series( - [1] * 23, index=bdate_range("2010-03-31", "2010-04-30") - ) - tm.assert_equal(result, expected) - - def test_empty_not_input(self): - # GH#51032 - df = DataFrame(index=pd.DatetimeIndex([])) - with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg): - result = df.last(offset=1) - - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = df.first(offset=1) - - tm.assert_frame_equal(df, result) - assert df is not 
result diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index f25e7d4ab8c79..7cf5ccc4ed24f 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -8,7 +8,6 @@ import pytest import pandas as pd -import pandas._testing as tm # TODO: # * Binary methods (mul, div, etc.) @@ -303,16 +302,6 @@ ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), operator.methodcaller("between_time", "12:00", "13:00"), ), - ( - pd.Series, - (1, pd.date_range("2000", periods=4)), - operator.methodcaller("last", "3D"), - ), - ( - pd.DataFrame, - ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - operator.methodcaller("last", "3D"), - ), (pd.Series, ([1, 2],), operator.methodcaller("rank")), (pd.DataFrame, frame_data, operator.methodcaller("rank")), (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))), @@ -388,7 +377,6 @@ def idfn(x): @pytest.mark.filterwarnings( "ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning", - "ignore:last is deprecated:FutureWarning", ) @pytest.mark.parametrize("ndframe_method", _all_methods, ids=lambda x: idfn(x[-1])) def test_finalize_called(ndframe_method): @@ -401,39 +389,6 @@ def test_finalize_called(ndframe_method): assert result.attrs == {"a": 1} -@pytest.mark.parametrize( - "data", - [ - pd.Series(1, pd.date_range("2000", periods=4)), - pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - ], -) -def test_finalize_first(data): - deprecated_msg = "first is deprecated" - - data.attrs = {"a": 1} - with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = data.first("3D") - assert result.attrs == {"a": 1} - - -@pytest.mark.parametrize( - "data", - [ - pd.Series(1, pd.date_range("2000", periods=4)), - pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)), - ], -) -def test_finalize_last(data): - # GH 53710 - deprecated_msg = "last is deprecated" - - data.attrs = {"a": 1} - 
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg): - result = data.last("3D") - assert result.attrs == {"a": 1} - - @not_implemented_mark def test_finalize_called_eval_numexpr(): pytest.importorskip("numexpr") From bc58fe54e2125c70a3483beada191ca8b2329165 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 15:18:21 +0000 Subject: [PATCH 35/50] DEPR: Remove SettingWithCopyWarning (#56614) * DEPR: Remove SettingWithCopyWarning * Fixup * Remove docs * CoW: Boolean indexer in MultiIndex raising read-only error * Update * Update * Update --- doc/source/reference/testing.rst | 2 - doc/source/user_guide/advanced.rst | 8 +- doc/source/user_guide/indexing.rst | 251 +----------------- doc/source/whatsnew/v0.13.0.rst | 2 +- doc/source/whatsnew/v0.13.1.rst | 4 +- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/_config/__init__.py | 5 - pandas/core/apply.py | 16 +- pandas/core/frame.py | 98 +------ pandas/core/generic.py | 225 +--------------- pandas/core/groupby/groupby.py | 50 ++-- pandas/core/indexes/accessors.py | 22 +- pandas/core/indexing.py | 14 +- pandas/core/interchange/from_dataframe.py | 5 - pandas/core/internals/managers.py | 2 +- pandas/core/series.py | 92 ------- pandas/errors/__init__.py | 50 +--- pandas/io/json/_json.py | 2 - .../test_chained_assignment_deprecation.py | 20 +- pandas/tests/copy_view/test_clip.py | 11 +- pandas/tests/copy_view/test_indexing.py | 226 ++++------------ pandas/tests/copy_view/test_methods.py | 50 +--- pandas/tests/frame/indexing/test_indexing.py | 12 +- pandas/tests/frame/indexing/test_xs.py | 43 +-- pandas/tests/frame/methods/test_asof.py | 13 - pandas/tests/frame/methods/test_sample.py | 2 +- .../tests/frame/methods/test_sort_values.py | 20 +- .../tests/groupby/transform/test_transform.py | 7 +- pandas/tests/indexes/multi/test_get_set.py | 2 - .../multiindex/test_chaining_and_caching.py | 14 +- .../tests/indexing/multiindex/test_setitem.py | 33 +-- 
.../indexing/test_chaining_and_caching.py | 229 +++------------- .../series/accessors/test_dt_accessor.py | 13 +- .../tests/series/methods/test_sort_values.py | 13 +- pandas/tests/test_downstream.py | 2 + pandas/tests/test_errors.py | 2 - 36 files changed, 209 insertions(+), 1353 deletions(-) diff --git a/doc/source/reference/testing.rst b/doc/source/reference/testing.rst index a5d61703aceed..1f164d1aa98b4 100644 --- a/doc/source/reference/testing.rst +++ b/doc/source/reference/testing.rst @@ -58,8 +58,6 @@ Exceptions and warnings errors.PossiblePrecisionLoss errors.PyperclipException errors.PyperclipWindowsException - errors.SettingWithCopyError - errors.SettingWithCopyWarning errors.SpecificationError errors.UndefinedVariableError errors.UnsortedIndexError diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 453536098cfbb..f7ab466e92d93 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -11,13 +11,6 @@ and :ref:`other advanced indexing features `. See the :ref:`Indexing and Selecting Data ` for general indexing documentation. -.. warning:: - - Whether a copy or a reference is returned for a setting operation may - depend on the context. This is sometimes called ``chained assignment`` and - should be avoided. See :ref:`Returning a View versus Copy - `. - See the :ref:`cookbook` for some advanced strategies. .. _advanced.hierarchical: @@ -402,6 +395,7 @@ slicers on a single axis. Furthermore, you can *set* the values using the following methods. .. ipython:: python + :okwarning: df2 = dfmi.copy() df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10 diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 7c8d3b9e1c869..24cdbad41fe60 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -29,13 +29,6 @@ this area. 
production code, we recommended that you take advantage of the optimized pandas data access methods exposed in this chapter. -.. warning:: - - Whether a copy or a reference is returned for a setting operation, may - depend on the context. This is sometimes called ``chained assignment`` and - should be avoided. See :ref:`Returning a View versus Copy - `. - See the :ref:`MultiIndex / Advanced Indexing ` for ``MultiIndex`` and more advanced indexing documentation. See the :ref:`cookbook` for some advanced strategies. @@ -299,12 +292,6 @@ largely as a convenience since it is such a common operation. Selection by label ------------------ -.. warning:: - - Whether a copy or a reference is returned for a setting operation, may depend on the context. - This is sometimes called ``chained assignment`` and should be avoided. - See :ref:`Returning a View versus Copy `. - .. warning:: ``.loc`` is strict when you present slicers that are not compatible (or convertible) with the index type. For example @@ -445,12 +432,6 @@ For more information about duplicate labels, see Selection by position --------------------- -.. warning:: - - Whether a copy or a reference is returned for a setting operation, may depend on the context. - This is sometimes called ``chained assignment`` and should be avoided. - See :ref:`Returning a View versus Copy `. - pandas provides a suite of methods in order to get **purely integer based indexing**. The semantics follow closely Python and NumPy slicing. These are ``0-based`` indexing. When slicing, the start bound is *included*, while the upper bound is *excluded*. Trying to use a non-integer, even a **valid** label will raise an ``IndexError``. The ``.iloc`` attribute is the primary access method. The following are valid inputs: @@ -1722,234 +1703,10 @@ You can assign a custom index to the ``index`` attribute: df_idx.index = pd.Index([10, 20, 30, 40], name="a") df_idx -.. 
_indexing.view_versus_copy: - -Returning a view versus a copy ------------------------------- - -.. warning:: - - :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. This means that chained indexing will - never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary - anymore. - See :ref:`this section ` - for more context. - We recommend turning Copy-on-Write on to leverage the improvements with - - ``` - pd.options.mode.copy_on_write = True - ``` - - even before pandas 3.0 is available. - -When setting values in a pandas object, care must be taken to avoid what is called -``chained indexing``. Here is an example. - -.. ipython:: python - - dfmi = pd.DataFrame([list('abcd'), - list('efgh'), - list('ijkl'), - list('mnop')], - columns=pd.MultiIndex.from_product([['one', 'two'], - ['first', 'second']])) - dfmi - -Compare these two access methods: - -.. ipython:: python - - dfmi['one']['second'] - -.. ipython:: python - - dfmi.loc[:, ('one', 'second')] - -These both yield the same results, so which should you use? It is instructive to understand the order -of operations on these and why method 2 (``.loc``) is much preferred over method 1 (chained ``[]``). - -``dfmi['one']`` selects the first level of the columns and returns a DataFrame that is singly-indexed. -Then another Python operation ``dfmi_with_one['second']`` selects the series indexed by ``'second'``. -This is indicated by the variable ``dfmi_with_one`` because pandas sees these operations as separate events. -e.g. separate calls to ``__getitem__``, so it has to treat them as linear operations, they happen one after another. - -Contrast this to ``df.loc[:,('one','second')]`` which passes a nested tuple of ``(slice(None),('one','second'))`` to a single call to -``__getitem__``. This allows pandas to deal with this as a single entity. Furthermore this order of operations *can* be significantly -faster, and allows one to index *both* axes if so desired. 
- Why does assignment fail when using chained indexing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. warning:: - - :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. This means that chained indexing will - never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary - anymore. - See :ref:`this section ` - for more context. - We recommend turning Copy-on-Write on to leverage the improvements with - - ``` - pd.options.mode.copy_on_write = True - ``` - - even before pandas 3.0 is available. - -The problem in the previous section is just a performance issue. What's up with -the ``SettingWithCopy`` warning? We don't **usually** throw warnings around when -you do something that might cost a few extra milliseconds! - -But it turns out that assigning to the product of chained indexing has -inherently unpredictable results. To see this, think about how the Python -interpreter executes this code: - -.. code-block:: python - - dfmi.loc[:, ('one', 'second')] = value - # becomes - dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) - -But this code is handled differently: - -.. code-block:: python - - dfmi['one']['second'] = value - # becomes - dfmi.__getitem__('one').__setitem__('second', value) - -See that ``__getitem__`` in there? Outside of simple cases, it's very hard to -predict whether it will return a view or a copy (it depends on the memory layout -of the array, about which pandas makes no guarantees), and therefore whether -the ``__setitem__`` will modify ``dfmi`` or a temporary object that gets thrown -out immediately afterward. **That's** what ``SettingWithCopy`` is warning you -about! - -.. note:: You may be wondering whether we should be concerned about the ``loc`` - property in the first example. But ``dfmi.loc`` is guaranteed to be ``dfmi`` - itself with modified indexing behavior, so ``dfmi.loc.__getitem__`` / - ``dfmi.loc.__setitem__`` operate on ``dfmi`` directly. 
Of course, - ``dfmi.loc.__getitem__(idx)`` may be a view or a copy of ``dfmi``. - -Sometimes a ``SettingWithCopy`` warning will arise at times when there's no -obvious chained indexing going on. **These** are the bugs that -``SettingWithCopy`` is designed to catch! pandas is probably trying to warn you -that you've done this: - -.. code-block:: python - - def do_something(df): - foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! - # ... many lines here ... - # We don't know whether this will modify df or not! - foo['quux'] = value - return foo - -Yikes! - -.. _indexing.evaluation_order: - -Evaluation order matters -~~~~~~~~~~~~~~~~~~~~~~~~ - -.. warning:: - - :ref:`Copy-on-Write ` - will become the new default in pandas 3.0. This means than chained indexing will - never work. As a consequence, the ``SettingWithCopyWarning`` won't be necessary - anymore. - See :ref:`this section ` - for more context. - We recommend turning Copy-on-Write on to leverage the improvements with - - ``` - pd.options.mode.copy_on_write = True - ``` - - even before pandas 3.0 is available. - -When you use chained indexing, the order and type of the indexing operation -partially determine whether the result is a slice into the original object, or -a copy of the slice. - -pandas has the ``SettingWithCopyWarning`` because assigning to a copy of a -slice is frequently not intentional, but a mistake caused by chained indexing -returning a copy where a slice was expected. - -If you would like pandas to be more or less trusting about assignment to a -chained indexing expression, you can set the :ref:`option ` -``mode.chained_assignment`` to one of these values: - -* ``'warn'``, the default, means a ``SettingWithCopyWarning`` is printed. -* ``'raise'`` means pandas will raise a ``SettingWithCopyError`` - you have to deal with. -* ``None`` will suppress the warnings entirely. - -.. 
ipython:: python - :okwarning: - - dfb = pd.DataFrame({'a': ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c': np.arange(7)}) - - # This will show the SettingWithCopyWarning - # but the frame values will be set - dfb['c'][dfb['a'].str.startswith('o')] = 42 - -This however is operating on a copy and will not work. - -.. ipython:: python - :okwarning: - :okexcept: - - with pd.option_context('mode.chained_assignment','warn'): - dfb[dfb['a'].str.startswith('o')]['c'] = 42 - -A chained assignment can also crop up in setting in a mixed dtype frame. - -.. note:: - - These setting rules apply to all of ``.loc/.iloc``. - -The following is the recommended access method using ``.loc`` for multiple items (using ``mask``) and a single item using a fixed index: - -.. ipython:: python - - dfc = pd.DataFrame({'a': ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c': np.arange(7)}) - dfd = dfc.copy() - # Setting multiple items using a mask - mask = dfd['a'].str.startswith('o') - dfd.loc[mask, 'c'] = 42 - dfd - - # Setting a single item - dfd = dfc.copy() - dfd.loc[2, 'a'] = 11 - dfd - -The following *can* work at times, but it is not guaranteed to, and therefore should be avoided: - -.. ipython:: python - :okwarning: - - dfd = dfc.copy() - dfd['a'][2] = 111 - dfd - -Last, the subsequent example will **not** work at all, and so should be avoided: - -.. ipython:: python - :okwarning: - :okexcept: - - with pd.option_context('mode.chained_assignment','raise'): - dfd.loc[0]['a'] = 1111 - -.. warning:: - - The chained assignment warnings / exceptions are aiming to inform the user of a possibly invalid - assignment. There may be false positives; situations where a chained assignment is inadvertently - reported. +:ref:`Copy-on-Write <copy_on_write>` is the new default with pandas 3.0. +This means that chained indexing will never work. +See :ref:`this section <copy_on_write_chained_assignment>` +for more context.
diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index f2e29121760ab..a624e81d17db9 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -172,7 +172,7 @@ API changes statistical mode(s) by axis/Series. (:issue:`5367`) - Chained assignment will now by default warn if the user is assigning to a copy. This can be changed - with the option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. See :ref:`the docs`. + with the option ``mode.chained_assignment``, allowed options are ``raise/warn/None``. .. ipython:: python diff --git a/doc/source/whatsnew/v0.13.1.rst b/doc/source/whatsnew/v0.13.1.rst index 8c85868e1aedb..483dd15a8467a 100644 --- a/doc/source/whatsnew/v0.13.1.rst +++ b/doc/source/whatsnew/v0.13.1.rst @@ -24,8 +24,8 @@ Highlights include: .. warning:: 0.13.1 fixes a bug that was caused by a combination of having numpy < 1.8, and doing - chained assignment on a string-like array. Please review :ref:`the docs`, - chained indexing can have unexpected results and should generally be avoided. + chained assignment on a string-like array. + Chained indexing can have unexpected results and should generally be avoided. 
This would previously segfault: diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8fa1361cc30c1..f4cd57af105dd 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -383,7 +383,7 @@ Other enhancements - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) - Added ``numeric_only`` argument to :meth:`.Resampler.sum`, :meth:`.Resampler.prod`, :meth:`.Resampler.min`, :meth:`.Resampler.max`, :meth:`.Resampler.first`, and :meth:`.Resampler.last` (:issue:`46442`) - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) -- :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) +- :class:`.DataError`, :class:`.SpecificationError`, ``SettingWithCopyError``, ``SettingWithCopyWarning``, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) - Added ``check_like`` argument to 
:func:`testing.assert_series_equal` (:issue:`47247`) - Add support for :meth:`.DataFrameGroupBy.ohlc` and :meth:`.SeriesGroupBy.ohlc` for extension array dtypes (:issue:`37493`) - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 9784303fc0b87..c43d59654b44c 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -34,11 +34,6 @@ def using_copy_on_write() -> bool: return True -def using_nullable_dtypes() -> bool: - _mode_options = _global_config["mode"] - return _mode_options["nullable_dtypes"] - - def using_pyarrow_string_dtype() -> bool: _mode_options = _global_config["future"] return _mode_options["infer_string"] diff --git a/pandas/core/apply.py b/pandas/core/apply.py index ebd714f9c14d4..7ae65ba11a752 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -16,8 +16,6 @@ import numpy as np -from pandas._config import option_context - from pandas._libs import lib from pandas._libs.internals import BlockValuesRefs from pandas._typing import ( @@ -1076,14 +1074,12 @@ def apply_series_generator(self) -> tuple[ResType, Index]: results = {} - with option_context("mode.chained_assignment", None): - for i, v in enumerate(series_gen): - # ignore SettingWithCopy here in case the user mutates - results[i] = self.func(v, *self.args, **self.kwargs) - if isinstance(results[i], ABCSeries): - # If we have a view on v, we need to make a copy because - # series_generator will swap out the underlying data - results[i] = results[i].copy(deep=False) + for i, v in enumerate(series_gen): + results[i] = self.func(v, *self.args, **self.kwargs) + if isinstance(results[i], ABCSeries): + # If we have a view on v, we need to make a copy because + # series_generator will swap out the underlying data + results[i] = results[i].copy(deep=False) return results, res_index diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 
b8b5df6e5145b..afa680d064c4a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1446,12 +1446,8 @@ def style(self) -> Styler: @Appender(_shared_docs["items"]) def items(self) -> Iterable[tuple[Hashable, Series]]: - if self.columns.is_unique and hasattr(self, "_item_cache"): - for k in self.columns: - yield k, self._get_item_cache(k) - else: - for i, k in enumerate(self.columns): - yield k, self._ixs(i, axis=1) + for i, k in enumerate(self.columns): + yield k, self._ixs(i, axis=1) def iterrows(self) -> Iterable[tuple[Hashable, Series]]: """ @@ -3921,24 +3917,14 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Series: if axis == 0: new_mgr = self._mgr.fast_xs(i) - # if we are a copy, mark as such - copy = isinstance(new_mgr.array, np.ndarray) and new_mgr.array.base is None result = self._constructor_sliced_from_mgr(new_mgr, axes=new_mgr.axes) result._name = self.index[i] - result = result.__finalize__(self) - result._set_is_copy(self, copy=copy) - return result + return result.__finalize__(self) # icol else: - label = self.columns[i] - col_mgr = self._mgr.iget(i) - result = self._box_col_values(col_mgr, i) - - # this is a cached value, mark it so - result._set_as_cached(label, self) - return result + return self._box_col_values(col_mgr, i) def _get_column_array(self, i: int) -> ArrayLike: """ @@ -3998,7 +3984,7 @@ def __getitem__(self, key): and key in self.columns or key in self.columns.drop_duplicates(keep=False) ): - return self._get_item_cache(key) + return self._get_item(key) elif is_mi and self.columns.is_unique and key in self.columns: return self._getitem_multilevel(key) @@ -4037,7 +4023,7 @@ def __getitem__(self, key): if isinstance(indexer, slice): return self._slice(indexer, axis=1) - data = self._take_with_is_copy(indexer, axis=1) + data = self.take(indexer, axis=1) if is_single_key: # What does looking for a single key in a non-unique index return? 
@@ -4046,7 +4032,7 @@ def __getitem__(self, key): # - we have a MultiIndex on columns (test on self.columns, #21309) if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex): # GH#26490 using data[key] can cause RecursionError - return data._get_item_cache(key) + return data._get_item(key) return data @@ -4075,7 +4061,7 @@ def _getitem_bool_array(self, key): return self.copy(deep=None) indexer = key.nonzero()[0] - return self._take_with_is_copy(indexer, axis=0) + return self.take(indexer, axis=0) def _getitem_multilevel(self, key): # self.columns is a MultiIndex @@ -4105,7 +4091,6 @@ def _getitem_multilevel(self, key): result, index=self.index, name=key ) - result._set_is_copy(self) return result else: # loc is neither a slice nor ndarray, so must be an int @@ -4134,7 +4119,7 @@ def _get_value(self, index, col, takeable: bool = False) -> Scalar: series = self._ixs(col, axis=1) return series._values[index] - series = self._get_item_cache(col) + series = self._get_item(col) engine = self.index._engine if not isinstance(self.index, MultiIndex): @@ -4226,7 +4211,6 @@ def _setitem_slice(self, key: slice, value) -> None: # NB: we can't just use self.loc[key] = value because that # operates on labels and we need to operate positional for # backwards-compat, xref GH#31469 - self._check_setitem_copy() self.iloc[key] = value def _setitem_array(self, key, value): @@ -4239,7 +4223,6 @@ def _setitem_array(self, key, value): ) key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - self._check_setitem_copy() if isinstance(value, DataFrame): # GH#39931 reindex since iloc does not align value = value.reindex(self.index.take(indexer)) @@ -4326,7 +4309,6 @@ def _setitem_frame(self, key, value) -> None: "Must pass DataFrame or 2-d ndarray with boolean values only" ) - self._check_setitem_copy() self._where(-key, value, inplace=True) def _set_item_frame_value(self, key, value: DataFrame) -> None: @@ -4388,7 +4370,6 @@ def _iset_item_mgr( ) -> None: # when 
called from _set_item_mgr loc can be anything returned from get_loc self._mgr.iset(loc, value, inplace=inplace, refs=refs) - self._clear_item_cache() def _set_item_mgr( self, key, value: ArrayLike, refs: BlockValuesRefs | None = None @@ -4401,12 +4382,6 @@ def _set_item_mgr( else: self._iset_item_mgr(loc, value, refs=refs) - # check if we are modifying a copy - # try to set first as we want an invalid - # value exception to occur first - if len(self): - self._check_setitem_copy() - def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: # We are only called from _replace_columnwise which guarantees that # no reindex is necessary @@ -4417,12 +4392,6 @@ def _iset_item(self, loc: int, value: Series, inplace: bool = True) -> None: else: self._iset_item_mgr(loc, value._values.copy(), inplace=True) - # check if we are modifying a copy - # try to set first as we want an invalid - # value exception to occur first - if len(self): - self._check_setitem_copy() - def _set_item(self, key, value) -> None: """ Add series to DataFrame in specified column. 
@@ -4473,7 +4442,6 @@ def _set_value( icol = self.columns.get_loc(col) iindex = self.index.get_loc(index) self._mgr.column_setitem(icol, iindex, value, inplace_only=True) - self._clear_item_cache() except (KeyError, TypeError, ValueError, LossySetitemError): # get_loc might raise a KeyError for missing labels (falling back @@ -4485,7 +4453,6 @@ def _set_value( self.iloc[index, col] = value else: self.loc[index, col] = value - self._item_cache.pop(col, None) except InvalidIndexError as ii_err: # GH48729: Seems like you are trying to assign a value to a @@ -4529,50 +4496,9 @@ def _box_col_values(self, values: SingleBlockManager, loc: int) -> Series: obj._name = name return obj.__finalize__(self) - # ---------------------------------------------------------------------- - # Lookup Caching - - def _clear_item_cache(self) -> None: - self._item_cache.clear() - - def _get_item_cache(self, item: Hashable) -> Series: - """Return the cached item, item represents a label indexer.""" - if using_copy_on_write(): - loc = self.columns.get_loc(item) - return self._ixs(loc, axis=1) - - cache = self._item_cache - res = cache.get(item) - if res is None: - # All places that call _get_item_cache have unique columns, - # pending resolution of GH#33047 - - loc = self.columns.get_loc(item) - res = self._ixs(loc, axis=1) - - cache[item] = res - - # for a chain - res._is_copy = self._is_copy - return res - - def _reset_cacher(self) -> None: - # no-op for DataFrame - pass - - def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None: - """ - The object has called back to us saying maybe it has changed. 
- """ - loc = self._info_axis.get_loc(item) - arraylike = value._values - - old = self._ixs(loc, axis=1) - if old._values is value._values and inplace: - # GH#46149 avoid making unnecessary copies/block-splitting - return - - self._mgr.iset(loc, arraylike, inplace=inplace) + def _get_item(self, item: Hashable) -> Series: + loc = self.columns.get_loc(item) + return self._ixs(loc, axis=1) # ---------------------------------------------------------------------- # Unsorted diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 676b3741f9843..1503bb37adc29 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5,7 +5,6 @@ from copy import deepcopy import datetime as dt from functools import partial -import gc from json import loads import operator import pickle @@ -23,7 +22,6 @@ overload, ) import warnings -import weakref import numpy as np @@ -97,8 +95,6 @@ AbstractMethodError, ChainedAssignmentError, InvalidIndexError, - SettingWithCopyError, - SettingWithCopyWarning, ) from pandas.errors.cow import ( _chained_assignment_method_msg, @@ -252,10 +248,8 @@ class NDFrame(PandasObject, indexing.IndexingMixin): _internal_names: list[str] = [ "_mgr", - "_cacher", "_item_cache", "_cache", - "_is_copy", "_name", "_metadata", "_flags", @@ -264,7 +258,6 @@ class NDFrame(PandasObject, indexing.IndexingMixin): _accessors: set[str] = set() _hidden_attrs: frozenset[str] = frozenset([]) _metadata: list[str] = [] - _is_copy: weakref.ReferenceType[NDFrame] | str | None = None _mgr: Manager _attrs: dict[Hashable, Any] _typ: str @@ -273,9 +266,7 @@ class NDFrame(PandasObject, indexing.IndexingMixin): # Constructors def __init__(self, data: Manager) -> None: - object.__setattr__(self, "_is_copy", None) object.__setattr__(self, "_mgr", data) - object.__setattr__(self, "_item_cache", {}) object.__setattr__(self, "_attrs", {}) object.__setattr__(self, "_flags", Flags(self, allows_duplicate_labels=True)) @@ -787,7 +778,6 @@ def _set_axis(self, axis: AxisInt, labels: 
AnyArrayLike | list) -> None: """ labels = ensure_index(labels) self._mgr.set_axis(axis, labels) - self._clear_item_cache() @final def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self: @@ -1105,7 +1095,6 @@ def _rename( new_index = ax._transform_index(f, level=level) result._set_axis_nocheck(new_index, axis=axis_no, inplace=True, copy=False) - result._clear_item_cache() if inplace: self._update_inplace(result) @@ -2193,8 +2182,6 @@ def __setstate__(self, state) -> None: elif len(state) == 2: raise NotImplementedError("Pre-0.12 pickles are no longer supported") - self._item_cache: dict[Hashable, Series] = {} - # ---------------------------------------------------------------------- # Rendering Methods @@ -3962,44 +3949,6 @@ def to_csv( storage_options=storage_options, ) - # ---------------------------------------------------------------------- - # Lookup Caching - - def _reset_cacher(self) -> None: - """ - Reset the cacher. - """ - raise AbstractMethodError(self) - - def _maybe_update_cacher( - self, - clear: bool_t = False, - verify_is_copy: bool_t = True, - inplace: bool_t = False, - ) -> None: - """ - See if we need to update our parent cacher if clear, then clear our - cache. - - Parameters - ---------- - clear : bool, default False - Clear the item cache. - verify_is_copy : bool, default True - Provide is_copy checks. - """ - if using_copy_on_write(): - return - - if verify_is_copy: - self._check_setitem_copy(t="referent") - - if clear: - self._clear_item_cache() - - def _clear_item_cache(self) -> None: - raise AbstractMethodError(self) - # ---------------------------------------------------------------------- # Indexing Methods @@ -4118,23 +4067,6 @@ class max_speed self, method="take" ) - @final - def _take_with_is_copy(self, indices, axis: Axis = 0) -> Self: - """ - Internal version of the `take` method that sets the `_is_copy` - attribute to keep track of the parent dataframe (using in indexing - for the SettingWithCopyWarning). 
- - For Series this does the same as the public take (it never sets `_is_copy`). - - See the docstring of `take` for full explanation of the parameters. - """ - result = self.take(indices=indices, axis=axis) - # Maybe set copy if we didn't actually change the index. - if self.ndim == 2 and not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) - return result - @final def xs( self, @@ -4282,9 +4214,9 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: (inds,) = loc.nonzero() - return self._take_with_is_copy(inds, axis=axis) + return self.take(inds, axis=axis) else: - return self._take_with_is_copy(loc, axis=axis) + return self.take(loc, axis=axis) if not is_scalar(loc): new_index = index[loc] @@ -4310,9 +4242,6 @@ class animal locomotion result = self.iloc[loc] result.index = new_index - # this could be a view - # but only in a single-dtyped view sliceable case - result._set_is_copy(self, copy=not result._is_view) return result def __getitem__(self, item): @@ -4348,111 +4277,8 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self: new_mgr = self._mgr.get_slice(slobj, axis=axis) result = self._constructor_from_mgr(new_mgr, axes=new_mgr.axes) result = result.__finalize__(self) - - # this could be a view - # but only in a single-dtyped view sliceable case - is_copy = axis != 0 or result._is_view - result._set_is_copy(self, copy=is_copy) return result - @final - def _set_is_copy(self, ref: NDFrame, copy: bool_t = True) -> None: - if not copy: - self._is_copy = None - else: - assert ref is not None - self._is_copy = weakref.ref(ref) - - def _check_is_chained_assignment_possible(self) -> bool_t: - """ - Check if we are a view, have a cacher, and are of mixed type. - If so, then force a setitem_copy check. - - Should be called just near setting a value - - Will return a boolean if it we are a view and are cached, but a - single-dtype meaning that the cacher should be updated following - setting. 
- """ - if self._is_copy: - self._check_setitem_copy(t="referent") - return False - - @final - def _check_setitem_copy(self, t: str = "setting", force: bool_t = False) -> None: - """ - - Parameters - ---------- - t : str, the type of setting error - force : bool, default False - If True, then force showing an error. - - validate if we are doing a setitem on a chained copy. - - It is technically possible to figure out that we are setting on - a copy even WITH a multi-dtyped pandas object. In other words, some - blocks may be views while other are not. Currently _is_view will ALWAYS - return False for multi-blocks to avoid having to handle this case. - - df = DataFrame(np.arange(0,9), columns=['count']) - df['group'] = 'b' - - # This technically need not raise SettingWithCopy if both are view - # (which is not generally guaranteed but is usually True. However, - # this is in general not a good practice and we recommend using .loc. - df.iloc[0:5]['group'] = 'a' - - """ - if using_copy_on_write(): - return - - # return early if the check is not needed - if not (force or self._is_copy): - return - - value = config.get_option("mode.chained_assignment") - if value is None: - return - - # see if the copy is not actually referred; if so, then dissolve - # the copy weakref - if self._is_copy is not None and not isinstance(self._is_copy, str): - r = self._is_copy() - if not gc.get_referents(r) or (r is not None and r.shape == self.shape): - self._is_copy = None - return - - # a custom message - if isinstance(self._is_copy, str): - t = self._is_copy - - elif t == "referent": - t = ( - "\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame\n\n" - "See the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - else: - t = ( - "\n" - "A value is trying to be set on a copy of a slice from a " - "DataFrame.\n" - "Try using .loc[row_indexer,col_indexer] = value " - 
"instead\n\nSee the caveats in the documentation: " - "https://pandas.pydata.org/pandas-docs/stable/user_guide/" - "indexing.html#returning-a-view-versus-a-copy" - ) - - if value == "raise": - raise SettingWithCopyError(t) - if value == "warn": - warnings.warn(t, SettingWithCopyWarning, stacklevel=find_stack_level()) - @final def __delitem__(self, key) -> None: """ @@ -4485,12 +4311,6 @@ def __delitem__(self, key) -> None: loc = self.axes[-1].get_loc(key) self._mgr = self._mgr.idelete(loc) - # delete from the caches - try: - del self._item_cache[key] - except KeyError: - pass - # ---------------------------------------------------------------------- # Unsorted @@ -4860,22 +4680,17 @@ def _drop_axis( return result.__finalize__(self) @final - def _update_inplace(self, result, verify_is_copy: bool_t = True) -> None: + def _update_inplace(self, result) -> None: """ Replace self internals with result. Parameters ---------- result : same type as self - verify_is_copy : bool, default True - Provide is_copy checks. """ # NOTE: This does *not* call __finalize__ and that's an explicit # decision that we may revisit in the future. 
- self._reset_cache() - self._clear_item_cache() self._mgr = result._mgr - self._maybe_update_cacher(verify_is_copy=verify_is_copy, inplace=True) @final def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self: @@ -6351,26 +6166,11 @@ def _dir_additions(self) -> set[str]: # ---------------------------------------------------------------------- # Consolidation of internals - @final - def _protect_consolidate(self, f): - """ - Consolidate _mgr -- if the blocks have changed, then clear the - cache - """ - blocks_before = len(self._mgr.blocks) - result = f() - if len(self._mgr.blocks) != blocks_before: - self._clear_item_cache() - return result - @final def _consolidate_inplace(self) -> None: """Consolidate data in place and return None""" - def f() -> None: - self._mgr = self._mgr.consolidate() - - self._protect_consolidate(f) + self._mgr = self._mgr.consolidate() @final def _consolidate(self): @@ -6382,8 +6182,7 @@ def _consolidate(self): ------- consolidated : same type as caller """ - f = lambda: self._mgr.consolidate() - cons_data = self._protect_consolidate(f) + cons_data = self._mgr.consolidate() return self._constructor_from_mgr(cons_data, axes=cons_data.axes).__finalize__( self ) @@ -6789,7 +6588,6 @@ def copy(self, deep: bool_t | None = True) -> Self: dtype: object """ data = self._mgr.copy(deep=deep) - self._clear_item_cache() return self._constructor_from_mgr(data, axes=data.axes).__finalize__( self, method="copy" ) @@ -9182,7 +8980,7 @@ def at_time(self, time, asof: bool_t = False, axis: Axis | None = None) -> Self: raise TypeError("Index must be DatetimeIndex") indexer = index.indexer_at_time(time, asof=asof) - return self._take_with_is_copy(indexer, axis=axis) + return self.take(indexer, axis=axis) @final def between_time( @@ -9267,7 +9065,7 @@ def between_time( include_start=left_inclusive, include_end=right_inclusive, ) - return self._take_with_is_copy(indexer, axis=axis) + return self.take(indexer, axis=axis) @final 
@doc(klass=_shared_doc_kwargs["klass"]) @@ -12317,14 +12115,9 @@ def _inplace_method(self, other, op) -> Self: """ result = op(self, other) - # Delete cacher - self._reset_cacher() - # this makes sure that we are aligned like the input - # we are updating inplace so we want to ignore is_copy - self._update_inplace( - result.reindex_like(self, copy=False), verify_is_copy=False - ) + # we are updating inplace + self._update_inplace(result.reindex_like(self, copy=False)) return self @final diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 64f882e5a146c..fa79b23b8209e 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -34,8 +34,6 @@ class providing the base-class of operations. import numpy as np -from pandas._config.config import option_context - from pandas._libs import ( Timestamp, lib, @@ -1742,32 +1740,28 @@ def f(g): if not include_groups: return self._python_apply_general(f, self._obj_with_exclusions) - # ignore SettingWithCopy here in case the user mutates - with option_context("mode.chained_assignment", None): - try: - result = self._python_apply_general(f, self._selected_obj) - if ( - not isinstance(self.obj, Series) - and self._selection is None - and self._selected_obj.shape != self._obj_with_exclusions.shape - ): - warnings.warn( - message=_apply_groupings_depr.format( - type(self).__name__, "apply" - ), - category=DeprecationWarning, - stacklevel=find_stack_level(), - ) - except TypeError: - # gh-20949 - # try again, with .apply acting as a filtering - # operation, by excluding the grouping column - # This would normally not be triggered - # except if the udf is trying an operation that - # fails on *some* columns, e.g. 
a numeric operation - # on a string grouper column - - return self._python_apply_general(f, self._obj_with_exclusions) + try: + result = self._python_apply_general(f, self._selected_obj) + if ( + not isinstance(self.obj, Series) + and self._selection is None + and self._selected_obj.shape != self._obj_with_exclusions.shape + ): + warnings.warn( + message=_apply_groupings_depr.format(type(self).__name__, "apply"), + category=DeprecationWarning, + stacklevel=find_stack_level(), + ) + except TypeError: + # gh-20949 + # try again, with .apply acting as a filtering + # operation, by excluding the grouping column + # This would normally not be triggered + # except if the udf is trying an operation that + # fails on *some* columns, e.g. a numeric operation + # on a string grouper column + + return self._python_apply_general(f, self._obj_with_exclusions) return result diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 7e3ba4089ff60..1a24ae8530c12 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -106,16 +106,7 @@ def _delegate_property_get(self, name: str): else: index = self._parent.index # return the result as a Series - result = Series(result, index=index, name=self.name).__finalize__(self._parent) - - # setting this object will show a SettingWithCopyWarning/Error - result._is_copy = ( - "modifications to a property of a datetimelike " - "object are not supported and are discarded. " - "Change values on the original." 
- ) - - return result + return Series(result, index=index, name=self.name).__finalize__(self._parent) def _delegate_property_set(self, name: str, value, *args, **kwargs): raise ValueError( @@ -134,19 +125,10 @@ def _delegate_method(self, name: str, *args, **kwargs): if not is_list_like(result): return result - result = Series(result, index=self._parent.index, name=self.name).__finalize__( + return Series(result, index=self._parent.index, name=self.name).__finalize__( self._parent ) - # setting this object will show a SettingWithCopyWarning/Error - result._is_copy = ( - "modifications to a method of a datetimelike " - "object are not supported and are discarded. " - "Change values on the original." - ) - - return result - @delegate_names( delegate=ArrowExtensionArray, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b58c3179dec09..ab06dd3ea5af0 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1194,7 +1194,7 @@ def _getbool_axis(self, key, axis: AxisInt): labels = self.obj._get_axis(axis) key = check_bool_indexer(labels, key) inds = key.nonzero()[0] - return self.obj._take_with_is_copy(inds, axis=axis) + return self.obj.take(inds, axis=axis) @doc(IndexingMixin.loc) @@ -1697,7 +1697,7 @@ def _get_list_axis(self, key, axis: AxisInt): `axis` can only be zero. """ try: - return self.obj._take_with_is_copy(key, axis=axis) + return self.obj.take(key, axis=axis) except IndexError as err: # re-raise with different error message, e.g. 
test_getitem_ndarray_3d raise IndexError("positional indexers are out-of-bounds") from err @@ -1905,8 +1905,6 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None: reindexers, allow_dups=True ) self.obj._mgr = new_obj._mgr - self.obj._maybe_update_cacher(clear=True) - self.obj._is_copy = None nindexer.append(labels.get_loc(key)) @@ -2154,8 +2152,6 @@ def _setitem_single_column(self, loc: int, value, plane_indexer) -> None: # falling back to casting if necessary) self.obj._mgr.column_setitem(loc, plane_indexer, value) - self.obj._clear_item_cache() - def _setitem_single_block(self, indexer, value, name: str) -> None: """ _setitem_with_indexer for the case when we have a single Block. @@ -2191,12 +2187,8 @@ def _setitem_single_block(self, indexer, value, name: str) -> None: if isinstance(value, ABCDataFrame) and name != "iloc": value = self._align_frame(indexer, value)._values - # check for chained assignment - self.obj._check_is_chained_assignment_possible() - # actually do the set self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) - self.obj._maybe_update_cacher(clear=True, inplace=True) def _setitem_with_indexer_missing(self, indexer, value): """ @@ -2262,7 +2254,6 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._mgr = self.obj._constructor( new_values, index=new_index, name=self.obj.name )._mgr - self.obj._maybe_update_cacher(clear=True) elif self.ndim == 2: if not len(self.obj.columns): @@ -2306,7 +2297,6 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._mgr = df._mgr else: self.obj._mgr = self.obj._append(value)._mgr - self.obj._maybe_update_cacher(clear=True) def _ensure_iterable_column_indexer(self, column_indexer): """ diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 73f492c83c2ff..390f5e0d0d5ae 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -10,7 +10,6 @@ import 
numpy as np from pandas.compat._optional import import_optional_dependency -from pandas.errors import SettingWithCopyError import pandas as pd from pandas.core.interchange.dataframe_protocol import ( @@ -548,9 +547,5 @@ def set_nulls( # cast the `data` to nullable float dtype. data = data.astype(float) data[null_pos] = None - except SettingWithCopyError: - # `SettingWithCopyError` may happen for datetime-like with missing values. - data = data.copy() - data[null_pos] = None return data diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index fa54fde2ece84..5a8a14168d504 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -690,7 +690,7 @@ def is_view(self) -> bool: # e.g. [ b.values.base is not None for b in self.blocks ] # but then we have the case of possibly some blocks being a view # and some blocks not. setting in theory is possible on the non-view - # blocks w/o causing a SettingWithCopy raise/warn. But this is a bit + # blocks. 
But this is a bit # complicated return False diff --git a/pandas/core/series.py b/pandas/core/series.py index 94be7bdbaca16..e9d340237c234 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -22,7 +22,6 @@ overload, ) import warnings -import weakref import numpy as np @@ -1239,7 +1238,6 @@ def __setitem__(self, key, value) -> None: check_dict_or_set_indexers(key) key = com.apply_if_callable(key, self) - cacher_needs_updating = self._check_is_chained_assignment_possible() if key is Ellipsis: key = slice(None) @@ -1317,9 +1315,6 @@ def __setitem__(self, key, value) -> None: else: self._set_with(key, value) - if cacher_needs_updating: - self._maybe_update_cacher(inplace=True) - def _set_with_engine(self, key, value) -> None: loc = self.index.get_loc(key) @@ -1371,7 +1366,6 @@ def _set_values(self, key, value) -> None: key = key._values self._mgr = self._mgr.setitem(indexer=key, value=value) - self._maybe_update_cacher() def _set_value(self, label, value, takeable: bool = False) -> None: """ @@ -1400,84 +1394,6 @@ def _set_value(self, label, value, takeable: bool = False) -> None: self._set_values(loc, value) - # ---------------------------------------------------------------------- - # Lookup Caching - - @property - def _is_cached(self) -> bool: - """Return boolean indicating if self is cached or not.""" - return getattr(self, "_cacher", None) is not None - - def _get_cacher(self): - """return my cacher or None""" - cacher = getattr(self, "_cacher", None) - if cacher is not None: - cacher = cacher[1]() - return cacher - - def _reset_cacher(self) -> None: - """ - Reset the cacher. - """ - if hasattr(self, "_cacher"): - del self._cacher - - def _set_as_cached(self, item, cacher) -> None: - """ - Set the _cacher attribute on the calling object with a weakref to - cacher. 
- """ - if using_copy_on_write(): - return - self._cacher = (item, weakref.ref(cacher)) - - def _clear_item_cache(self) -> None: - # no-op for Series - pass - - def _check_is_chained_assignment_possible(self) -> bool: - """ - See NDFrame._check_is_chained_assignment_possible.__doc__ - """ - if self._is_view and self._is_cached: - ref = self._get_cacher() - if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(t="referent", force=True) - return True - return super()._check_is_chained_assignment_possible() - - def _maybe_update_cacher( - self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False - ) -> None: - """ - See NDFrame._maybe_update_cacher.__doc__ - """ - # for CoW, we never want to update the parent DataFrame cache - # if the Series changed, but don't keep track of any cacher - if using_copy_on_write(): - return - cacher = getattr(self, "_cacher", None) - if cacher is not None: - ref: DataFrame = cacher[1]() - - # we are trying to reference a dead referent, hence - # a copy - if ref is None: - del self._cacher - elif len(self) == len(ref) and self.name in ref.columns: - # GH#42530 self.name must be in ref.columns - # to ensure column still in dataframe - # otherwise, either self or ref has swapped in new arrays - ref._maybe_cache_changed(cacher[0], self, inplace=inplace) - else: - # GH#33675 we have swapped in a new array, so parent - # reference to self is now invalid - ref._item_cache.pop(cacher[0], None) - - super()._maybe_update_cacher( - clear=clear, verify_is_copy=verify_is_copy, inplace=inplace - ) - # ---------------------------------------------------------------------- # Unsorted @@ -3578,7 +3494,6 @@ def update(self, other: Series | Sequence | Mapping) -> None: mask = notna(other) self._mgr = self._mgr.putmask(mask=mask, new=other) - self._maybe_update_cacher() # ---------------------------------------------------------------------- # Reindexing, sorting @@ -3782,13 +3697,6 @@ def sort_values( # Validate the 
axis parameter self._get_axis_number(axis) - # GH 5856/5853 - if inplace and self._is_cached: - raise ValueError( - "This Series is a view of some other array, to " - "sort in-place you must create a copy" - ) - if is_list_like(ascending): ascending = cast(Sequence[bool], ascending) if len(ascending) != 1: diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 52b896dc01e8f..97db508bda1b4 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -408,50 +408,6 @@ class SpecificationError(Exception): """ -class SettingWithCopyError(ValueError): - """ - Exception raised when trying to set on a copied slice from a ``DataFrame``. - - The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can - happen unintentionally when chained indexing. - - For more information on evaluation order, - see :ref:`the user guide`. - - For more information on view vs. copy, - see :ref:`the user guide`. - - Examples - -------- - >>> pd.options.mode.chained_assignment = 'raise' - >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) - >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP - ... # SettingWithCopyError: A value is trying to be set on a copy of a... - """ - - -class SettingWithCopyWarning(Warning): - """ - Warning raised when trying to set on a copied slice from a ``DataFrame``. - - The ``mode.chained_assignment`` needs to be set to set to 'warn.' - 'Warn' is the default option. This can happen unintentionally when - chained indexing. - - For more information on evaluation order, - see :ref:`the user guide`. - - For more information on view vs. copy, - see :ref:`the user guide`. - - Examples - -------- - >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) - >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP - ... # SettingWithCopyWarning: A value is trying to be set on a copy of a... - """ - - class ChainedAssignmentError(Warning): """ Warning raised when trying to set using chained assignment. 
@@ -462,8 +418,8 @@ class ChainedAssignmentError(Warning): Copy-on-Write always behaves as a copy. Thus, assigning through a chain can never update the original Series or DataFrame. - For more information on view vs. copy, - see :ref:`the user guide`. + For more information on Copy-on-Write, + see :ref:`the user guide`. Examples -------- @@ -787,8 +743,6 @@ class InvalidComparison(Exception): "PossiblePrecisionLoss", "PyperclipException", "PyperclipWindowsException", - "SettingWithCopyError", - "SettingWithCopyWarning", "SpecificationError", "UndefinedVariableError", "UnsortedIndexError", diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 594fb8651f8f0..cea34cdfb0b9d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -251,8 +251,6 @@ def __init__( self.default_handler = default_handler self.index = index self.indent = indent - - self.is_copy = None self._format_axes() def _format_axes(self) -> None: diff --git a/pandas/tests/copy_view/test_chained_assignment_deprecation.py b/pandas/tests/copy_view/test_chained_assignment_deprecation.py index cfa9cf64357b6..e1a76e66c107f 100644 --- a/pandas/tests/copy_view/test_chained_assignment_deprecation.py +++ b/pandas/tests/copy_view/test_chained_assignment_deprecation.py @@ -1,15 +1,9 @@ import numpy as np import pytest -from pandas.errors import ( - ChainedAssignmentError, - SettingWithCopyWarning, -) +from pandas.errors import ChainedAssignmentError -from pandas import ( - DataFrame, - option_context, -) +from pandas import DataFrame import pandas._testing as tm @@ -53,15 +47,11 @@ def test_series_setitem(indexer, using_copy_on_write): assert "ChainedAssignmentError" in record[0].message.args[0] -@pytest.mark.filterwarnings("ignore::pandas.errors.SettingWithCopyWarning") @pytest.mark.parametrize( "indexer", ["a", ["a", "b"], slice(0, 2), np.array([True, False, True])] ) -def test_frame_setitem(indexer, using_copy_on_write): +def test_frame_setitem(indexer): df = DataFrame({"a": [1, 2, 3, 
4, 5], "b": 1}) - extra_warnings = () if using_copy_on_write else (SettingWithCopyWarning,) - - with option_context("chained_assignment", "warn"): - with tm.raises_chained_assignment_error(extra_warnings=extra_warnings): - df[0:3][indexer] = 10 + with tm.raises_chained_assignment_error(): + df[0:3][indexer] = 10 diff --git a/pandas/tests/copy_view/test_clip.py b/pandas/tests/copy_view/test_clip.py index 9be9ba6f144c4..c18a2e1e65d26 100644 --- a/pandas/tests/copy_view/test_clip.py +++ b/pandas/tests/copy_view/test_clip.py @@ -1,9 +1,6 @@ import numpy as np -from pandas import ( - DataFrame, - option_context, -) +from pandas import DataFrame import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -89,9 +86,7 @@ def test_clip_chained_inplace(using_copy_on_write): df["a"].clip(1, 2, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[["a"]].clip(1, 2, inplace=True) + df[["a"]].clip(1, 2, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - df[df["a"] > 1].clip(1, 2, inplace=True) + df[df["a"] > 1].clip(1, 2, inplace=True) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 69fb8fe2c6f63..da72e89b23ca0 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyWarning - from pandas.core.dtypes.common import is_float_dtype import pandas as pd @@ -59,17 +57,10 @@ def test_subset_column_selection(backend, using_copy_on_write): subset = df[["a", "c"]] - if using_copy_on_write: - # the subset shares memory ... - assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - # ... 
but uses CoW when being modified - subset.iloc[0, 0] = 0 - else: - assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - # INFO this no longer raise warning since pandas 1.4 - # with pd.option_context("chained_assignment", "warn"): - # with tm.assert_produces_warning(SettingWithCopyWarning): - subset.iloc[0, 0] = 0 + # the subset shares memory ... + assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) + # ... but uses CoW when being modified + subset.iloc[0, 0] = 0 assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) @@ -113,37 +104,24 @@ def test_subset_row_slice(backend, using_copy_on_write): assert np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - if using_copy_on_write: - subset.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) - - else: - # INFO this no longer raise warning since pandas 1.4 - # with pd.option_context("chained_assignment", "warn"): - # with tm.assert_produces_warning(SettingWithCopyWarning): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(subset, "a"), get_array(df, "a")) subset._mgr._verify_integrity() expected = DataFrame({"a": [0, 3], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3)) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.iloc[1, 0] = 0 - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_column_slice(backend, using_copy_on_write, dtype): +def test_subset_column_slice(backend, dtype): # Case: taking a subset of the columns of a DataFrame using a slice # + afterwards modifying the subset dtype_backend, DataFrame, _ = backend - 
single_block = dtype == "int64" and dtype_backend == "numpy" df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} ) @@ -152,27 +130,16 @@ def test_subset_column_slice(backend, using_copy_on_write, dtype): subset = df.iloc[:, 1:] subset._mgr._verify_integrity() - if using_copy_on_write: - assert np.shares_memory(get_array(subset, "b"), get_array(df, "b")) + assert np.shares_memory(get_array(subset, "b"), get_array(df, "b")) - subset.iloc[0, 0] = 0 - assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) - else: - # we only get a warning in case of a single block - warn = SettingWithCopyWarning if single_block else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - subset.iloc[0, 0] = 0 + subset.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(subset, "b"), get_array(df, "b")) expected = DataFrame({"b": [0, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}) tm.assert_frame_equal(subset, expected) # original parent dataframe is not modified (also not for BlockManager case, # except for single block) - if not using_copy_on_write and single_block: - df_orig.iloc[0, 1] = 0 - tm.assert_frame_equal(df, df_orig) - else: - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize( @@ -288,7 +255,7 @@ def test_subset_iloc_rows_columns( [slice(0, 2), np.array([True, True, False]), np.array([0, 1])], ids=["slice", "mask", "array"], ) -def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on_write): +def test_subset_set_with_row_indexer(backend, indexer_si, indexer): # Case: setting values with a row indexer on a viewing subset # subset[indexer] = value and subset.iloc[indexer] = value _, DataFrame, _ = backend @@ -303,29 +270,17 @@ def test_subset_set_with_row_indexer(backend, indexer_si, indexer, using_copy_on ): pytest.skip("setitem with labels selects on columns") - if using_copy_on_write: - 
indexer_si(subset)[indexer] = 0 - else: - # INFO iloc no longer raises warning since pandas 1.4 - warn = SettingWithCopyWarning if indexer_si is tm.setitem else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - indexer_si(subset)[indexer] = 0 + indexer_si(subset)[indexer] = 0 expected = DataFrame( {"a": [0, 0, 4], "b": [0, 0, 7], "c": [0.0, 0.0, 0.4]}, index=range(1, 4) ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig[1:3] = 0 - tm.assert_frame_equal(df, df_orig) + # original parent dataframe is not modified (CoW) + tm.assert_frame_equal(df, df_orig) -def test_subset_set_with_mask(backend, using_copy_on_write): +def test_subset_set_with_mask(backend): # Case: setting values with a mask on a viewing subset: subset[mask] = value _, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3, 4], "b": [4, 5, 6, 7], "c": [0.1, 0.2, 0.3, 0.4]}) @@ -334,28 +289,16 @@ def test_subset_set_with_mask(backend, using_copy_on_write): mask = subset > 3 - if using_copy_on_write: - subset[mask] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - subset[mask] = 0 + subset[mask] = 0 expected = DataFrame( {"a": [2, 3, 0], "b": [0, 0, 0], "c": [0.20, 0.3, 0.4]}, index=range(1, 4) ) tm.assert_frame_equal(subset, expected) - if using_copy_on_write: - # original parent dataframe is not modified (CoW) - tm.assert_frame_equal(df, df_orig) - else: - # original parent dataframe is actually updated - df_orig.loc[3, "a"] = 0 - df_orig.loc[1:3, "b"] = 0 - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) -def test_subset_set_column(backend, using_copy_on_write): +def test_subset_set_column(backend): # Case: setting a single column on a viewing subset -> subset[col] = value 
dtype_backend, DataFrame, _ = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -367,13 +310,7 @@ def test_subset_set_column(backend, using_copy_on_write): else: arr = pd.array([10, 11], dtype="Int64") - if using_copy_on_write: - subset["a"] = arr - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - subset["a"] = arr - + subset["a"] = arr subset._mgr._verify_integrity() expected = DataFrame( {"a": [10, 11], "b": [5, 6], "c": [0.2, 0.3]}, index=range(1, 3) @@ -459,17 +396,11 @@ def test_subset_set_columns(backend, using_copy_on_write, dtype): df_orig = df.copy() subset = df[1:3] - if using_copy_on_write: - subset[["a", "c"]] = 0 - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - subset[["a", "c"]] = 0 + subset[["a", "c"]] = 0 subset._mgr._verify_integrity() - if using_copy_on_write: - # first and third column should certainly have no references anymore - assert all(subset._mgr._has_no_reference(i) for i in [0, 2]) + # first and third column should certainly have no references anymore + assert all(subset._mgr._has_no_reference(i) for i in [0, 2]) expected = DataFrame({"a": [0, 0], "b": [5, 6], "c": [0, 0]}, index=range(1, 3)) if dtype_backend == "nullable": # there is not yet a global option, so overriding a column by setting a scalar @@ -582,7 +513,7 @@ def test_subset_chained_getitem( @pytest.mark.parametrize( "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] ) -def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): +def test_subset_chained_getitem_column(backend, dtype): # Case: creating a subset using multiple, chained getitem calls using views # still needs to guarantee proper CoW behaviour dtype_backend, DataFrame, Series = backend @@ -593,22 +524,14 @@ def test_subset_chained_getitem_column(backend, dtype, using_copy_on_write): # modify subset -> 
don't modify parent subset = df[:]["a"][0:2] - df._clear_item_cache() subset.iloc[0] = 0 - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) # modify parent -> don't modify subset subset = df[:]["a"][0:2] - df._clear_item_cache() df.iloc[0, 0] = 0 expected = Series([1, 2], name="a") - if using_copy_on_write: - tm.assert_series_equal(subset, expected) - else: - assert subset.iloc[0] == 0 + tm.assert_series_equal(subset, expected) @pytest.mark.parametrize( @@ -877,7 +800,7 @@ def test_del_series(backend): # Accessing column as Series -def test_column_as_series(backend, using_copy_on_write): +def test_column_as_series(backend): # Case: selecting a single column now also uses Copy-on-Write dtype_backend, DataFrame, Series = backend df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) @@ -886,28 +809,17 @@ def test_column_as_series(backend, using_copy_on_write): s = df["a"] assert np.shares_memory(get_array(s, "a"), get_array(df, "a")) - - if using_copy_on_write: - s[0] = 0 - else: - warn = SettingWithCopyWarning if dtype_backend == "numpy" else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - s[0] = 0 + s[0] = 0 expected = Series([0, 2, 3], name="a") tm.assert_series_equal(s, expected) - if using_copy_on_write: - # assert not np.shares_memory(s.values, get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - # ensure cached series on getitem is not the changed series - tm.assert_series_equal(df["a"], df_orig["a"]) - else: - df_orig.iloc[0, 0] = 0 - tm.assert_frame_equal(df, df_orig) + # assert not np.shares_memory(s.values, get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + # ensure cached series on getitem is not the changed series + tm.assert_series_equal(df["a"], df_orig["a"]) -def test_column_as_series_set_with_upcast(backend, using_copy_on_write): +def test_column_as_series_set_with_upcast(backend): 
# Case: selecting a single column now also uses Copy-on-Write -> when # setting a value causes an upcast, we don't need to update the parent # DataFrame through the cache mechanism @@ -920,32 +832,15 @@ def test_column_as_series_set_with_upcast(backend, using_copy_on_write): with pytest.raises(TypeError, match="Invalid value"): s[0] = "foo" expected = Series([1, 2, 3], name="a") - elif using_copy_on_write: + else: with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"): s[0] = "foo" expected = Series(["foo", 2, 3], dtype=object, name="a") - else: - with pd.option_context("chained_assignment", "warn"): - msg = "|".join( - [ - "A value is trying to be set on a copy of a slice from a DataFrame", - "Setting an item of incompatible dtype is deprecated", - ] - ) - with tm.assert_produces_warning( - (SettingWithCopyWarning, FutureWarning), match=msg - ): - s[0] = "foo" - expected = Series(["foo", 2, 3], dtype=object, name="a") tm.assert_series_equal(s, expected) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - # ensure cached series on getitem is not the changed series - tm.assert_series_equal(df["a"], df_orig["a"]) - else: - df_orig["a"] = expected - tm.assert_frame_equal(df, df_orig) + tm.assert_frame_equal(df, df_orig) + # ensure cached series on getitem is not the changed series + tm.assert_series_equal(df["a"], df_orig["a"]) @pytest.mark.parametrize( @@ -957,12 +852,7 @@ def test_column_as_series_set_with_upcast(backend, using_copy_on_write): ], ids=["getitem", "loc", "iloc"], ) -def test_column_as_series_no_item_cache( - request, - backend, - method, - using_copy_on_write, -): +def test_column_as_series_no_item_cache(request, backend, method): # Case: selecting a single column (which now also uses Copy-on-Write to protect # the view) should always give a new object (i.e. 
not make use of a cache) dtype_backend, DataFrame, _ = backend @@ -972,25 +862,12 @@ def test_column_as_series_no_item_cache( s1 = method(df) s2 = method(df) - is_iloc = "iloc" in request.node.name - if using_copy_on_write or is_iloc: - assert s1 is not s2 - else: - assert s1 is s2 + assert s1 is not s2 - if using_copy_on_write: - s1.iloc[0] = 0 - else: - warn = SettingWithCopyWarning if dtype_backend == "numpy" else None - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - s1.iloc[0] = 0 + s1.iloc[0] = 0 - if using_copy_on_write: - tm.assert_series_equal(s2, df_orig["a"]) - tm.assert_frame_equal(df, df_orig) - else: - assert s2.iloc[0] == 0 + tm.assert_series_equal(s2, df_orig["a"]) + tm.assert_frame_equal(df, df_orig) # TODO add tests for other indexing methods on the Series @@ -1074,23 +951,16 @@ def test_series_midx_slice(using_copy_on_write): tm.assert_series_equal(ser, expected) -def test_getitem_midx_slice(using_copy_on_write): +def test_getitem_midx_slice(): df = DataFrame({("a", "x"): [1, 2], ("a", "y"): 1, ("b", "x"): 2}) df_orig = df.copy() new_df = df[("a",)] - if using_copy_on_write: - assert not new_df._mgr._has_no_reference(0) + assert not new_df._mgr._has_no_reference(0) assert np.shares_memory(get_array(df, ("a", "x")), get_array(new_df, "x")) - if using_copy_on_write: - new_df.iloc[0, 0] = 100 - tm.assert_frame_equal(df_orig, df) - else: - with pd.option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - new_df.iloc[0, 0] = 100 - assert df.iloc[0, 0] == 100 + new_df.iloc[0, 0] = 100 + tm.assert_frame_equal(df_orig, df) def test_series_midx_tuples_slice(using_copy_on_write): diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index b3bd63e1c7e4c..011d18f8e609f 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from 
pandas.errors import SettingWithCopyWarning - import pandas as pd from pandas import ( DataFrame, @@ -12,7 +10,6 @@ Series, Timestamp, date_range, - option_context, period_range, ) import pandas._testing as tm @@ -1540,12 +1537,10 @@ def test_chained_where_mask(using_copy_on_write, func): getattr(df["a"], func)(df["a"] > 2, 5, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) + getattr(df[["a"]], func)(df["a"] > 2, 5, inplace=True) with tm.assert_produces_warning(None): - with option_context("mode.chained_assignment", None): - getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) + getattr(df[df["a"] > 1], func)(df["a"] > 2, 5, inplace=True) def test_asfreq_noop(using_copy_on_write): @@ -1667,23 +1662,10 @@ def test_get(using_copy_on_write, key): result = df.get(key) - if using_copy_on_write: - assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) - result.iloc[0] = 0 - assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) - tm.assert_frame_equal(df, df_orig) - else: - # for non-CoW it depends on whether we got a Series or DataFrame if it - # is a view or copy or triggers a warning or not - warn = SettingWithCopyWarning if isinstance(key, list) else None - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - result.iloc[0] = 0 - - if isinstance(key, list): - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + assert np.shares_memory(get_array(result, "a"), get_array(df, "a")) + result.iloc[0] = 0 + assert not np.shares_memory(get_array(result, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("axis, key", [(0, 0), (1, "a")]) @@ -1701,20 +1683,13 @@ def test_xs(using_copy_on_write, axis, key, dtype): if axis == 1 or single_block: assert np.shares_memory(get_array(df, "a"), get_array(result)) - elif 
using_copy_on_write: + else: assert result._mgr._has_no_reference(0) if using_copy_on_write or single_block: result.iloc[0] = 0 - else: - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - result.iloc[0] = 0 - if using_copy_on_write or (not single_block and axis == 0): - tm.assert_frame_equal(df, df_orig) - else: - assert df.iloc[0, 0] == 0 + tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("axis", [0, 1]) @@ -1733,14 +1708,7 @@ def test_xs_multiindex(using_copy_on_write, key, level, axis): assert np.shares_memory( get_array(df, df.columns[0]), get_array(result, result.columns[0]) ) - - if not using_copy_on_write: - warn = SettingWithCopyWarning - else: - warn = None - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(warn): - result.iloc[0, 0] = 0 + result.iloc[0, 0] = 0 tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index c8787ac0b364e..b48ad7e3481b9 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -13,7 +13,6 @@ from pandas.errors import ( InvalidIndexError, PerformanceWarning, - SettingWithCopyError, ) from pandas.core.dtypes.common import is_integer @@ -287,7 +286,7 @@ def test_setattr_column(self): df.foobar = 5 assert (df.foobar == 5).all() - def test_setitem(self, float_frame, using_copy_on_write, using_infer_string): + def test_setitem(self, float_frame, using_infer_string): # not sure what else to do here series = float_frame["A"][::2] float_frame["col5"] = series @@ -322,13 +321,8 @@ def test_setitem(self, float_frame, using_copy_on_write, using_infer_string): # so raise/warn smaller = float_frame[:2] - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - if using_copy_on_write: - # With CoW, adding a new column doesn't raise a warning - smaller["col10"] = ["1", "2"] - 
else: - with pytest.raises(SettingWithCopyError, match=msg): - smaller["col10"] = ["1", "2"] + # With CoW, adding a new column doesn't raise a warning + smaller["col10"] = ["1", "2"] if using_infer_string: assert smaller["col10"].dtype == "string" diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index dc2f0b61e3ba0..96ae1050ed15a 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError - from pandas import ( DataFrame, Index, @@ -122,21 +120,16 @@ def test_xs_keep_level(self): result = df.xs((2008, "sat"), level=["year", "day"], drop_level=False) tm.assert_frame_equal(result, expected) - def test_xs_view(self, using_copy_on_write): + def test_xs_view(self): # in 0.14 this will return a view if possible a copy otherwise, but # this is numpy dependent dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) df_orig = dm.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - dm.xs(2)[:] = 20 - tm.assert_frame_equal(dm, df_orig) - else: - with tm.raises_chained_assignment_error(): - dm.xs(2)[:] = 20 - assert (dm.xs(2) == 20).all() + with tm.raises_chained_assignment_error(): + dm.xs(2)[:] = 20 + tm.assert_frame_equal(dm, df_orig) class TestXSWithMultiIndex: @@ -194,42 +187,22 @@ def test_xs_level_eq_2(self): result = df.xs("c", level=2) tm.assert_frame_equal(result, expected) - def test_xs_setting_with_copy_error( - self, - multiindex_dataframe_random_data, - using_copy_on_write, - ): + def test_xs_setting_with_copy_error(self, multiindex_dataframe_random_data): # this is a copy in 0.14 df = multiindex_dataframe_random_data df_orig = df.copy() result = df.xs("two", level="second") - if using_copy_on_write: - result[:] = 10 - else: - # setting this will give a SettingWithCopyError - # as we are trying to write a view - msg = "A value is trying 
to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - result[:] = 10 + result[:] = 10 tm.assert_frame_equal(df, df_orig) - def test_xs_setting_with_copy_error_multiple( - self, four_level_index_dataframe, using_copy_on_write - ): + def test_xs_setting_with_copy_error_multiple(self, four_level_index_dataframe): # this is a copy in 0.14 df = four_level_index_dataframe df_orig = df.copy() result = df.xs(("a", 4), level=["one", "four"]) - if using_copy_on_write: - result[:] = 10 - else: - # setting this will give a SettingWithCopyError - # as we are trying to write a view - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - result[:] = 10 + result[:] = 10 tm.assert_frame_equal(df, df_orig) @pytest.mark.parametrize("key, level", [("one", "second"), (["one"], ["second"])]) diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py index 4a8adf89b3aef..029aa3a5b8f05 100644 --- a/pandas/tests/frame/methods/test_asof.py +++ b/pandas/tests/frame/methods/test_asof.py @@ -163,19 +163,6 @@ def test_time_zone_aware_index(self, stamp, expected): result = df.asof(stamp) tm.assert_series_equal(result, expected) - def test_is_copy(self, date_range_frame): - # GH-27357, GH-30784: ensure the result of asof is an actual copy and - # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings - df = date_range_frame.astype({"A": "float"}) - N = 50 - df.loc[df.index[15:30], "A"] = np.nan - dates = date_range("1/1/1990", periods=N * 3, freq="25s") - - result = df.asof(dates) - - with tm.assert_produces_warning(None): - result["C"] = 1 - def test_asof_periodindex_mismatched_freq(self): N = 50 rng = period_range("1/1/1990", periods=N, freq="h") diff --git a/pandas/tests/frame/methods/test_sample.py b/pandas/tests/frame/methods/test_sample.py index e65225a33a479..91d735a8b2fa7 100644 --- 
a/pandas/tests/frame/methods/test_sample.py +++ b/pandas/tests/frame/methods/test_sample.py @@ -333,7 +333,7 @@ def test_sample_aligns_weights_with_frame(self): def test_sample_is_copy(self): # GH#27357, GH#30784: ensure the result of sample is an actual copy and - # doesn't track the parent dataframe / doesn't give SettingWithCopy warnings + # doesn't track the parent dataframe df = DataFrame( np.random.default_rng(2).standard_normal((10, 3)), columns=["a", "b", "c"] ) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index b33ca95bd4180..768c85644c977 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -331,21 +331,15 @@ def test_sort_values_datetimes(self): df2 = df.sort_values(by=["C", "B"]) tm.assert_frame_equal(df1, df2) - def test_sort_values_frame_column_inplace_sort_exception( - self, float_frame, using_copy_on_write - ): + def test_sort_values_frame_column_inplace_sort_exception(self, float_frame): s = float_frame["A"] float_frame_orig = float_frame.copy() - if using_copy_on_write: - # INFO(CoW) Series is a new object, so can be changed inplace - # without modifying original datafame - s.sort_values(inplace=True) - tm.assert_series_equal(s, float_frame_orig["A"].sort_values()) - # column in dataframe is not changed - tm.assert_frame_equal(float_frame, float_frame_orig) - else: - with pytest.raises(ValueError, match="This Series is a view"): - s.sort_values(inplace=True) + # INFO(CoW) Series is a new object, so can be changed inplace + # without modifying original datafame + s.sort_values(inplace=True) + tm.assert_series_equal(s, float_frame_orig["A"].sort_values()) + # column in dataframe is not changed + tm.assert_frame_equal(float_frame, float_frame_orig) cp = s.copy() cp.sort_values() # it works! 
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 67bebddaa63ca..0bfde350c259b 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -577,10 +577,9 @@ def f(group): assert result["d"].dtype == np.float64 # this is by definition a mutating operation! - with pd.option_context("mode.chained_assignment", None): - for key, group in grouped: - res = f(group) - tm.assert_frame_equal(res, result.loc[key]) + for key, group in grouped: + res = f(group) + tm.assert_frame_equal(res, result.loc[key]) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 6eeaeb6711d03..dd4bba42eda6f 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -332,10 +332,8 @@ def test_set_value_keeps_names(): index=idx, ) df = df.sort_index() - assert df._is_copy is None assert df.index.names == ("Name", "Number") df.at[("grethe", "4"), "one"] = 99.34 - assert df._is_copy is None assert df.index.names == ("Name", "Number") diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 24a111e283365..c70c0ee10afd6 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,8 +1,6 @@ import numpy as np -import pytest from pandas._libs import index as libindex -from pandas.errors import SettingWithCopyError from pandas import ( DataFrame, @@ -12,7 +10,7 @@ import pandas._testing as tm -def test_detect_chained_assignment(using_copy_on_write): +def test_detect_chained_assignment(): # Inplace ops, originally from: # https://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug a = [12, 23] @@ -29,14 +27,8 @@ def 
test_detect_chained_assignment(using_copy_on_write): multiind = MultiIndex.from_tuples(tuples, names=["part", "side"]) zed = DataFrame(events, index=["a", "b"], columns=multiind) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - zed["eyes"]["right"].fillna(value=555, inplace=True) - else: - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - with tm.assert_produces_warning(None): - zed["eyes"]["right"].fillna(value=555, inplace=True) + with tm.raises_chained_assignment_error(): + zed["eyes"]["right"].fillna(value=555, inplace=True) def test_cache_updating(using_copy_on_write): diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 17b00244c70f5..d731f796637ea 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.errors import SettingWithCopyError - import pandas as pd from pandas import ( DataFrame, @@ -522,38 +520,21 @@ def test_frame_setitem_view_direct( assert (df["foo"].values == 0).all() -def test_frame_setitem_copy_raises( - multiindex_dataframe_random_data, using_copy_on_write -): +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 - else: - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 + with tm.raises_chained_assignment_error(): + df["foo"]["one"] = 2 -def test_frame_setitem_copy_no_write( - multiindex_dataframe_random_data, using_copy_on_write -): +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): frame = 
multiindex_dataframe_random_data.T expected = frame df = frame.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 - else: - msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["foo"]["one"] = 2 + with tm.raises_chained_assignment_error(): + df["foo"]["one"] = 2 - result = df - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(df, expected) def test_frame_setitem_partial_multiindex(): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 6dbe4f2b3ed3a..7945d88c4a7dc 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -3,11 +3,6 @@ import numpy as np import pytest -from pandas.errors import ( - SettingWithCopyError, - SettingWithCopyWarning, -) - import pandas as pd from pandas import ( DataFrame, @@ -47,11 +42,7 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): # Assignment to wrong series with tm.raises_chained_assignment_error(): df["bb"].iloc[0] = 0.17 - df._clear_item_cache() - if not using_copy_on_write: - tm.assert_almost_equal(df["bb"][0], 0.17) - else: - tm.assert_almost_equal(df["bb"][0], 2.2) + tm.assert_almost_equal(df["bb"][0], 2.2) @pytest.mark.parametrize("do_ref", [True, False]) def test_setitem_cache_updating(self, do_ref): @@ -116,16 +107,10 @@ def test_altering_series_clears_parent_cache(self, using_copy_on_write): df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) ser = df["A"] - if using_copy_on_write: - assert "A" not in df._item_cache - else: - assert "A" in df._item_cache - # Adding a new entry to ser swaps in a new array, so "A" needs to # be removed from df._item_cache ser["c"] = 5 assert len(ser) == 3 - assert "A" not in df._item_cache assert df["A"] is not ser 
assert len(df["A"]) == 2 @@ -192,7 +177,6 @@ def test_detect_chained_assignment(self, using_copy_on_write): np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64" ) df_original = df.copy() - assert df._is_copy is None with tm.raises_chained_assignment_error(): df["A"][0] = -5 @@ -204,7 +188,7 @@ def test_detect_chained_assignment(self, using_copy_on_write): tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow - def test_detect_chained_assignment_raises(self, using_copy_on_write): + def test_detect_chained_assignment_raises(self): # test with the chaining df = DataFrame( { @@ -213,27 +197,14 @@ def test_detect_chained_assignment_raises(self, using_copy_on_write): } ) df_original = df.copy() - assert df._is_copy is None - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - with tm.raises_chained_assignment_error(): - df["A"][1] = -6 - tm.assert_frame_equal(df, df_original) - else: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["A"][0] = -5 - - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["A"][1] = np.nan - - assert df["A"]._is_copy is None + with tm.raises_chained_assignment_error(): + df["A"][0] = -5 + with tm.raises_chained_assignment_error(): + df["A"][1] = -6 + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow - def test_detect_chained_assignment_fails(self, using_copy_on_write): + def test_detect_chained_assignment_fails(self): # Using a copy (the chain), fails df = DataFrame( { @@ -242,15 +213,11 @@ def test_detect_chained_assignment_fails(self, using_copy_on_write): } ) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.loc[0]["A"] = -5 - else: - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[0]["A"] = -5 + with tm.raises_chained_assignment_error(): + df.loc[0]["A"] = -5 @pytest.mark.arm_slow - def test_detect_chained_assignment_doc_example(self, 
using_copy_on_write): + def test_detect_chained_assignment_doc_example(self): # Doc example df = DataFrame( { @@ -258,45 +225,26 @@ def test_detect_chained_assignment_doc_example(self, using_copy_on_write): "c": Series(range(7), dtype="int64"), } ) - assert df._is_copy is None indexer = df.a.str.startswith("o") - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df[indexer]["c"] = 42 - else: - with pytest.raises(SettingWithCopyError, match=msg): - df[indexer]["c"] = 42 + with tm.raises_chained_assignment_error(): + df[indexer]["c"] = 42 @pytest.mark.arm_slow - def test_detect_chained_assignment_object_dtype(self, using_copy_on_write): - expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) + def test_detect_chained_assignment_object_dtype(self): df = DataFrame( {"A": Series(["aaa", "bbb", "ccc"], dtype=object), "B": [1, 2, 3]} ) df_original = df.copy() - if not using_copy_on_write: - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[0]["A"] = 111 - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"][0] = 111 - tm.assert_frame_equal(df, df_original) - else: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["A"][0] = 111 - - df.loc[0, "A"] = 111 - tm.assert_frame_equal(df, expected) + with tm.raises_chained_assignment_error(): + df["A"][0] = 111 + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow def test_detect_chained_assignment_is_copy_pickle(self): # gh-5475: Make sure that is_copy is picked up reconstruction df = DataFrame({"A": [1, 2]}) - assert df._is_copy is None with tm.ensure_clean("__tmp__pickle") as path: df.to_pickle(path) @@ -304,68 +252,12 @@ def test_detect_chained_assignment_is_copy_pickle(self): df2["B"] = df2["A"] df2["B"] = df2["A"] - @pytest.mark.arm_slow - def test_detect_chained_assignment_setting_entire_column(self): - # gh-5597: a spurious raise as we are setting the entire column here - - df = 
random_text(100000) - - # Always a copy - x = df.iloc[[0, 1, 2]] - assert x._is_copy is not None - - x = df.iloc[[0, 1, 2, 4]] - assert x._is_copy is not None - - # Explicitly copy - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.loc[indexer].copy() - - assert df._is_copy is None - df["letters"] = df["letters"].apply(str.lower) - - @pytest.mark.arm_slow - def test_detect_chained_assignment_implicit_take(self): - # Implicitly take - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - df = df.loc[indexer] - - assert df._is_copy is not None - df["letters"] = df["letters"].apply(str.lower) - - @pytest.mark.arm_slow - def test_detect_chained_assignment_implicit_take2(self, using_copy_on_write): - if using_copy_on_write: - pytest.skip("_is_copy is not always set for CoW") - # Implicitly take 2 - df = random_text(100000) - indexer = df.letters.apply(lambda x: len(x) > 10) - - df = df.loc[indexer] - assert df._is_copy is not None - df.loc[:, "letters"] = df["letters"].apply(str.lower) - - # with the enforcement of #45333 in 2.0, the .loc[:, letters] setting - # is inplace, so df._is_copy remains non-None. 
- assert df._is_copy is not None - - df["letters"] = df["letters"].apply(str.lower) - assert df._is_copy is None - @pytest.mark.arm_slow def test_detect_chained_assignment_str(self): df = random_text(100000) indexer = df.letters.apply(lambda x: len(x) > 10) df.loc[indexer, "letters"] = df.loc[indexer, "letters"].apply(str.lower) - @pytest.mark.arm_slow - def test_detect_chained_assignment_is_copy(self): - # an identical take, so no copy - df = DataFrame({"a": [1]}).dropna() - assert df._is_copy is None - df["a"] += 1 - @pytest.mark.arm_slow def test_detect_chained_assignment_sorting(self): df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) @@ -390,24 +282,18 @@ def test_detect_chained_assignment_false_positives(self): str(df) @pytest.mark.arm_slow - def test_detect_chained_assignment_undefined_column(self, using_copy_on_write): + def test_detect_chained_assignment_undefined_column(self): # from SO: # https://stackoverflow.com/questions/24054495/potential-bug-setting-value-for-undefined-column-using-iloc df = DataFrame(np.arange(0, 9), columns=["count"]) df["group"] = "b" df_original = df.copy() - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.iloc[0:5]["group"] = "a" - tm.assert_frame_equal(df, df_original) - else: - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df.iloc[0:5]["group"] = "a" + with tm.raises_chained_assignment_error(): + df.iloc[0:5]["group"] = "a" + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow - def test_detect_chained_assignment_changing_dtype(self, using_copy_on_write): + def test_detect_chained_assignment_changing_dtype(self): # Mixed type setting but same dtype & changing dtype df = DataFrame( { @@ -419,44 +305,25 @@ def test_detect_chained_assignment_changing_dtype(self, using_copy_on_write): ) df_original = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df.loc[2]["D"] = "foo" - with 
tm.raises_chained_assignment_error(): - df.loc[2]["C"] = "foo" - tm.assert_frame_equal(df, df_original) - with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): - df["C"][2] = "foo" - if using_copy_on_write: - tm.assert_frame_equal(df, df_original) - else: - assert df.loc[2, "C"] == "foo" - else: - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[2]["D"] = "foo" - - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[2]["C"] = "foo" - - with pytest.raises(SettingWithCopyError, match=msg): - with tm.raises_chained_assignment_error(): - df["C"][2] = "foo" + with tm.raises_chained_assignment_error(): + df.loc[2]["D"] = "foo" + with tm.raises_chained_assignment_error(): + df.loc[2]["C"] = "foo" + tm.assert_frame_equal(df, df_original) + with tm.raises_chained_assignment_error(extra_warnings=(FutureWarning,)): + df["C"][2] = "foo" + tm.assert_frame_equal(df, df_original) - def test_setting_with_copy_bug(self, using_copy_on_write): + def test_setting_with_copy_bug(self): # operating on a copy df = DataFrame( {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]} ) df_original = df.copy() mask = pd.isna(df.c) - - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df[["c"]][mask] = df[["b"]][mask] - tm.assert_frame_equal(df, df_original) - else: - with pytest.raises(SettingWithCopyError, match=msg): - df[["c"]][mask] = df[["b"]][mask] + with tm.raises_chained_assignment_error(): + df[["c"]][mask] = df[["b"]][mask] + tm.assert_frame_equal(df, df_original) def test_setting_with_copy_bug_no_warning(self): # invalid warning as we are returning a new object @@ -467,20 +334,10 @@ def test_setting_with_copy_bug_no_warning(self): # this should not raise df2["y"] = ["g", "h", "i"] - def test_detect_chained_assignment_warnings_errors(self, using_copy_on_write): + def test_detect_chained_assignment_warnings_errors(self): df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) - if using_copy_on_write: 
- with tm.raises_chained_assignment_error(): - df.loc[0]["A"] = 111 - return - - with option_context("chained_assignment", "warn"): - with tm.assert_produces_warning(SettingWithCopyWarning): - df.loc[0]["A"] = 111 - - with option_context("chained_assignment", "raise"): - with pytest.raises(SettingWithCopyError, match=msg): - df.loc[0]["A"] = 111 + with tm.raises_chained_assignment_error(): + df.loc[0]["A"] = 111 @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) def test_detect_chained_assignment_warning_stacklevel( @@ -490,15 +347,9 @@ def test_detect_chained_assignment_warning_stacklevel( df = DataFrame(np.arange(25).reshape(5, 5)) df_original = df.copy() chained = df.loc[:3] - with option_context("chained_assignment", "warn"): - if not using_copy_on_write: - with tm.assert_produces_warning(SettingWithCopyWarning) as t: - chained[2] = rhs - assert t[0].filename == __file__ - else: - # INFO(CoW) no warning, and original dataframe not changed - chained[2] = rhs - tm.assert_frame_equal(df, df_original) + # INFO(CoW) no warning, and original dataframe not changed + chained[2] = rhs + tm.assert_frame_equal(df, df_original) def test_chained_getitem_with_lists(self): # GH6394 diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 9de14b3a7c112..17492f17132fd 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -12,7 +12,6 @@ import pytz from pandas._libs.tslibs.timezones import maybe_get_tz -from pandas.errors import SettingWithCopyError from pandas.core.dtypes.common import ( is_integer_dtype, @@ -281,21 +280,15 @@ def test_dt_accessor_ambiguous_freq_conversions(self): expected = Series(exp_values, name="xxx") tm.assert_series_equal(ser, expected) - def test_dt_accessor_not_writeable(self, using_copy_on_write): + def test_dt_accessor_not_writeable(self): # no setting allowed ser = Series(date_range("20130101", periods=5, 
freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): ser.dt.hour = 5 # trying to set a copy - msg = "modifications to a property of a datetimelike.+not supported" - with pd.option_context("chained_assignment", "raise"): - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - ser.dt.hour[0] = 5 - else: - with pytest.raises(SettingWithCopyError, match=msg): - ser.dt.hour[0] = 5 + with tm.raises_chained_assignment_error(): + ser.dt.hour[0] = 5 @pytest.mark.parametrize( "method, dates", diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index cb83bc5833fba..bd548eb80e182 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -79,17 +79,8 @@ def test_sort_values(self, datetime_series, using_copy_on_write): # Series.sort_values operating on a view df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) s = df.iloc[:, 0] - - msg = ( - "This Series is a view of some other array, to sort in-place " - "you must create a copy" - ) - if using_copy_on_write: - s.sort_values(inplace=True) - tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) - else: - with pytest.raises(ValueError, match=msg): - s.sort_values(inplace=True) + s.sort_values(inplace=True) + tm.assert_series_equal(s, df.iloc[:, 0].sort_values()) def test_sort_values_categorical(self): cat = Series(Categorical(["a", "b", "b", "a"], ordered=False)) diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index feba0e86c6b32..ead36ee08b407 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -46,6 +46,8 @@ def test_dask(df): pd.set_option("compute.use_numexpr", olduse) +# TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082 +@pytest.mark.skip(reason="not implemented with CoW") def test_dask_ufunc(): # dask sets "compute.use_numexpr" to False, so catch the current value # and ensure 
to reset it afterwards to avoid impacting other tests diff --git a/pandas/tests/test_errors.py b/pandas/tests/test_errors.py index c99751dca6c9d..c5c4b234eb129 100644 --- a/pandas/tests/test_errors.py +++ b/pandas/tests/test_errors.py @@ -37,8 +37,6 @@ "PossibleDataLossError", "PossiblePrecisionLoss", "PyperclipException", - "SettingWithCopyError", - "SettingWithCopyWarning", "SpecificationError", "UnsortedIndexError", "UnsupportedFunctionCall", From 0a841ae3161304e00dd401498d0339bca6970e9b Mon Sep 17 00:00:00 2001 From: Noah Asing <7989798+noah-asing@users.noreply.github.com> Date: Sun, 4 Feb 2024 07:24:14 -0800 Subject: [PATCH 36/50] DOC: Clarifying brackets vs. parentheses (#57236) --- .../getting_started/intro_tutorials/02_read_write.rst | 6 +++--- .../getting_started/intro_tutorials/03_subset_data.rst | 4 ++-- .../getting_started/intro_tutorials/05_add_columns.rst | 2 +- .../intro_tutorials/06_calculate_statistics.rst | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/source/getting_started/intro_tutorials/02_read_write.rst b/doc/source/getting_started/intro_tutorials/02_read_write.rst index 832c2cc25712f..ae658ec6abbaf 100644 --- a/doc/source/getting_started/intro_tutorials/02_read_write.rst +++ b/doc/source/getting_started/intro_tutorials/02_read_write.rst @@ -97,11 +97,11 @@ in this ``DataFrame`` are integers (``int64``), floats (``float64``) and strings (``object``). .. note:: - When asking for the ``dtypes``, no brackets are used! + When asking for the ``dtypes``, no parentheses ``()`` are used! ``dtypes`` is an attribute of a ``DataFrame`` and ``Series``. Attributes - of a ``DataFrame`` or ``Series`` do not need brackets. Attributes + of a ``DataFrame`` or ``Series`` do not need ``()``. 
Attributes represent a characteristic of a ``DataFrame``/``Series``, whereas - methods (which require brackets) *do* something with the + methods (which require parentheses ``()``) *do* something with the ``DataFrame``/``Series`` as introduced in the :ref:`first tutorial <10min_tut_01_tableoriented>`. .. raw:: html diff --git a/doc/source/getting_started/intro_tutorials/03_subset_data.rst b/doc/source/getting_started/intro_tutorials/03_subset_data.rst index 6d7ec01551572..88d7d653c9e83 100644 --- a/doc/source/getting_started/intro_tutorials/03_subset_data.rst +++ b/doc/source/getting_started/intro_tutorials/03_subset_data.rst @@ -101,7 +101,7 @@ selection brackets ``[]``. .. note:: The inner square brackets define a :ref:`Python list ` with column names, whereas - the outer brackets are used to select the data from a pandas + the outer square brackets are used to select the data from a pandas ``DataFrame`` as seen in the previous example. The returned data type is a pandas DataFrame: @@ -354,7 +354,7 @@ See the user guide section on :ref:`different choices for indexing REMEMBER - When selecting subsets of data, square brackets ``[]`` are used. -- Inside these brackets, you can use a single column/row label, a list +- Inside these square brackets, you can use a single column/row label, a list of column/row labels, a slice of labels, a conditional expression or a colon. 
- Select specific rows and/or columns using ``loc`` when using the row diff --git a/doc/source/getting_started/intro_tutorials/05_add_columns.rst b/doc/source/getting_started/intro_tutorials/05_add_columns.rst index d59a70cc2818e..3e0f75b210dbb 100644 --- a/doc/source/getting_started/intro_tutorials/05_add_columns.rst +++ b/doc/source/getting_started/intro_tutorials/05_add_columns.rst @@ -51,7 +51,7 @@ hPa, the conversion factor is 1.882*) air_quality["london_mg_per_cubic"] = air_quality["station_london"] * 1.882 air_quality.head() -To create a new column, use the ``[]`` brackets with the new column name +To create a new column, use the square brackets ``[]`` with the new column name at the left side of the assignment. .. raw:: html diff --git a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst index fe3ae820e7085..668925ce79252 100644 --- a/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst +++ b/doc/source/getting_started/intro_tutorials/06_calculate_statistics.rst @@ -162,7 +162,7 @@ columns by passing ``numeric_only=True``: It does not make much sense to get the average value of the ``Pclass``. If we are only interested in the average age for each gender, the -selection of columns (rectangular brackets ``[]`` as usual) is supported +selection of columns (square brackets ``[]`` as usual) is supported on the grouped data as well: .. ipython:: python From b8b096ef535b976d710c625d4a2d884a721cd79b Mon Sep 17 00:00:00 2001 From: Mark Bekooy Date: Sun, 4 Feb 2024 17:05:20 +0100 Subject: [PATCH 37/50] Update type hint in io/formats/format.py (#57249) * Update type hint in format.py The brackets around the type hint broke the libcst parser. 
Since the brackets are not necessary for the type hint, they have been removed * Update v2.2.1.rst * Undo update to v2.2.1.rst Type annotation changes are not added to change log --- pandas/io/formats/format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 65124f97459cd..29f6e8ab96f71 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1546,7 +1546,7 @@ def _format_strings(self) -> list[str]: def format_percentiles( - percentiles: (np.ndarray | Sequence[float]), + percentiles: np.ndarray | Sequence[float], ) -> list[str]: """ Outputs rounded and formatted percentiles. From e777b07ff36e7f70ea015724c2d22ed0ae1417a7 Mon Sep 17 00:00:00 2001 From: Lakshman <61258323+LakshmanKishore@users.noreply.github.com> Date: Sun, 4 Feb 2024 23:41:18 +0530 Subject: [PATCH 38/50] =?UTF-8?q?DOC:=20Replace=20pandas=20logo=20with=20a?= =?UTF-8?q?=20conditional=20image=20for=20light=20and=20dark=20=E2=80=A6?= =?UTF-8?q?=20(#57223)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DOC: Replace pandas logo with a conditional image for light and dark themes Replaced the static pandas logo with a `picture` tag that displays a different logo based on the color scheme. In light mode, the original logo is shown, while in dark mode, a white version of the logo is displayed for better visibility. --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6fa20d237babe..e5329d66c2d89 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ -
-
-
+ + + Pandas Logo + ----------------- From ba76f73ce6a9eaa1354198e47090e66d761a0e7f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sun, 4 Feb 2024 18:15:37 +0000 Subject: [PATCH 39/50] BUG: ensure_string_array might modify read-only array inplace (#57212) * BUG: ensure_string_array might modify read-only array inplace * BUG: ensure_string_array might modify read-only array inplace * Fix pyarrow installed error --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/_libs/lib.pyx | 3 +++ pandas/tests/copy_view/test_astype.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 768bb9e99407a..81c3f88f7e8ad 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -160,7 +160,7 @@ Numeric Conversion ^^^^^^^^^^ -- +- Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Strings diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 5eb20960f0e3d..50feda8fb188e 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -770,6 +770,9 @@ cpdef ndarray[object] ensure_string_array( result = result.copy() elif not copy and result is arr: already_copied = False + elif not copy and not result.flags.writeable: + # Weird edge case where result is a view + already_copied = False if issubclass(arr.dtype.type, np.str_): # short-circuit, all elements are str diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 3c1a157dd2c6a..f280e2143fee0 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -4,6 +4,7 @@ import pytest from pandas.compat.pyarrow import pa_version_under12p0 +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -139,6 +140,17 @@ def test_astype_string_copy_on_pickle_roundrip(): tm.assert_series_equal(base, 
base_copy) +@td.skip_if_no("pyarrow") +def test_astype_string_read_only_on_pickle_roundrip(): + # https://github.com/pandas-dev/pandas/issues/54654 + # ensure_string_array may alter read-only array inplace + base = Series(np.array([(1, 2), None, 1], dtype="object")) + base_copy = pickle.loads(pickle.dumps(base)) + base_copy._values.flags.writeable = False + base_copy.astype("string[pyarrow]", copy=False) + tm.assert_series_equal(base, base_copy) + + def test_astype_dict_dtypes(using_copy_on_write): df = DataFrame( {"a": [1, 2, 3], "b": [4, 5, 6], "c": Series([1.5, 1.5, 1.5], dtype="float64")} From a28de76044305b39490ee38895b5db9ce0d699d0 Mon Sep 17 00:00:00 2001 From: Xiao Yuan Date: Mon, 5 Feb 2024 02:19:04 +0800 Subject: [PATCH 40/50] Fix errors in docstring for `pandas.PeriodIndex` (#57247) * Fix docstring errors * Update --- ci/code_checks.sh | 1 - pandas/core/indexes/period.py | 90 ++++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 30 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2eb5b73d68964..75ce6b179b966 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -90,7 +90,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.TimedeltaIndex.round\ pandas.TimedeltaIndex.floor\ pandas.TimedeltaIndex.ceil\ - pandas.PeriodIndex\ pandas.PeriodIndex.strftime\ pandas.Series.rename_axis\ pandas.Series.dt.to_period\ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b2f1933800fd3..ab499665b13ed 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -94,39 +94,24 @@ class PeriodIndex(DatetimeIndexOpsMixin): ---------- data : array-like (1d int np.ndarray or PeriodArray), optional Optional period-like data to construct index with. - copy : bool - Make a copy of input ndarray. - freq : str or period object, optional - One of pandas period strings or corresponding objects. - year : int, array, or Series, default None - - .. 
deprecated:: 2.2.0 - Use PeriodIndex.from_fields instead. - month : int, array, or Series, default None - - .. deprecated:: 2.2.0 - Use PeriodIndex.from_fields instead. - quarter : int, array, or Series, default None + ordinal : array-like of int, optional + The period offsets from the proleptic Gregorian epoch. .. deprecated:: 2.2.0 - Use PeriodIndex.from_fields instead. - day : int, array, or Series, default None - - .. deprecated:: 2.2.0 - Use PeriodIndex.from_fields instead. - hour : int, array, or Series, default None - - .. deprecated:: 2.2.0 - Use PeriodIndex.from_fields instead. - minute : int, array, or Series, default None - - .. deprecated:: 2.2.0 - Use PeriodIndex.from_fields instead. - second : int, array, or Series, default None + Use PeriodIndex.from_ordinals instead. + freq : str or period object, optional + One of pandas period strings or corresponding objects. + dtype : str or PeriodDtype, default None + A dtype from which to extract a freq. + copy : bool + Make a copy of input ndarray. + name : str, default None + Name of the resulting PeriodIndex. + **fields : optional + Date fields such as year, month, etc. .. deprecated:: 2.2.0 Use PeriodIndex.from_fields instead. - dtype : str or PeriodDtype, default None Attributes ---------- @@ -171,7 +156,7 @@ class PeriodIndex(DatetimeIndexOpsMixin): Examples -------- - >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3]) + >>> idx = pd.PeriodIndex(data=['2000Q1', '2002Q3'], freq='Q') >>> idx PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') """ @@ -331,6 +316,31 @@ def from_fields( second=None, freq=None, ) -> Self: + """ + Construct a PeriodIndex from fields (year, month, day, etc.). 
+ + Parameters + ---------- + year : int, array, or Series, default None + quarter : int, array, or Series, default None + month : int, array, or Series, default None + day : int, array, or Series, default None + hour : int, array, or Series, default None + minute : int, array, or Series, default None + second : int, array, or Series, default None + freq : str or period object, optional + One of pandas period strings or corresponding objects. + + Returns + ------- + PeriodIndex + + Examples + -------- + >>> idx = pd.PeriodIndex.from_fields(year=[2000, 2002], quarter=[1, 3]) + >>> idx + PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') + """ fields = { "year": year, "quarter": quarter, @@ -346,6 +356,28 @@ def from_fields( @classmethod def from_ordinals(cls, ordinals, *, freq, name=None) -> Self: + """ + Construct a PeriodIndex from ordinals. + + Parameters + ---------- + ordinals : array-like of int + The period offsets from the proleptic Gregorian epoch. + freq : str or period object + One of pandas period strings or corresponding objects. + name : str, default None + Name of the resulting PeriodIndex. 
+ + Returns + ------- + PeriodIndex + + Examples + -------- + >>> idx = pd.PeriodIndex.from_ordinals([-1, 0, 1], freq='Q') + >>> idx + PeriodIndex(['1969Q4', '1970Q1', '1970Q2'], dtype='period[Q-DEC]') + """ ordinals = np.asarray(ordinals, dtype=np.int64) dtype = PeriodDtype(freq) data = PeriodArray._simple_new(ordinals, dtype=dtype) From 266bd4cde81f917c9d30257102edc7b0b80fb404 Mon Sep 17 00:00:00 2001 From: Jordan Murphy <35613487+jordan-d-murphy@users.noreply.github.com> Date: Sun, 4 Feb 2024 11:21:22 -0700 Subject: [PATCH 41/50] DOC: fix PR02 errors in docstring for pandas.Series.rename_axis (#57239) * DOC: fix PR02 errors in docstring for pandas.Series.rename_axis * Refactor Series.rename_axis and NDFrame.rename_axis to have separate docstrings * removed unnecessary columns ref in docstring for Series * removed another unnecessary columns ref in docstring for Series --- ci/code_checks.sh | 1 - pandas/core/generic.py | 31 ++++++--------------- pandas/core/series.py | 62 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 75ce6b179b966..cebc8e976425b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -91,7 +91,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.TimedeltaIndex.floor\ pandas.TimedeltaIndex.ceil\ pandas.PeriodIndex.strftime\ - pandas.Series.rename_axis\ pandas.Series.dt.to_period\ pandas.Series.dt.tz_localize\ pandas.Series.dt.tz_convert\ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1503bb37adc29..7bb07694c34a5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1158,18 +1158,18 @@ def rename_axis( ---------- mapper : scalar, list-like, optional Value to set the axis name attribute. - index, columns : scalar, list-like, dict-like or function, optional - A scalar, list-like, dict-like or functions transformations to - apply to that axis' values. 
- Note that the ``columns`` parameter is not allowed if the - object is a Series. This parameter only apply for DataFrame - type objects. Use either ``mapper`` and ``axis`` to specify the axis to target with ``mapper``, or ``index`` and/or ``columns``. + index : scalar, list-like, dict-like or function, optional + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. + columns : scalar, list-like, dict-like or function, optional + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to rename. For `Series` this parameter is unused and defaults to 0. + The axis to rename. copy : bool, default None Also copy underlying data. @@ -1190,7 +1190,7 @@ def rename_axis( Returns ------- - Series, DataFrame, or None + DataFrame, or None The same type as the caller or None if ``inplace=True``. See Also @@ -1220,21 +1220,6 @@ def rename_axis( Examples -------- - **Series** - - >>> s = pd.Series(["dog", "cat", "monkey"]) - >>> s - 0 dog - 1 cat - 2 monkey - dtype: object - >>> s.rename_axis("animal") - animal - 0 dog - 1 cat - 2 monkey - dtype: object - **DataFrame** >>> df = pd.DataFrame({"num_legs": [4, 4, 2], diff --git a/pandas/core/series.py b/pandas/core/series.py index e9d340237c234..d3c199286931f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5041,7 +5041,6 @@ def rename_axis( ) -> Self | None: ... - @doc(NDFrame.rename_axis) def rename_axis( self, mapper: IndexLabel | lib.NoDefault = lib.no_default, @@ -5051,6 +5050,67 @@ def rename_axis( copy: bool = True, inplace: bool = False, ) -> Self | None: + """ + Set the name of the axis for the index. + + Parameters + ---------- + mapper : scalar, list-like, optional + Value to set the axis name attribute. + + Use either ``mapper`` and ``axis`` to + specify the axis to target with ``mapper``, or ``index``. 
+ + index : scalar, list-like, dict-like or function, optional + A scalar, list-like, dict-like or functions transformations to + apply to that axis' values. + axis : {0 or 'index'}, default 0 + The axis to rename. For `Series` this parameter is unused and defaults to 0. + copy : bool, default None + Also copy underlying data. + + .. note:: + The `copy` keyword will change behavior in pandas 3.0. + `Copy-on-Write + <https://pandas.pydata.org/docs/dev/user_guide/copy_on_write.html>`__ + will be enabled by default, which means that all methods with a + `copy` keyword will use a lazy copy mechanism to defer the copy and + ignore the `copy` keyword. The `copy` keyword will be removed in a + future version of pandas. + + You can already get the future behavior and improvements through + enabling copy on write ``pd.options.mode.copy_on_write = True`` + inplace : bool, default False + Modifies the object directly, instead of creating a new Series + or DataFrame. + + Returns + ------- + Series, or None + The same type as the caller or None if ``inplace=True``. + + See Also + -------- + Series.rename : Alter Series index labels or name. + DataFrame.rename : Alter DataFrame index labels or name. + Index.rename : Set new names on index. 
+ + Examples + -------- + + >>> s = pd.Series(["dog", "cat", "monkey"]) + >>> s + 0 dog + 1 cat + 2 monkey + dtype: object + >>> s.rename_axis("animal") + animal + 0 dog + 1 cat + 2 monkey + dtype: object + """ return super().rename_axis( mapper=mapper, index=index, From 8848692449cb2eab710ae94da8620df0552a7295 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 5 Feb 2024 02:00:06 +0000 Subject: [PATCH 42/50] CoW: Enforce some deprecations on the block level (#57253) --- pandas/core/frame.py | 1 - pandas/core/generic.py | 19 ++- pandas/core/internals/blocks.py | 202 ++++++++++++------------------ pandas/core/internals/managers.py | 50 +------- pandas/core/series.py | 2 +- 5 files changed, 94 insertions(+), 180 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index afa680d064c4a..614e8ff0232f5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10744,7 +10744,6 @@ def _series_round(ser: Series, decimals: int) -> Series: # type "Union[int, integer[Any]]"; expected "int" new_mgr = self._mgr.round( decimals=decimals, # type: ignore[arg-type] - using_cow=using_copy_on_write(), ) return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( self, method="round" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 7bb07694c34a5..61fb757fafec4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6360,9 +6360,6 @@ def astype( 2 2020-01-03 dtype: datetime64[ns] """ - if copy and using_copy_on_write(): - copy = False - if is_dict_like(dtype): if self.ndim == 1: # i.e. Series if len(dtype) > 1 or self.name not in dtype: @@ -6371,7 +6368,7 @@ def astype( "the key in Series dtype mappings." 
) new_type = dtype[self.name] - return self.astype(new_type, copy, errors) + return self.astype(new_type, errors=errors) # GH#44417 cast to Series so we can use .iat below, which will be # robust in case we @@ -6393,10 +6390,10 @@ def astype( for i, (col_name, col) in enumerate(self.items()): cdt = dtype_ser.iat[i] if isna(cdt): - res_col = col.copy(deep=copy) + res_col = col.copy(deep=False) else: try: - res_col = col.astype(dtype=cdt, copy=copy, errors=errors) + res_col = col.astype(dtype=cdt, errors=errors) except ValueError as ex: ex.args = ( f"{ex}: Error while type casting for column '{col_name}'", @@ -6410,22 +6407,20 @@ def astype( if isinstance(dtype, ExtensionDtype) and all( arr.dtype == dtype for arr in self._mgr.arrays ): - return self.copy(deep=copy) + return self.copy(deep=False) # GH 18099/22869: columnwise conversion to extension dtype # GH 24704: self.items handles duplicate column names - results = [ - ser.astype(dtype, copy=copy, errors=errors) for _, ser in self.items() - ] + results = [ser.astype(dtype, errors=errors) for _, ser in self.items()] else: # else, only a single dtype is given - new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors) + new_data = self._mgr.astype(dtype=dtype, errors=errors) res = self._constructor_from_mgr(new_data, axes=new_data.axes) return res.__finalize__(self, method="astype") # GH 33113: handle empty frame or series if not results: - return self.copy(deep=None) + return self.copy(deep=False) # GH 19920: retain column metadata after concat result = concat(results, axis=1, copy=False) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bb65e7a4d0838..02296643acc3e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -527,14 +527,13 @@ def coerce_to_target_dtype(self, other, warn_on_upcast: bool = False) -> Block: f"{self.values.dtype}. Please report a bug at " "https://github.com/pandas-dev/pandas/issues." 
) - return self.astype(new_dtype, copy=False) + return self.astype(new_dtype) @final def _maybe_downcast( self, blocks: list[Block], downcast, - using_cow: bool, caller: str, ) -> list[Block]: if downcast is False: @@ -551,7 +550,7 @@ def _maybe_downcast( return blocks nbs = extend_blocks( - [blk.convert(using_cow=using_cow, copy=not using_cow) for blk in blocks] + [blk.convert(using_cow=True, copy=False) for blk in blocks] ) if caller == "fillna": if len(nbs) != len(blocks) or not all( @@ -576,7 +575,7 @@ def _maybe_downcast( elif caller == "where" and get_option("future.no_silent_downcasting") is True: return blocks else: - nbs = extend_blocks([b._downcast_2d(downcast, using_cow) for b in blocks]) + nbs = extend_blocks([b._downcast_2d(downcast, True) for b in blocks]) # When _maybe_downcast is called with caller="where", it is either # a) with downcast=False, which is a no-op (the desired future behavior) @@ -667,8 +666,6 @@ def convert( def convert_dtypes( self, - copy: bool, - using_cow: bool, infer_objects: bool = True, convert_string: bool = True, convert_integer: bool = True, @@ -677,14 +674,14 @@ def convert_dtypes( dtype_backend: DtypeBackend = "numpy_nullable", ) -> list[Block]: if infer_objects and self.is_object: - blks = self.convert(copy=False, using_cow=using_cow) + blks = self.convert(copy=False) else: blks = [self] if not any( [convert_floating, convert_integer, convert_boolean, convert_string] ): - return [b.copy(deep=copy) for b in blks] + return [b.copy(deep=False) for b in blks] rbs = [] for blk in blks: @@ -704,11 +701,11 @@ def convert_dtypes( ] if all(dtype == self.dtype for dtype in dtypes): # Avoid block splitting if no dtype changes - rbs.append(blk.copy(deep=copy)) + rbs.append(blk.copy(deep=False)) continue for dtype, b in zip(dtypes, sub_blks): - rbs.append(b.astype(dtype=dtype, copy=copy, squeeze=b.ndim != 1)) + rbs.append(b.astype(dtype=dtype, squeeze=b.ndim != 1)) return rbs # 
--------------------------------------------------------------------- @@ -723,9 +720,7 @@ def dtype(self) -> DtypeObj: def astype( self, dtype: DtypeObj, - copy: bool = False, errors: IgnoreRaise = "raise", - using_cow: bool = False, squeeze: bool = False, ) -> Block: """ @@ -734,13 +729,9 @@ def astype( Parameters ---------- dtype : np.dtype or ExtensionDtype - copy : bool, default False - copy if indicated errors : str, {'raise', 'ignore'}, default 'raise' - ``raise`` : allow exceptions to be raised - ``ignore`` : suppress exceptions. On error return original object - using_cow: bool, default False - Signaling if copy on write copy logic is used. squeeze : bool, default False squeeze values to ndim=1 if only one column is given @@ -754,18 +745,18 @@ def astype( raise ValueError("Can not squeeze with more than one column.") values = values[0, :] # type: ignore[call-overload] - new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) + new_values = astype_array_safe(values, dtype, errors=errors) new_values = maybe_coerce_values(new_values) refs = None - if (using_cow or not copy) and astype_is_view(values.dtype, new_values.dtype): + if astype_is_view(values.dtype, new_values.dtype): refs = self.refs newb = self.make_block(new_values, refs=refs) if newb.shape != self.shape: raise TypeError( - f"cannot set astype for copy = [{copy}] for dtype " + f"cannot set astype for dtype " f"({self.dtype.name} [{self.shape}]) to different shape " f"({newb.dtype.name} [{newb.shape}])" ) @@ -801,8 +792,16 @@ def copy(self, deep: bool = True) -> Self: # --------------------------------------------------------------------- # Copy-on-Write Helpers + def _maybe_copy(self, inplace: bool) -> Self: + if inplace: + deep = self.refs.has_reference() + return self.copy(deep=deep) + return self.copy() + @final - def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self: + def _maybe_copy_cow_check( + self, using_cow: bool = True, inplace: bool = True + ) -> Self: if 
using_cow and inplace: deep = self.refs.has_reference() blk = self.copy(deep=deep) @@ -811,7 +810,18 @@ def _maybe_copy(self, using_cow: bool, inplace: bool) -> Self: return blk @final - def _get_refs_and_copy(self, using_cow: bool, inplace: bool): + def _get_refs_and_copy(self, inplace: bool): + refs = None + copy = not inplace + if inplace: + if self.refs.has_reference(): + copy = True + else: + refs = self.refs + return copy, refs + + @final + def _get_refs_and_copy_cow_check(self, using_cow: bool, inplace: bool): refs = None copy = not inplace if inplace: @@ -847,7 +857,7 @@ def replace( if isinstance(values, Categorical): # TODO: avoid special-casing # GH49404 - blk = self._maybe_copy(using_cow, inplace) + blk = self._maybe_copy_cow_check(using_cow, inplace) values = cast(Categorical, blk.values) values._replace(to_replace=to_replace, value=value, inplace=True) return [blk] @@ -875,7 +885,7 @@ def replace( elif self._can_hold_element(value): # TODO(CoW): Maybe split here as well into columns where mask has True # and rest? 
- blk = self._maybe_copy(using_cow, inplace) + blk = self._maybe_copy_cow_check(using_cow, inplace) putmask_inplace(blk.values, mask, value) if not (self.is_object and value is None): @@ -968,7 +978,7 @@ def _replace_regex( rx = re.compile(to_replace) - block = self._maybe_copy(using_cow, inplace) + block = self._maybe_copy_cow_check(using_cow, inplace) replace_regex(block.values, rx, value, mask) @@ -1005,7 +1015,7 @@ def replace_list( if isinstance(values, Categorical): # TODO: avoid special-casing # GH49404 - blk = self._maybe_copy(using_cow, inplace) + blk = self._maybe_copy_cow_check(using_cow, inplace) values = cast(Categorical, blk.values) values._replace(to_replace=src_list, value=dest_list, inplace=True) return [blk] @@ -1164,7 +1174,7 @@ def _replace_coerce( # gh-45601, gh-45836, gh-46634 if mask.any(): has_ref = self.refs.has_reference() - nb = self.astype(np.dtype(object), copy=False, using_cow=using_cow) + nb = self.astype(np.dtype(object)) if (nb is self or using_cow) and not inplace: nb = nb.copy() elif inplace and has_ref and nb.refs.has_reference() and using_cow: @@ -1325,7 +1335,7 @@ def _unstack( # --------------------------------------------------------------------- - def setitem(self, indexer, value, using_cow: bool = False) -> Block: + def setitem(self, indexer, value) -> Block: """ Attempt self.values[indexer] = value, possibly creating a new array. @@ -1335,8 +1345,6 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: The subset of self.values to set value : object The value being set - using_cow: bool, default False - Signaling if CoW is used. 
Returns ------- @@ -1375,7 +1383,7 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: # test_iloc_setitem_custom_object casted = setitem_datetimelike_compat(values, len(vi), casted) - self = self._maybe_copy(using_cow, inplace=True) + self = self._maybe_copy(inplace=True) values = cast(np.ndarray, self.values.T) if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 1: # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 @@ -1383,7 +1391,7 @@ def setitem(self, indexer, value, using_cow: bool = False) -> Block: values[indexer] = casted return self - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask(self, mask, new) -> list[Block]: """ putmask the data to the block; it is possible that we may create a new dtype of block @@ -1394,7 +1402,6 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: ---------- mask : np.ndarray[bool], SparseArray[bool], or BooleanArray new : a ndarray/object - using_cow: bool, default False Returns ------- @@ -1412,14 +1419,12 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: new = extract_array(new, extract_numpy=True) if noop: - if using_cow: - return [self.copy(deep=False)] - return [self] + return [self.copy(deep=False)] try: casted = np_can_hold_element(values.dtype, new) - self = self._maybe_copy(using_cow, inplace=True) + self = self._maybe_copy(inplace=True) values = cast(np.ndarray, self.values) putmask_without_repeat(values.T, mask, casted) @@ -1435,7 +1440,7 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: ).putmask(mask, new) else: indexer = mask.nonzero()[0] - nb = self.setitem(indexer, new[indexer], using_cow=using_cow) + nb = self.setitem(indexer, new[indexer]) return [nb] else: @@ -1450,13 +1455,11 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: n = new[:, i : i + 1] submask = orig_mask[:, i : i + 1] - rbs = nb.putmask(submask, n, 
using_cow=using_cow) + rbs = nb.putmask(submask, n) res_blocks.extend(rbs) return res_blocks - def where( - self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False - ) -> list[Block]: + def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: """ evaluate the block; return result block(s) from the result @@ -1487,9 +1490,7 @@ def where( icond, noop = validate_putmask(values, ~cond) if noop: # GH-39595: Always return a copy; short-circuit up/downcasting - if using_cow: - return [self.copy(deep=False)] - return [self.copy()] + return [self.copy(deep=False)] if other is lib.no_default: other = self.fill_value @@ -1508,10 +1509,8 @@ def where( # no need to split columns block = self.coerce_to_target_dtype(other) - blocks = block.where(orig_other, cond, using_cow=using_cow) - return self._maybe_downcast( - blocks, downcast=_downcast, using_cow=using_cow, caller="where" - ) + blocks = block.where(orig_other, cond) + return self._maybe_downcast(blocks, downcast=_downcast, caller="where") else: # since _maybe_downcast would split blocks anyway, we @@ -1528,9 +1527,7 @@ def where( oth = other[:, i : i + 1] submask = cond[:, i : i + 1] - rbs = nb.where( - oth, submask, _downcast=_downcast, using_cow=using_cow - ) + rbs = nb.where(oth, submask, _downcast=_downcast) res_blocks.extend(rbs) return res_blocks @@ -1579,7 +1576,6 @@ def fillna( limit: int | None = None, inplace: bool = False, downcast=None, - using_cow: bool = False, ) -> list[Block]: """ fillna on the block with the value. If we fail, then convert to @@ -1598,24 +1594,18 @@ def fillna( if noop: # we can't process the value, but nothing to do if inplace: - if using_cow: - return [self.copy(deep=False)] - # Arbitrarily imposing the convention that we ignore downcast - # on no-op when inplace=True - return [self] + return [self.copy(deep=False)] else: # GH#45423 consistent downcasting on no-ops. 
- nb = self.copy(deep=not using_cow) - nbs = nb._maybe_downcast( - [nb], downcast=downcast, using_cow=using_cow, caller="fillna" - ) + nb = self.copy(deep=False) + nbs = nb._maybe_downcast([nb], downcast=downcast, caller="fillna") return nbs if limit is not None: mask[mask.cumsum(self.ndim - 1) > limit] = False if inplace: - nbs = self.putmask(mask.T, value, using_cow=using_cow) + nbs = self.putmask(mask.T, value) else: # without _downcast, we would break # test_fillna_dtype_conversion_equiv_replace @@ -1626,9 +1616,7 @@ def fillna( # different behavior in _maybe_downcast. return extend_blocks( [ - blk._maybe_downcast( - [blk], downcast=downcast, using_cow=using_cow, caller="fillna" - ) + blk._maybe_downcast([blk], downcast=downcast, caller="fillna") for blk in nbs ] ) @@ -1642,15 +1630,12 @@ def pad_or_backfill( limit: int | None = None, limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, - using_cow: bool = False, ) -> list[Block]: if not self._can_hold_na: # If there are no NAs, then interpolate is a no-op - if using_cow: - return [self.copy(deep=False)] - return [self] if inplace else [self.copy()] + return [self.copy(deep=False)] - copy, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(inplace) # Dispatch to the NumpyExtensionArray method. 
# We know self.array_values is a NumpyExtensionArray bc EABlock overrides @@ -1669,7 +1654,7 @@ def pad_or_backfill( data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow, caller="fillna") + return nb._maybe_downcast([nb], downcast, caller="fillna") @final def interpolate( @@ -1682,7 +1667,6 @@ def interpolate( limit_direction: Literal["forward", "backward", "both"] = "forward", limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, - using_cow: bool = False, **kwargs, ) -> list[Block]: inplace = validate_bool_kwarg(inplace, "inplace") @@ -1693,20 +1677,16 @@ def interpolate( if not self._can_hold_na: # If there are no NAs, then interpolate is a no-op - if using_cow: - return [self.copy(deep=False)] - return [self] if inplace else [self.copy()] + return [self.copy(deep=False)] # TODO(3.0): this case will not be reachable once GH#53638 is enforced if self.dtype == _dtype_obj: # only deal with floats # bc we already checked that can_hold_na, we don't have int dtype here # test_interp_basic checks that we make a copy here - if using_cow: - return [self.copy(deep=False)] - return [self] if inplace else [self.copy()] + return [self.copy(deep=False)] - copy, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(inplace) # Dispatch to the EA method. 
new_values = self.array_values.interpolate( @@ -1722,7 +1702,7 @@ def interpolate( data = extract_array(new_values, extract_numpy=True) nb = self.make_block_same_class(data, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow, caller="interpolate") + return nb._maybe_downcast([nb], downcast, caller="interpolate") @final def diff(self, n: int) -> list[Block]: @@ -1797,7 +1777,7 @@ def quantile( return new_block_2d(result, placement=self._mgr_locs) @final - def round(self, decimals: int, using_cow: bool = False) -> Self: + def round(self, decimals: int) -> Self: """ Rounds the values. If the block is not of an integer or float dtype, nothing happens. @@ -1809,26 +1789,19 @@ def round(self, decimals: int, using_cow: bool = False) -> Self: decimals: int, Number of decimal places to round to. Caller is responsible for validating this - using_cow: bool, - Whether Copy on Write is enabled right now """ if not self.is_numeric or self.is_bool: - return self.copy(deep=not using_cow) - refs = None + return self.copy(deep=False) # TODO: round only defined on BaseMaskedArray # Series also does this, so would need to fix both places # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]" # has no attribute "round" values = self.values.round(decimals) # type: ignore[union-attr] + + refs = None if values is self.values: - if not using_cow: - # Normally would need to do this before, but - # numpy only returns same array when round operation - # is no-op - # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636 - values = values.copy() - else: - refs = self.refs + refs = self.refs + return self.make_block_same_class(values, refs=refs) # --------------------------------------------------------------------- @@ -1923,7 +1896,7 @@ def shift(self, periods: int, fill_value: Any = None) -> list[Block]: return [self.make_block_same_class(new_values)] @final - def setitem(self, indexer, 
value, using_cow: bool = False): + def setitem(self, indexer, value): """ Attempt self.values[indexer] = value, possibly creating a new array. @@ -1936,8 +1909,6 @@ def setitem(self, indexer, value, using_cow: bool = False): The subset of self.values to set value : object The value being set - using_cow: bool, default False - Signaling if CoW is used. Returns ------- @@ -1980,9 +1951,7 @@ def setitem(self, indexer, value, using_cow: bool = False): return self @final - def where( - self, other, cond, _downcast: str | bool = "infer", using_cow: bool = False - ) -> list[Block]: + def where(self, other, cond, _downcast: str | bool = "infer") -> list[Block]: # _downcast private bc we only specify it when calling from fillna arr = self.values.T @@ -2000,9 +1969,7 @@ def where( if noop: # GH#44181, GH#45135 # Avoid a) raising for Interval/PeriodDtype and b) unnecessary object upcast - if using_cow: - return [self.copy(deep=False)] - return [self.copy()] + return [self.copy(deep=False)] try: res_values = arr._where(cond, other).T @@ -2011,19 +1978,15 @@ def where( if isinstance(self.dtype, IntervalDtype): # TestSetitemFloatIntervalWithIntIntervalValues blk = self.coerce_to_target_dtype(orig_other) - nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) - return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow, caller="where" - ) + nbs = blk.where(orig_other, orig_cond) + return self._maybe_downcast(nbs, downcast=_downcast, caller="where") elif isinstance(self, NDArrayBackedExtensionBlock): # NB: not (yet) the same as # isinstance(values, NDArrayBackedExtensionArray) blk = self.coerce_to_target_dtype(orig_other) - nbs = blk.where(orig_other, orig_cond, using_cow=using_cow) - return self._maybe_downcast( - nbs, downcast=_downcast, using_cow=using_cow, caller="where" - ) + nbs = blk.where(orig_other, orig_cond) + return self._maybe_downcast(nbs, downcast=_downcast, caller="where") else: raise @@ -2041,7 +2004,7 @@ def where( n = orig_other[:, i : i + 
1] submask = orig_cond[:, i : i + 1] - rbs = nb.where(n, submask, using_cow=using_cow) + rbs = nb.where(n, submask) res_blocks.extend(rbs) return res_blocks @@ -2049,7 +2012,7 @@ def where( return [nb] @final - def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: + def putmask(self, mask, new) -> list[Block]: """ See Block.putmask.__doc__ """ @@ -2063,11 +2026,9 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]: mask = self._maybe_squeeze_arg(mask) if not mask.any(): - if using_cow: - return [self.copy(deep=False)] - return [self] + return [self.copy(deep=False)] - self = self._maybe_copy(using_cow, inplace=True) + self = self._maybe_copy(inplace=True) values = self.values if values.ndim == 2: values = values.T @@ -2149,7 +2110,6 @@ def pad_or_backfill( limit: int | None = None, limit_area: Literal["inside", "outside"] | None = None, downcast: Literal["infer"] | None = None, - using_cow: bool = False, ) -> list[Block]: values = self.values @@ -2191,7 +2151,6 @@ def fillna( limit: int | None = None, inplace: bool = False, downcast=None, - using_cow: bool = False, ) -> list[Block]: if isinstance(self.dtype, IntervalDtype): # Block.fillna handles coercion (test_fillna_interval) @@ -2200,13 +2159,12 @@ def fillna( limit=limit, inplace=inplace, downcast=downcast, - using_cow=using_cow, ) - if using_cow and self._can_hold_na and not self.values._hasna: + if self._can_hold_na and not self.values._hasna: refs = self.refs new_values = self.values else: - copy, refs = self._get_refs_and_copy(using_cow, inplace) + copy, refs = self._get_refs_and_copy(inplace) try: new_values = self.values.fillna( @@ -2230,7 +2188,7 @@ def fillna( ) nb = self.make_block_same_class(new_values, refs=refs) - return nb._maybe_downcast([nb], downcast, using_cow=using_cow, caller="fillna") + return nb._maybe_downcast([nb], downcast, caller="fillna") @cache_readonly def shape(self) -> Shape: diff --git a/pandas/core/internals/managers.py 
b/pandas/core/internals/managers.py index 5a8a14168d504..cda5575a2b04e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -470,7 +470,6 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: limit=limit, inplace=inplace, downcast=downcast, - using_cow=using_copy_on_write(), ) @final @@ -486,7 +485,6 @@ def where(self, other, cond, align: bool) -> Self: align_keys=align_keys, other=other, cond=cond, - using_cow=using_copy_on_write(), ) @final @@ -502,16 +500,11 @@ def putmask(self, mask, new, align: bool = True) -> Self: align_keys=align_keys, mask=mask, new=new, - using_cow=using_copy_on_write(), ) @final - def round(self, decimals: int, using_cow: bool = False) -> Self: - return self.apply( - "round", - decimals=decimals, - using_cow=using_cow, - ) + def round(self, decimals: int) -> Self: + return self.apply("round", decimals=decimals) @final def replace(self, to_replace, value, inplace: bool) -> Self: @@ -558,20 +551,10 @@ def replace_list( return bm def interpolate(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "interpolate", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - ) + return self.apply("interpolate", inplace=inplace, **kwargs) def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: - return self.apply( - "pad_or_backfill", - inplace=inplace, - **kwargs, - using_cow=using_copy_on_write(), - ) + return self.apply("pad_or_backfill", inplace=inplace, **kwargs) def shift(self, periods: int, fill_value) -> Self: if fill_value is lib.no_default: @@ -622,21 +605,7 @@ def diff(self, n: int) -> Self: return self.apply("diff", n=n) def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self: - if copy is None: - if using_copy_on_write(): - copy = False - else: - copy = True - elif using_copy_on_write(): - copy = False - - return self.apply( - "astype", - dtype=dtype, - copy=copy, - errors=errors, - using_cow=using_copy_on_write(), - ) + 
return self.apply("astype", dtype=dtype, errors=errors) def convert(self, copy: bool | None) -> Self: if copy is None: @@ -650,14 +619,7 @@ def convert(self, copy: bool | None) -> Self: return self.apply("convert", copy=copy, using_cow=using_copy_on_write()) def convert_dtypes(self, **kwargs): - if using_copy_on_write(): - copy = False - else: - copy = True - - return self.apply( - "convert_dtypes", copy=copy, using_cow=using_copy_on_write(), **kwargs - ) + return self.apply("convert_dtypes", **kwargs) def get_values_for_csv( self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None diff --git a/pandas/core/series.py b/pandas/core/series.py index d3c199286931f..78a3bdd2281ce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2671,7 +2671,7 @@ def round(self, decimals: int = 0, *args, **kwargs) -> Series: dtype: float64 """ nv.validate_round(args, kwargs) - new_mgr = self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()) + new_mgr = self._mgr.round(decimals=decimals) return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__( self, method="round" ) From 46fd7114d6ded7dcd58499776175d4ccfd04d9da Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 5 Feb 2024 02:01:35 +0000 Subject: [PATCH 43/50] CoW: Remove copy-on-write conditions from frame tests (#57255) --- pandas/tests/frame/indexing/test_getitem.py | 9 +--- pandas/tests/frame/indexing/test_indexing.py | 47 ++++------------- pandas/tests/frame/indexing/test_insert.py | 13 ++--- pandas/tests/frame/indexing/test_setitem.py | 35 ++++--------- pandas/tests/frame/indexing/test_xs.py | 32 ++++-------- pandas/tests/frame/methods/test_align.py | 7 +-- pandas/tests/frame/methods/test_cov_corr.py | 13 ++--- pandas/tests/frame/methods/test_fillna.py | 39 +++----------- .../tests/frame/methods/test_interpolate.py | 47 ++++++----------- pandas/tests/frame/methods/test_quantile.py | 13 ++--- 
pandas/tests/frame/methods/test_reindex.py | 19 ++----- pandas/tests/frame/methods/test_rename.py | 7 +-- pandas/tests/frame/methods/test_set_axis.py | 29 ++--------- .../tests/frame/methods/test_sort_values.py | 13 ++--- .../frame/methods/test_to_dict_of_blocks.py | 21 ++------ pandas/tests/frame/methods/test_to_numpy.py | 15 ++---- pandas/tests/frame/methods/test_transpose.py | 15 ++---- pandas/tests/frame/methods/test_update.py | 16 ++---- pandas/tests/frame/methods/test_values.py | 26 +++------- pandas/tests/frame/test_api.py | 8 +-- pandas/tests/frame/test_arithmetic.py | 12 ++--- pandas/tests/frame/test_block_internals.py | 52 ++++--------------- pandas/tests/frame/test_constructors.py | 39 +++----------- 23 files changed, 129 insertions(+), 398 deletions(-) diff --git a/pandas/tests/frame/indexing/test_getitem.py b/pandas/tests/frame/indexing/test_getitem.py index 73683922bcc92..25d6e06a4c675 100644 --- a/pandas/tests/frame/indexing/test_getitem.py +++ b/pandas/tests/frame/indexing/test_getitem.py @@ -391,18 +391,13 @@ def test_getitem_empty_frame_with_boolean(self): df2 = df[df > 0] tm.assert_frame_equal(df, df2) - def test_getitem_returns_view_when_column_is_unique_in_df( - self, using_copy_on_write - ): + def test_getitem_returns_view_when_column_is_unique_in_df(self): # GH#45316 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "a", "b"]) df_orig = df.copy() view = df["b"] view.loc[:] = 100 - if using_copy_on_write: - expected = df_orig - else: - expected = DataFrame([[1, 2, 100], [4, 5, 100]], columns=["a", "a", "b"]) + expected = df_orig tm.assert_frame_equal(df, expected) def test_getitem_frozenset_unique_in_column(self): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index b48ad7e3481b9..97176b20376ff 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -565,9 +565,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self): with 
pytest.raises(KeyError, match=r"^3$"): df2.loc[3:11] = 0 - def test_fancy_getitem_slice_mixed( - self, float_frame, float_string_frame, using_copy_on_write - ): + def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): sliced = float_string_frame.iloc[:, -3:] assert sliced["D"].dtype == np.float64 @@ -579,13 +577,7 @@ def test_fancy_getitem_slice_mixed( assert np.shares_memory(sliced["C"]._values, float_frame["C"]._values) sliced.loc[:, "C"] = 4.0 - if not using_copy_on_write: - assert (float_frame["C"] == 4).all() - - # with the enforcement of GH#45333 in 2.0, this remains a view - np.shares_memory(sliced["C"]._values, float_frame["C"]._values) - else: - tm.assert_frame_equal(float_frame, original) + tm.assert_frame_equal(float_frame, original) def test_getitem_setitem_non_ix_labels(self): df = DataFrame(range(20), index=date_range("2020-01-01", periods=20)) @@ -1053,7 +1045,7 @@ def test_iloc_row(self): expected = df.reindex(df.index[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_row_slice_view(self, using_copy_on_write): + def test_iloc_row_slice_view(self): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), index=range(0, 20, 2) ) @@ -1067,11 +1059,6 @@ def test_iloc_row_slice_view(self, using_copy_on_write): exp_col = original[2].copy() subset.loc[:, 2] = 0.0 - if not using_copy_on_write: - exp_col._values[4:8] = 0.0 - - # With the enforcement of GH#45333 in 2.0, this remains a view - assert np.shares_memory(df[2], subset[2]) tm.assert_series_equal(df[2], exp_col) def test_iloc_col(self): @@ -1097,32 +1084,20 @@ def test_iloc_col(self): expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) - def test_iloc_col_slice_view(self, using_copy_on_write): + def test_iloc_col_slice_view(self): df = DataFrame( np.random.default_rng(2).standard_normal((4, 10)), columns=range(0, 20, 2) ) original = df.copy() subset = df.iloc[:, slice(4, 8)] - if not using_copy_on_write: - 
# verify slice is view - assert np.shares_memory(df[8]._values, subset[8]._values) - - subset.loc[:, 8] = 0.0 - - assert (df[8] == 0).all() - - # with the enforcement of GH#45333 in 2.0, this remains a view - assert np.shares_memory(df[8]._values, subset[8]._values) - else: - if using_copy_on_write: - # verify slice is view - assert np.shares_memory(df[8]._values, subset[8]._values) - subset[8] = 0.0 - # subset changed - assert (subset[8] == 0).all() - # but df itself did not change (setitem replaces full column) - tm.assert_frame_equal(df, original) + # verify slice is view + assert np.shares_memory(df[8]._values, subset[8]._values) + subset[8] = 0.0 + # subset changed + assert (subset[8] == 0).all() + # but df itself did not change (setitem replaces full column) + tm.assert_frame_equal(df, original) def test_loc_duplicates(self): # gh-17105 diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index b9fc5dc195026..2558e8314664a 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -71,7 +71,7 @@ def test_insert_with_columns_dups(self): ) tm.assert_frame_equal(df, exp) - def test_insert_item_cache(self, using_copy_on_write): + def test_insert_item_cache(self): df = DataFrame(np.random.default_rng(2).standard_normal((4, 3))) ser = df[0] expected_warning = PerformanceWarning @@ -80,14 +80,9 @@ def test_insert_item_cache(self, using_copy_on_write): for n in range(100): df[n + 3] = df[1] * n - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df[0][0] - assert df.iloc[0, 0] != 99 - else: - ser.values[0] = 99 - assert df.iloc[0, 0] == df[0][0] - assert df.iloc[0, 0] == 99 + ser.iloc[0] = 99 + assert df.iloc[0, 0] == df[0][0] + assert df.iloc[0, 0] != 99 def test_insert_EA_no_warning(self): # PerformanceWarning about fragmented frame should not be raised when diff --git a/pandas/tests/frame/indexing/test_setitem.py 
b/pandas/tests/frame/indexing/test_setitem.py index 2df01b2cdb721..20e7651f8af83 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -324,7 +324,7 @@ def test_frame_setitem_existing_datetime64_col_other_units(self, unit): df["dates"] = vals assert (df["dates"].values == ex_vals).all() - def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): + def test_setitem_dt64tz(self, timezone_frame): df = timezone_frame idx = df["B"].rename("foo") @@ -345,10 +345,7 @@ def test_setitem_dt64tz(self, timezone_frame, using_copy_on_write): tm.assert_extension_array_equal(v1, v2) v1base = v1._ndarray.base v2base = v2._ndarray.base - if not using_copy_on_write: - assert v1base is None or (id(v1base) != id(v2base)) - else: - assert id(v1base) == id(v2base) + assert id(v1base) == id(v2base) # with nan df2 = df.copy() @@ -844,7 +841,7 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected): class TestDataFrameSetItemWithExpansion: - def test_setitem_listlike_views(self, using_copy_on_write): + def test_setitem_listlike_views(self): # GH#38148 df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]}) @@ -857,10 +854,7 @@ def test_setitem_listlike_views(self, using_copy_on_write): # edit in place the first column to check view semantics df.iloc[0, 0] = 100 - if using_copy_on_write: - expected = Series([1, 2, 3], name="a") - else: - expected = Series([100, 2, 3], name="a") + expected = Series([1, 2, 3], name="a") tm.assert_series_equal(ser, expected) def test_setitem_string_column_numpy_dtype_raising(self): @@ -870,7 +864,7 @@ def test_setitem_string_column_numpy_dtype_raising(self): expected = DataFrame([[1, 2, 5], [3, 4, 6]], columns=[0, 1, "0 - Name"]) tm.assert_frame_equal(df, expected) - def test_setitem_empty_df_duplicate_columns(self, using_copy_on_write): + def test_setitem_empty_df_duplicate_columns(self): # GH#38521 df = DataFrame(columns=["a", "b", "b"], dtype="float64") df.loc[:, "a"] = 
list(range(2)) @@ -1199,7 +1193,7 @@ def test_setitem_always_copy(self, float_frame): assert notna(s[5:10]).all() @pytest.mark.parametrize("consolidate", [True, False]) - def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write): + def test_setitem_partial_column_inplace(self, consolidate): # This setting should be in-place, regardless of whether frame is # single-block or multi-block # GH#304 this used to be incorrectly not-inplace, in which case @@ -1215,18 +1209,11 @@ def test_setitem_partial_column_inplace(self, consolidate, using_copy_on_write): else: assert len(df._mgr.blocks) == 2 - zvals = df["z"]._values - df.loc[2:, "z"] = 42 expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z") tm.assert_series_equal(df["z"], expected) - # check setting occurred in-place - if not using_copy_on_write: - tm.assert_numpy_array_equal(zvals, expected.values) - assert np.shares_memory(zvals, df["z"]._values) - def test_setitem_duplicate_columns_not_inplace(self): # GH#39510 cols = ["A", "B"] * 2 @@ -1298,7 +1285,7 @@ def test_setitem_not_operating_inplace(self, value, set_value, indexer): df[indexer] = set_value tm.assert_frame_equal(view, expected) - def test_setitem_column_update_inplace(self, using_copy_on_write): + def test_setitem_column_update_inplace(self): # https://github.com/pandas-dev/pandas/issues/47172 labels = [f"c{i}" for i in range(10)] @@ -1308,12 +1295,8 @@ def test_setitem_column_update_inplace(self, using_copy_on_write): with tm.raises_chained_assignment_error(): for label in df.columns: df[label][label] = 1 - if not using_copy_on_write: - # diagonal values all updated - assert np.all(values[np.arange(10), np.arange(10)] == 1) - else: - # original dataframe not updated - assert np.all(values[np.arange(10), np.arange(10)] == 0) + # original dataframe not updated + assert np.all(values[np.arange(10), np.arange(10)] == 0) def test_setitem_column_frame_as_category(self): # GH31581 diff --git 
a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 96ae1050ed15a..4878f74bd152e 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -58,7 +58,7 @@ def test_xs_dt_error(self, datetime_frame): ): datetime_frame.xs(datetime_frame.index[0] - BDay()) - def test_xs_other(self, float_frame, using_copy_on_write): + def test_xs_other(self, float_frame): float_frame_orig = float_frame.copy() # xs get column series = float_frame.xs("A", axis=1) @@ -68,12 +68,9 @@ def test_xs_other(self, float_frame, using_copy_on_write): # view is returned if possible series = float_frame.xs("A", axis=1) series[:] = 5 - if using_copy_on_write: - # but with CoW the view shouldn't propagate mutations - tm.assert_series_equal(float_frame["A"], float_frame_orig["A"]) - assert not (expected == 5).all() - else: - assert (expected == 5).all() + # The view shouldn't propagate mutations + tm.assert_series_equal(float_frame["A"], float_frame_orig["A"]) + assert not (expected == 5).all() def test_xs_corner(self): # pathological mixed-type reordering case @@ -363,7 +360,7 @@ def test_xs_droplevel_false(self): expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - def test_xs_droplevel_false_view(self, using_copy_on_write): + def test_xs_droplevel_false_view(self): # GH#37832 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) @@ -371,26 +368,15 @@ def test_xs_droplevel_false_view(self, using_copy_on_write): assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values) df.iloc[0, 0] = 2 - if using_copy_on_write: - # with copy on write the subset is never modified - expected = DataFrame({"a": [1]}) - else: - # modifying original df also modifies result when having a single block - expected = DataFrame({"a": [2]}) + # The subset is never modified + expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) - # with mixed 
dataframe, modifying the parent doesn't modify result - # TODO the "split" path behaves differently here as with single block df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"])) result = df.xs("a", axis=1, drop_level=False) df.iloc[0, 0] = 2 - if using_copy_on_write: - # with copy on write the subset is never modified - expected = DataFrame({"a": [1]}) - else: - # FIXME: iloc does not update the array inplace using - # "split" path - expected = DataFrame({"a": [1]}) + # The subset is never modified + expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) def test_xs_list_indexer_droplevel_false(self): diff --git a/pandas/tests/frame/methods/test_align.py b/pandas/tests/frame/methods/test_align.py index 1f5d960de40c1..aa539dd0b2dbe 100644 --- a/pandas/tests/frame/methods/test_align.py +++ b/pandas/tests/frame/methods/test_align.py @@ -48,15 +48,12 @@ def test_frame_align_aware(self): assert new1.index.tz is timezone.utc assert new2.index.tz is timezone.utc - def test_align_float(self, float_frame, using_copy_on_write): + def test_align_float(self, float_frame): af, bf = float_frame.align(float_frame) assert af._mgr is not float_frame._mgr af, bf = float_frame.align(float_frame, copy=False) - if not using_copy_on_write: - assert af._mgr is float_frame._mgr - else: - assert af._mgr is not float_frame._mgr + assert af._mgr is not float_frame._mgr # axis = 0 other = float_frame.iloc[:-5, :3] diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py index 2a50137c2d6ef..8e73fbf152e79 100644 --- a/pandas/tests/frame/methods/test_cov_corr.py +++ b/pandas/tests/frame/methods/test_cov_corr.py @@ -207,7 +207,7 @@ def test_corr_nullable_integer(self, nullable_column, other_column, method): expected = DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"]) tm.assert_frame_equal(result, expected) - def test_corr_item_cache(self, using_copy_on_write): + def test_corr_item_cache(self): # Check that 
corr does not lead to incorrect entries in item_cache df = DataFrame({"A": range(10)}) @@ -218,15 +218,8 @@ def test_corr_item_cache(self, using_copy_on_write): _ = df.corr(numeric_only=True) - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.loc[0, "A"] == 0 - else: - # Check that the corr didn't break link between ser and df - ser.values[0] = 99 - assert df.loc[0, "A"] == 99 - assert df["A"] is ser - assert df.values[0, 0] == 99 + ser.iloc[0] = 99 + assert df.loc[0, "A"] == 0 @pytest.mark.parametrize("length", [2, 20, 200, 2000]) def test_corr_for_constant_columns(self, length): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index df38ddc6c3116..efb462416e132 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -20,7 +20,7 @@ class TestFillNA: - def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): + def test_fillna_dict_inplace_nonunique_columns(self): df = DataFrame( {"A": [np.nan] * 3, "B": [NaT, Timestamp(1), NaT], "C": [np.nan, "foo", 2]} ) @@ -35,27 +35,16 @@ def test_fillna_dict_inplace_nonunique_columns(self, using_copy_on_write): ) expected.columns = ["A", "A", "A"] tm.assert_frame_equal(df, expected) - - # TODO: what's the expected/desired behavior with CoW? 
- if not using_copy_on_write: - assert tm.shares_memory(df.iloc[:, 0], orig.iloc[:, 0]) assert not tm.shares_memory(df.iloc[:, 1], orig.iloc[:, 1]) - if not using_copy_on_write: - assert tm.shares_memory(df.iloc[:, 2], orig.iloc[:, 2]) - def test_fillna_on_column_view(self, using_copy_on_write): + def test_fillna_on_column_view(self): # GH#46149 avoid unnecessary copies arr = np.full((40, 50), np.nan) df = DataFrame(arr, copy=False) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df[0].fillna(-1, inplace=True) - assert np.isnan(arr[:, 0]).all() - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df[0].fillna(-1, inplace=True) - assert (arr[:, 0] == -1).all() + with tm.raises_chained_assignment_error(): + df[0].fillna(-1, inplace=True) + assert np.isnan(arr[:, 0]).all() # i.e. we didn't create a new 49-column block assert len(df._mgr.arrays) == 1 @@ -107,17 +96,6 @@ def test_fillna_mixed_float(self, mixed_float_frame): result = mf.fillna(method="pad") _check_mixed_float(result, dtype={"C": None}) - def test_fillna_empty(self, using_copy_on_write): - if using_copy_on_write: - pytest.skip("condition is unnecessary complex and is deprecated anyway") - # empty frame (GH#2778) - df = DataFrame(columns=["x"]) - for m in ["pad", "backfill"]: - msg = "Series.fillna with 'method' is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - df.x.fillna(method=m, inplace=True) - df.x.fillna(method=m) - def test_fillna_different_dtype(self, using_infer_string): # with different dtype (GH#3386) df = DataFrame( @@ -746,7 +724,7 @@ def test_fillna_inplace_with_columns_limit_and_value(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("val", [-1, {"x": -1, "y": -1}]) - def test_inplace_dict_update_view(self, val, using_copy_on_write): + def test_inplace_dict_update_view(self, val): # GH#47188 df = DataFrame({"x": [np.nan, 2], "y": [np.nan, 2]}) df_orig = df.copy() @@ -754,10 +732,7 @@ def 
test_inplace_dict_update_view(self, val, using_copy_on_write): df.fillna(val, inplace=True) expected = DataFrame({"x": [-1, 2.0], "y": [-1.0, 2]}) tm.assert_frame_equal(df, expected) - if using_copy_on_write: - tm.assert_frame_equal(result_view, df_orig) - else: - tm.assert_frame_equal(result_view, expected) + tm.assert_frame_equal(result_view, df_orig) def test_single_block_df_with_horizontal_axis(self): # GH 47713 diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 5eb9aee2ffb15..483194a46ce56 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -68,7 +68,7 @@ def test_interpolate_inplace(self, frame_or_series, request): @pytest.mark.xfail( using_pyarrow_string_dtype(), reason="interpolate doesn't work for string" ) - def test_interp_basic(self, using_copy_on_write): + def test_interp_basic(self): df = DataFrame( { "A": [1, 2, np.nan, 4], @@ -93,12 +93,8 @@ def test_interp_basic(self, using_copy_on_write): # check we didn't operate inplace GH#45791 cvalues = df["C"]._values dvalues = df["D"].values - if using_copy_on_write: - assert np.shares_memory(cvalues, result["C"]._values) - assert np.shares_memory(dvalues, result["D"]._values) - else: - assert not np.shares_memory(cvalues, result["C"]._values) - assert not np.shares_memory(dvalues, result["D"]._values) + assert np.shares_memory(cvalues, result["C"]._values) + assert np.shares_memory(dvalues, result["D"]._values) with tm.assert_produces_warning(FutureWarning, match=msg): res = df.interpolate(inplace=True) @@ -371,38 +367,25 @@ def test_interp_raise_on_all_object_dtype(self): with pytest.raises(TypeError, match=msg): df.interpolate() - def test_interp_inplace(self, using_copy_on_write): + def test_interp_inplace(self): df = DataFrame({"a": [1.0, 2.0, np.nan, 4.0]}) - expected = DataFrame({"a": [1.0, 2.0, 3.0, 4.0]}) - expected_cow = df.copy() + expected = df.copy() result = df.copy() 
- if using_copy_on_write: - with tm.raises_chained_assignment_error(): - return_value = result["a"].interpolate(inplace=True) - assert return_value is None - tm.assert_frame_equal(result, expected_cow) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - return_value = result["a"].interpolate(inplace=True) - assert return_value is None - tm.assert_frame_equal(result, expected) + with tm.raises_chained_assignment_error(): + return_value = result["a"].interpolate(inplace=True) + assert return_value is None + tm.assert_frame_equal(result, expected) result = df.copy() msg = "The 'downcast' keyword in Series.interpolate is deprecated" - if using_copy_on_write: - with tm.assert_produces_warning( - (FutureWarning, ChainedAssignmentError), match=msg - ): - return_value = result["a"].interpolate(inplace=True, downcast="infer") - assert return_value is None - tm.assert_frame_equal(result, expected_cow) - else: - with tm.assert_produces_warning(FutureWarning, match=msg): - return_value = result["a"].interpolate(inplace=True, downcast="infer") - assert return_value is None - tm.assert_frame_equal(result, expected.astype("int64")) + with tm.assert_produces_warning( + (FutureWarning, ChainedAssignmentError), match=msg + ): + return_value = result["a"].interpolate(inplace=True, downcast="infer") + assert return_value is None + tm.assert_frame_equal(result, expected) def test_interp_inplace_row(self): # GH 10395 diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index e31e29b1b0cb2..48d55b2954360 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -723,7 +723,7 @@ def test_quantile_empty_no_columns(self, interp_method): expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) - def test_quantile_item_cache(self, interp_method, using_copy_on_write): + def test_quantile_item_cache(self, interp_method): # previous 
behavior incorrect retained an invalid _item_cache entry interpolation, method = interp_method df = DataFrame( @@ -735,14 +735,9 @@ def test_quantile_item_cache(self, interp_method, using_copy_on_write): df.quantile(numeric_only=False, interpolation=interpolation, method=method) - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] != 99 - else: - ser.values[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] == 99 + ser.iloc[0] = 99 + assert df.iloc[0, 0] == df["A"][0] + assert df.iloc[0, 0] != 99 def test_invalid_method(self): with pytest.raises(ValueError, match="Invalid method: foo"): diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index da6d69f36f900..76d80e87bdeb5 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -173,7 +173,7 @@ def test_reindex_copies(self): result2 = df.reindex(columns=cols, index=df.index, copy=True) assert not np.shares_memory(result2[0]._values, df[0]._values) - def test_reindex_copies_ea(self, using_copy_on_write): + def test_reindex_copies_ea(self): # https://github.com/pandas-dev/pandas/pull/51197 # also ensure to honor copy keyword for ExtensionDtypes N = 10 @@ -184,17 +184,11 @@ def test_reindex_copies_ea(self, using_copy_on_write): np.random.default_rng(2).shuffle(cols) result = df.reindex(columns=cols, copy=True) - if using_copy_on_write: - assert np.shares_memory(result[0].array._data, df[0].array._data) - else: - assert not np.shares_memory(result[0].array._data, df[0].array._data) + assert np.shares_memory(result[0].array._data, df[0].array._data) # pass both columns and index result2 = df.reindex(columns=cols, index=df.index, copy=True) - if using_copy_on_write: - assert np.shares_memory(result2[0].array._data, df[0].array._data) - else: - assert not np.shares_memory(result2[0].array._data, df[0].array._data) + assert 
np.shares_memory(result2[0].array._data, df[0].array._data) def test_reindex_date_fill_value(self): # passing date to dt64 is deprecated; enforced in 2.0 to cast to object @@ -602,7 +596,7 @@ def test_reindex_sparse(self): ) tm.assert_frame_equal(result, expected) - def test_reindex(self, float_frame, using_copy_on_write): + def test_reindex(self, float_frame): datetime_series = Series( np.arange(30, dtype=np.float64), index=date_range("2020-01-01", periods=30) ) @@ -644,10 +638,7 @@ def test_reindex(self, float_frame, using_copy_on_write): # Same index, copies values but not index if copy=False newFrame = float_frame.reindex(float_frame.index, copy=False) - if using_copy_on_write: - assert newFrame.index.is_(float_frame.index) - else: - assert newFrame.index is float_frame.index + assert newFrame.index.is_(float_frame.index) # length zero newFrame = float_frame.reindex([]) diff --git a/pandas/tests/frame/methods/test_rename.py b/pandas/tests/frame/methods/test_rename.py index b965a5d973fb6..996fc30552bc4 100644 --- a/pandas/tests/frame/methods/test_rename.py +++ b/pandas/tests/frame/methods/test_rename.py @@ -164,16 +164,13 @@ def test_rename_multiindex(self): renamed = df.rename(index={"foo1": "foo3", "bar2": "bar3"}, level=0) tm.assert_index_equal(renamed.index, new_index) - def test_rename_nocopy(self, float_frame, using_copy_on_write): + def test_rename_nocopy(self, float_frame): renamed = float_frame.rename(columns={"C": "foo"}, copy=False) assert np.shares_memory(renamed["foo"]._values, float_frame["C"]._values) renamed.loc[:, "foo"] = 1.0 - if using_copy_on_write: - assert not (float_frame["C"] == 1.0).all() - else: - assert (float_frame["C"] == 1.0).all() + assert not (float_frame["C"] == 1.0).all() def test_rename_inplace(self, float_frame): float_frame.rename(columns={"C": "foo"}) diff --git a/pandas/tests/frame/methods/test_set_axis.py b/pandas/tests/frame/methods/test_set_axis.py index 8d249bc7b7fa4..8c42498b45621 100644 --- 
a/pandas/tests/frame/methods/test_set_axis.py +++ b/pandas/tests/frame/methods/test_set_axis.py @@ -21,7 +21,7 @@ def test_set_axis(self, obj): result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) - def test_set_axis_copy(self, obj, using_copy_on_write): + def test_set_axis_copy(self, obj): # Test copy keyword GH#47932 new_index = list("abcd")[: len(obj)] @@ -32,16 +32,6 @@ def test_set_axis_copy(self, obj, using_copy_on_write): result = obj.set_axis(new_index, axis=0, copy=True) tm.assert_equal(expected, result) assert result is not obj - # check we DID make a copy - if not using_copy_on_write: - if obj.ndim == 1: - assert not tm.shares_memory(result, obj) - else: - assert not any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) - result = obj.set_axis(new_index, axis=0, copy=False) tm.assert_equal(expected, result) assert result is not obj @@ -58,20 +48,11 @@ def test_set_axis_copy(self, obj, using_copy_on_write): result = obj.set_axis(new_index, axis=0) tm.assert_equal(expected, result) assert result is not obj - if using_copy_on_write: - # check we DID NOT make a copy - if obj.ndim == 1: - assert tm.shares_memory(result, obj) - else: - assert any( - tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) - for i in range(obj.shape[1]) - ) - # check we DID make a copy - elif obj.ndim == 1: - assert not tm.shares_memory(result, obj) + # check we DID NOT make a copy + if obj.ndim == 1: + assert tm.shares_memory(result, obj) else: - assert not any( + assert any( tm.shares_memory(result.iloc[:, i], obj.iloc[:, i]) for i in range(obj.shape[1]) ) diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index 768c85644c977..c146dcc9c2d71 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -592,7 +592,7 @@ def test_sort_values_nat_na_position_default(self): result = expected.sort_values(["A", 
"date"]) tm.assert_frame_equal(result, expected) - def test_sort_values_item_cache(self, using_copy_on_write): + def test_sort_values_item_cache(self): # previous behavior incorrect retained an invalid _item_cache entry df = DataFrame( np.random.default_rng(2).standard_normal((4, 3)), columns=["A", "B", "C"] @@ -603,14 +603,9 @@ def test_sort_values_item_cache(self, using_copy_on_write): df.sort_values(by="A") - if using_copy_on_write: - ser.iloc[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] != 99 - else: - ser.values[0] = 99 - assert df.iloc[0, 0] == df["A"][0] - assert df.iloc[0, 0] == 99 + ser.iloc[0] = 99 + assert df.iloc[0, 0] == df["A"][0] + assert df.iloc[0, 0] != 99 def test_sort_values_reshaping(self): # GH 39426 diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index 19001f10e37e4..0f1f643209db0 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -10,8 +10,7 @@ class TestToDictOfBlocks: - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_no_copy_blocks(self, float_frame, using_copy_on_write): + def test_no_copy_blocks(self, float_frame): # GH#9607 df = DataFrame(float_frame, copy=True) column = df.columns[0] @@ -23,15 +22,10 @@ def test_no_copy_blocks(self, float_frame, using_copy_on_write): _last_df = _df if column in _df: _df.loc[:, column] = _df[column] + 1 + assert _last_df is not None and not _last_df[column].equals(df[column]) - if not using_copy_on_write: - # make sure we did change the original DataFrame - assert _last_df is not None and _last_df[column].equals(df[column]) - else: - assert _last_df is not None and not _last_df[column].equals(df[column]) - -def test_to_dict_of_blocks_item_cache(using_copy_on_write): +def test_to_dict_of_blocks_item_cache(): # Calling to_dict_of_blocks should not poison item_cache df = DataFrame({"a": [1, 2, 3, 
4], "b": ["a", "b", "c", "d"]}) df["c"] = NumpyExtensionArray(np.array([1, 2, None, 3], dtype=object)) @@ -42,15 +36,8 @@ def test_to_dict_of_blocks_item_cache(using_copy_on_write): df._to_dict_of_blocks() - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - ser.values[0] = "foo" - else: - # Check that the to_dict_of_blocks didn't break link between ser and df + with pytest.raises(ValueError, match="read-only"): ser.values[0] = "foo" - assert df.loc[0, "b"] == "foo" - - assert df["b"] is ser def test_set_change_dtype_slice(): diff --git a/pandas/tests/frame/methods/test_to_numpy.py b/pandas/tests/frame/methods/test_to_numpy.py index d92af2775922b..d38bc06260a0e 100644 --- a/pandas/tests/frame/methods/test_to_numpy.py +++ b/pandas/tests/frame/methods/test_to_numpy.py @@ -20,23 +20,16 @@ def test_to_numpy_dtype(self): result = df.to_numpy(dtype="int64") tm.assert_numpy_array_equal(result, expected) - def test_to_numpy_copy(self, using_copy_on_write): + def test_to_numpy_copy(self): arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame(arr) - if using_copy_on_write: - assert df.values.base is not arr - assert df.to_numpy(copy=False).base is df.values.base - else: - assert df.values.base is arr - assert df.to_numpy(copy=False).base is arr + assert df.values.base is not arr + assert df.to_numpy(copy=False).base is df.values.base assert df.to_numpy(copy=True).base is not arr # we still don't want a copy when na_value=np.nan is passed, # and that can be respected because we are already numpy-float - if using_copy_on_write: - assert df.to_numpy(copy=False).base is df.values.base - else: - assert df.to_numpy(copy=False, na_value=np.nan).base is arr + assert df.to_numpy(copy=False).base is df.values.base def test_to_numpy_mixed_dtype_to_str(self): # https://github.com/pandas-dev/pandas/issues/35455 diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index 
45bd8ff0268a8..495663ce135f9 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -124,16 +124,12 @@ def test_transpose_mixed(self): for col, s in mixed_T.items(): assert s.dtype == np.object_ - def test_transpose_get_view(self, float_frame, using_copy_on_write): + def test_transpose_get_view(self, float_frame): dft = float_frame.T dft.iloc[:, 5:10] = 5 + assert (float_frame.values[5:10] != 5).all() - if using_copy_on_write: - assert (float_frame.values[5:10] != 5).all() - else: - assert (float_frame.values[5:10] == 5).all() - - def test_transpose_get_view_dt64tzget_view(self, using_copy_on_write): + def test_transpose_get_view_dt64tzget_view(self): dti = date_range("2016-01-01", periods=6, tz="US/Pacific") arr = dti._data.reshape(3, 2) df = DataFrame(arr) @@ -143,10 +139,7 @@ def test_transpose_get_view_dt64tzget_view(self, using_copy_on_write): assert result._mgr.nblocks == 1 rtrip = result._mgr.blocks[0].values - if using_copy_on_write: - assert np.shares_memory(df._mgr.blocks[0].values._ndarray, rtrip._ndarray) - else: - assert np.shares_memory(arr._ndarray, rtrip._ndarray) + assert np.shares_memory(df._mgr.blocks[0].values._ndarray, rtrip._ndarray) def test_transpose_not_inferring_dt(self): # GH#51546 diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 7ff8508c3b799..788c6220b2477 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -138,7 +138,7 @@ def test_update_datetime_tz(self): expected = DataFrame([pd.Timestamp("2019", tz="UTC")]) tm.assert_frame_equal(result, expected) - def test_update_datetime_tz_in_place(self, using_copy_on_write): + def test_update_datetime_tz_in_place(self): # https://github.com/pandas-dev/pandas/issues/56227 result = DataFrame([pd.Timestamp("2019", tz="UTC")]) orig = result.copy() @@ -146,12 +146,9 @@ def test_update_datetime_tz_in_place(self, using_copy_on_write): 
result.update(result + pd.Timedelta(days=1)) expected = DataFrame([pd.Timestamp("2019-01-02", tz="UTC")]) tm.assert_frame_equal(result, expected) - if not using_copy_on_write: - tm.assert_frame_equal(view, expected) - else: - tm.assert_frame_equal(view, orig) + tm.assert_frame_equal(view, orig) - def test_update_with_different_dtype(self, using_copy_on_write): + def test_update_with_different_dtype(self): # GH#3217 df = DataFrame({"a": [1, 3], "b": [np.nan, 2]}) df["c"] = np.nan @@ -167,7 +164,7 @@ def test_update_with_different_dtype(self, using_copy_on_write): ) tm.assert_frame_equal(df, expected) - def test_update_modify_view(self, using_copy_on_write, using_infer_string): + def test_update_modify_view(self, using_infer_string): # GH#47188 df = DataFrame({"A": ["1", np.nan], "B": ["100", np.nan]}) df2 = DataFrame({"A": ["a", "x"], "B": ["100", "200"]}) @@ -176,10 +173,7 @@ def test_update_modify_view(self, using_copy_on_write, using_infer_string): df2.update(df) expected = DataFrame({"A": ["1", "x"], "B": ["100", "200"]}) tm.assert_frame_equal(df2, expected) - if using_copy_on_write or using_infer_string: - tm.assert_frame_equal(result_view, df2_orig) - else: - tm.assert_frame_equal(result_view, expected) + tm.assert_frame_equal(result_view, df2_orig) def test_update_dt_column_with_NaT_create_column(self): # GH#16713 diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py index f1230e55f9054..dfece3fc7552b 100644 --- a/pandas/tests/frame/methods/test_values.py +++ b/pandas/tests/frame/methods/test_values.py @@ -13,14 +13,10 @@ class TestDataFrameValues: - def test_values(self, float_frame, using_copy_on_write): - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - float_frame.values[:, 0] = 5.0 - assert (float_frame.values[:, 0] != 5).all() - else: + def test_values(self, float_frame): + with pytest.raises(ValueError, match="read-only"): float_frame.values[:, 0] = 5.0 - assert 
(float_frame.values[:, 0] == 5).all() + assert (float_frame.values[:, 0] != 5).all() def test_more_values(self, float_string_frame): values = float_string_frame.values @@ -228,34 +224,26 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame): class TestPrivateValues: - def test_private_values_dt64tz(self, using_copy_on_write): + def test_private_values_dt64tz(self): dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1) df = DataFrame(dta, columns=["A"]) tm.assert_equal(df._values, dta) - if using_copy_on_write: - assert not np.shares_memory(df._values._ndarray, dta._ndarray) - else: - # we have a view - assert np.shares_memory(df._values._ndarray, dta._ndarray) + assert not np.shares_memory(df._values._ndarray, dta._ndarray) # TimedeltaArray tda = dta - dta df2 = df - df tm.assert_equal(df2._values, tda) - def test_private_values_dt64tz_multicol(self, using_copy_on_write): + def test_private_values_dt64tz_multicol(self): dta = date_range("2000", periods=8, tz="US/Central")._data.reshape(-1, 2) df = DataFrame(dta, columns=["A", "B"]) tm.assert_equal(df._values, dta) - if using_copy_on_write: - assert not np.shares_memory(df._values._ndarray, dta._ndarray) - else: - # we have a view - assert np.shares_memory(df._values._ndarray, dta._ndarray) + assert not np.shares_memory(df._values._ndarray, dta._ndarray) # TimedeltaArray tda = dta - dta diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 0112e0093c102..b849baa8cab62 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -325,7 +325,6 @@ def test_set_flags( self, allows_duplicate_labels, frame_or_series, - using_copy_on_write, ): obj = DataFrame({"A": [1, 2]}) key = (0, 0) @@ -354,12 +353,7 @@ def test_set_flags( assert np.may_share_memory(obj["A"].values, result["A"].values) result.iloc[key] = 0 - if using_copy_on_write: - assert obj.iloc[key] == 1 - else: - assert obj.iloc[key] == 0 - # set back to 1 for test below - 
result.iloc[key] = 1 + assert obj.iloc[key] == 1 # Now we do copy. result = obj.set_flags( diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 4fb0bbafc6879..fc40fd5329118 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2006,7 +2006,7 @@ def test_arith_list_of_arraylike_raise(to_add): to_add + df -def test_inplace_arithmetic_series_update(using_copy_on_write): +def test_inplace_arithmetic_series_update(): # https://github.com/pandas-dev/pandas/issues/36373 df = DataFrame({"A": [1, 2, 3]}) df_orig = df.copy() @@ -2014,14 +2014,8 @@ def test_inplace_arithmetic_series_update(using_copy_on_write): vals = series._values series += 1 - if using_copy_on_write: - assert series._values is not vals - tm.assert_frame_equal(df, df_orig) - else: - assert series._values is vals - - expected = DataFrame({"A": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) + assert series._values is not vals + tm.assert_frame_equal(df, df_orig) def test_arithmetic_multiindex_align(): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 36013e1ac949f..78365ad4a0004 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -81,25 +81,10 @@ def test_consolidate_inplace(self, float_frame): for letter in range(ord("A"), ord("Z")): float_frame[chr(letter)] = chr(letter) - def test_modify_values(self, float_frame, using_copy_on_write): - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - float_frame.values[5] = 5 - assert (float_frame.values[5] != 5).all() - return - - float_frame.values[5] = 5 - assert (float_frame.values[5] == 5).all() - - # unconsolidated - float_frame["E"] = 7.0 - col = float_frame["E"] - float_frame.values[6] = 6 - # as of 2.0 .values does not consolidate, so subsequent calls to .values - # does not share data - assert not (float_frame.values[6] == 6).all() 
- - assert (col == 7).all() + def test_modify_values(self, float_frame): + with pytest.raises(ValueError, match="read-only"): + float_frame.values[5] = 5 + assert (float_frame.values[5] != 5).all() def test_boolean_set_uncons(self, float_frame): float_frame["E"] = 7.0 @@ -332,7 +317,7 @@ def test_is_mixed_type(self, float_frame, float_string_frame): assert not float_frame._is_mixed_type assert float_string_frame._is_mixed_type - def test_stale_cached_series_bug_473(self, using_copy_on_write): + def test_stale_cached_series_bug_473(self): # this is chained, but ok with option_context("chained_assignment", None): Y = DataFrame( @@ -347,13 +332,9 @@ def test_stale_cached_series_bug_473(self, using_copy_on_write): repr(Y) Y.sum() Y["g"].sum() - if using_copy_on_write: - assert not pd.isna(Y["g"]["c"]) - else: - assert pd.isna(Y["g"]["c"]) + assert not pd.isna(Y["g"]["c"]) - @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") - def test_strange_column_corruption_issue(self, using_copy_on_write): + def test_strange_column_corruption_issue(self): # TODO(wesm): Unclear how exactly this is related to internal matters df = DataFrame(index=[0, 1]) df[0] = np.nan @@ -367,10 +348,7 @@ def test_strange_column_corruption_issue(self, using_copy_on_write): if col not in wasCol: wasCol[col] = 1 df[col] = np.nan - if using_copy_on_write: - df.loc[dt, col] = i - else: - df[col][dt] = i + df.loc[dt, col] = i myid = 100 @@ -408,25 +386,17 @@ def test_add_column_with_pandas_array(self): tm.assert_frame_equal(df, df2) -def test_update_inplace_sets_valid_block_values(using_copy_on_write): +def test_update_inplace_sets_valid_block_values(): # https://github.com/pandas-dev/pandas/issues/33457 df = DataFrame({"a": Series([1, 2, None], dtype="category")}) # inplace update of a single column - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["a"].fillna(1, inplace=True) - else: - with tm.assert_produces_warning(FutureWarning, match="inplace 
method"): - df["a"].fillna(1, inplace=True) + with tm.raises_chained_assignment_error(): + df["a"].fillna(1, inplace=True) # check we haven't put a Series into any block.values assert isinstance(df._mgr.blocks[0].values, Categorical) - if not using_copy_on_write: - # smoketest for OP bug from GH#35731 - assert df.isnull().sum().sum() == 0 - def test_nonconsolidated_item_cache_take(): # https://github.com/pandas-dev/pandas/issues/35521 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 20f147e94c514..2bbb20c842dba 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -287,25 +287,16 @@ def test_constructor_dtype_copy(self): new_df["col1"] = 200.0 assert orig_df["col1"][0] == 1.0 - def test_constructor_dtype_nocast_view_dataframe(self, using_copy_on_write): + def test_constructor_dtype_nocast_view_dataframe(self): df = DataFrame([[1, 2]]) should_be_view = DataFrame(df, dtype=df[0].dtype) - if using_copy_on_write: - should_be_view.iloc[0, 0] = 99 - assert df.values[0, 0] == 1 - else: - should_be_view.iloc[0, 0] = 99 - assert df.values[0, 0] == 99 + should_be_view.iloc[0, 0] = 99 + assert df.values[0, 0] == 1 - def test_constructor_dtype_nocast_view_2d_array(self, using_copy_on_write): + def test_constructor_dtype_nocast_view_2d_array(self): df = DataFrame([[1, 2], [3, 4]], dtype="int64") - if not using_copy_on_write: - should_be_view = DataFrame(df.values, dtype=df[0].dtype) - should_be_view.iloc[0, 0] = 97 - assert df.values[0, 0] == 97 - else: - df2 = DataFrame(df.values, dtype=df[0].dtype) - assert df2._mgr.arrays[0].flags.c_contiguous + df2 = DataFrame(df.values, dtype=df[0].dtype) + assert df2._mgr.arrays[0].flags.c_contiguous @pytest.mark.xfail(using_pyarrow_string_dtype(), reason="conversion copies") def test_1d_object_array_does_not_copy(self): @@ -2127,16 +2118,12 @@ def test_constructor_frame_shallow_copy(self, float_frame): cop.index = np.arange(len(cop)) 
tm.assert_frame_equal(float_frame, orig) - def test_constructor_ndarray_copy(self, float_frame, using_copy_on_write): + def test_constructor_ndarray_copy(self, float_frame): arr = float_frame.values.copy() df = DataFrame(arr) arr[5] = 5 - if using_copy_on_write: - assert not (df.values[5] == 5).all() - else: - assert (df.values[5] == 5).all() - + assert not (df.values[5] == 5).all() df = DataFrame(arr, copy=True) arr[6] = 6 assert not (df.values[6] == 6).all() @@ -2473,7 +2460,6 @@ def test_dict_nocopy( copy, any_numeric_ea_dtype, any_numpy_dtype, - using_copy_on_write, ): a = np.array([1, 2], dtype=any_numpy_dtype) b = np.array([3, 4], dtype=any_numpy_dtype) @@ -2541,9 +2527,6 @@ def check_views(c_only: bool = False): # view, so we have to check in the other direction df.iloc[:, 2] = pd.array([45, 46], dtype=c.dtype) assert df.dtypes.iloc[2] == c.dtype - if not copy and not using_copy_on_write: - check_views(True) - if copy: if a.dtype.kind == "M": assert a[0] == a.dtype.type(1, "ns") @@ -2553,12 +2536,6 @@ def check_views(c_only: bool = False): assert b[0] == b.dtype.type(3) # FIXME(GH#35417): enable after GH#35417 assert c[0] == c_orig[0] # i.e. df.iloc[0, 2]=45 did *not* update c - elif not using_copy_on_write: - # TODO: we can call check_views if we stop consolidating - # in setitem_with_indexer - assert c[0] == 45 # i.e. df.iloc[0, 2]=45 *did* update c - # TODO: we can check b[0] == 0 if we stop consolidating in - # setitem_with_indexer (except for datetimelike?) 
def test_construct_from_dict_ea_series(self): # GH#53744 - default of copy=True should also apply for Series with From c0d235fce67d0ece8da269332b0147dc062b091a Mon Sep 17 00:00:00 2001 From: Xiao Yuan Date: Tue, 6 Feb 2024 01:24:50 +0800 Subject: [PATCH 44/50] DOC: fix PR02 errors in docstrings of Index subclasses (#57261) * DOC: fix PR02 errors in docstrings of Index subclasses * mypy --- ci/code_checks.sh | 20 -------------------- pandas/core/indexes/extension.py | 2 ++ 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index cebc8e976425b..5b65182d1c253 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -71,26 +71,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Partially validate docstrings (PR02)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=PR02 --ignore_functions \ - pandas.CategoricalIndex.rename_categories\ - pandas.CategoricalIndex.reorder_categories\ - pandas.CategoricalIndex.add_categories\ - pandas.CategoricalIndex.remove_categories\ - pandas.CategoricalIndex.set_categories\ - pandas.IntervalIndex.set_closed\ - pandas.IntervalIndex.contains\ - pandas.IntervalIndex.overlaps\ - pandas.IntervalIndex.to_tuples\ - pandas.DatetimeIndex.round\ - pandas.DatetimeIndex.floor\ - pandas.DatetimeIndex.ceil\ - pandas.DatetimeIndex.month_name\ - pandas.DatetimeIndex.day_name\ - pandas.DatetimeIndex.to_period\ - pandas.DatetimeIndex.std\ - pandas.TimedeltaIndex.round\ - pandas.TimedeltaIndex.floor\ - pandas.TimedeltaIndex.ceil\ - pandas.PeriodIndex.strftime\ pandas.Series.dt.to_period\ pandas.Series.dt.tz_localize\ pandas.Series.dt.tz_convert\ diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index 61949531f37df..d6fbeb9043bc6 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -3,6 +3,7 @@ """ from __future__ import annotations +from inspect import signature from typing import ( TYPE_CHECKING, 
Callable, @@ -104,6 +105,7 @@ def method(self, *args, **kwargs): # type: ignore[misc] # error: "property" has no attribute "__name__" method.__name__ = name # type: ignore[attr-defined] method.__doc__ = attr.__doc__ + method.__signature__ = signature(attr) # type: ignore[attr-defined] return method From 3c535422cc41198fb55304ccfd6672940bb1a5f4 Mon Sep 17 00:00:00 2001 From: jrmylow <33999325+jrmylow@users.noreply.github.com> Date: Tue, 6 Feb 2024 01:29:51 +0800 Subject: [PATCH 45/50] DOC: Updated docstring for set_option (#57235) * Updated docstring and exceptions raised * updated code_checks.sh * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * testing fix for doc build --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- ci/code_checks.sh | 1 - pandas/_config/config.py | 25 ++++++++++++++++--------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 5b65182d1c253..f8f63de2c3cda 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -136,7 +136,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.tseries.offsets.Milli\ pandas.tseries.offsets.Micro\ pandas.tseries.offsets.Nano\ - pandas.set_option\ pandas.Timestamp.max\ pandas.Timestamp.min\ pandas.Timestamp.resolution\ diff --git a/pandas/_config/config.py b/pandas/_config/config.py index bc9d289ddbaed..7612739531695 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -326,9 +326,9 @@ def __doc__(self) -> str: # type: ignore[override] """ _set_option_tmpl = """ -set_option(pat, value) +set_option(*args, **kwargs) -Sets the value of the specified option. +Sets the value of the specified option or options. Available options: @@ -336,13 +336,18 @@ def __doc__(self) -> str: # type: ignore[override] Parameters ---------- -pat : str - Regexp which should match a single option. 
- Note: partial matches are supported for convenience, but unless you use the - full option name (e.g. x.y.z.option_name), your code may break in future - versions if new options with similar names are introduced. -value : object - New value of option. +*args : str | object + Arguments provided in pairs, which will be interpreted as (pattern, value) + pairs. + pattern: str + Regexp which should match a single option + value: object + New value of option + Note: partial pattern matches are supported for convenience, but unless you + use the full option name (e.g. x.y.z.option_name), your code may break in + future versions if new options with similar names are introduced. +**kwargs : str + Keyword arguments are not currently supported. Returns ------- @@ -350,6 +355,8 @@ def __doc__(self) -> str: # type: ignore[override] Raises ------ +ValueError if odd numbers of non-keyword arguments are provided +TypeError if keyword arguments are provided OptionError if no such option exists Notes From 8a8c408364c0cd82b6a4b7ee4770f2e383cdd57f Mon Sep 17 00:00:00 2001 From: Trinh Quoc Anh Date: Tue, 6 Feb 2024 00:31:48 +0700 Subject: [PATCH 46/50] Use ruff to detect banned import (#57184) * Use ruff to detect banned import * Combine rules --- .pre-commit-config.yaml | 7 --- pandas/io/common.py | 2 +- pyproject.toml | 3 + scripts/tests/test_use_io_common_urlopen.py | 23 ------- scripts/use_io_common_urlopen.py | 67 --------------------- 5 files changed, 4 insertions(+), 98 deletions(-) delete mode 100644 scripts/tests/test_use_io_common_urlopen.py delete mode 100644 scripts/use_io_common_urlopen.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccd01abc4affe..b7e43404b86bd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -299,13 +299,6 @@ repos: files: ^pandas/core/ exclude: ^pandas/core/api\.py$ types: [python] - - id: use-io-common-urlopen - name: Use pandas.io.common.urlopen instead of urllib.request.urlopen - language: python - entry: 
python scripts/use_io_common_urlopen.py - files: ^pandas/ - exclude: ^pandas/tests/ - types: [python] - id: no-bool-in-core-generic name: Use bool_t instead of bool in pandas/core/generic.py entry: python scripts/no_bool_in_generic.py diff --git a/pandas/io/common.py b/pandas/io/common.py index 16d7cb76f9ce9..682780a409a8b 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -286,7 +286,7 @@ def urlopen(*args, **kwargs): """ import urllib.request - return urllib.request.urlopen(*args, **kwargs) + return urllib.request.urlopen(*args, **kwargs) # noqa: TID251 def is_fsspec_url(url: FilePath | BaseBuffer) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 934f66136f601..7614ceecbd8ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -330,6 +330,9 @@ exclude = [ "env", ] +[tool.ruff.lint.flake8-tidy-imports.banned-api] +"urllib.request.urlopen".msg = "Use pandas.io.common.urlopen instead of urllib.request.urlopen" + [tool.ruff.per-file-ignores] # relative imports allowed for asv_bench "asv_bench/*" = ["TID", "NPY002"] diff --git a/scripts/tests/test_use_io_common_urlopen.py b/scripts/tests/test_use_io_common_urlopen.py deleted file mode 100644 index c2c4a7fe9cb58..0000000000000 --- a/scripts/tests/test_use_io_common_urlopen.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest - -from scripts.use_io_common_urlopen import use_io_common_urlopen - -PATH = "t.py" - - -def test_inconsistent_usage(capsys) -> None: - content = "from urllib.request import urlopen" - result_msg = ( - "t.py:1:0: Don't use urllib.request.urlopen, " - "use pandas.io.common.urlopen instead\n" - ) - with pytest.raises(SystemExit, match=None): - use_io_common_urlopen(content, PATH) - expected_msg, _ = capsys.readouterr() - assert result_msg == expected_msg - - -def test_consistent_usage() -> None: - # should not raise - content = "from pandas.io.common import urlopen" - use_io_common_urlopen(content, PATH) diff --git a/scripts/use_io_common_urlopen.py b/scripts/use_io_common_urlopen.py 
deleted file mode 100644 index ade97f53cd827..0000000000000 --- a/scripts/use_io_common_urlopen.py +++ /dev/null @@ -1,67 +0,0 @@ -""" -Check that pandas/core imports pandas.array as pd_array. - -This makes it easier to grep for usage of pandas array. - -This is meant to be run as a pre-commit hook - to run it manually, you can do: - - pre-commit run use-io-common-urlopen --all-files - -""" - -from __future__ import annotations - -import argparse -import ast -import sys -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from collections.abc import Sequence - - -ERROR_MESSAGE = ( - "{path}:{lineno}:{col_offset}: " - "Don't use urllib.request.urlopen, use pandas.io.common.urlopen instead\n" -) - - -class Visitor(ast.NodeVisitor): - def __init__(self, path: str) -> None: - self.path = path - - def visit_ImportFrom(self, node: ast.ImportFrom) -> None: - # Check that pandas.io.common.urlopen is used instead of - # urllib.request.urlopen - if ( - node.module is not None - and node.module.startswith("urllib.request") - and any(i.name == "urlopen" for i in node.names) - ): - msg = ERROR_MESSAGE.format( - path=self.path, lineno=node.lineno, col_offset=node.col_offset - ) - sys.stdout.write(msg) - sys.exit(1) - super().generic_visit(node) - - -def use_io_common_urlopen(content: str, path: str) -> None: - tree = ast.parse(content) - visitor = Visitor(path) - visitor.visit(tree) - - -def main(argv: Sequence[str] | None = None) -> None: - parser = argparse.ArgumentParser() - parser.add_argument("paths", nargs="*") - args = parser.parse_args(argv) - - for path in args.paths: - with open(path, encoding="utf-8") as fd: - content = fd.read() - use_io_common_urlopen(content, path) - - -if __name__ == "__main__": - main() From 9d50b3cc69da1dd9f6bf1fd4764aaeffc52c503a Mon Sep 17 00:00:00 2001 From: koushik-rout-samsung <146946876+koushik-rout-samsung@users.noreply.github.com> Date: Mon, 5 Feb 2024 23:02:53 +0530 Subject: [PATCH 47/50] WEB: Using Bootstrap icon instead of font 
awesome icons (#57226) * Using bootstrap icon in place of font awesome * bootstrap icon added * class update * class update --- web/pandas/_templates/layout.html | 12 ++++++------ web/pandas/index.html | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html index c8025aeef3791..aa4bfc92ce8a8 100644 --- a/web/pandas/_templates/layout.html +++ b/web/pandas/_templates/layout.html @@ -14,7 +14,7 @@ {% endfor %} - +
@@ -64,27 +64,27 @@ diff --git a/web/pandas/index.html b/web/pandas/index.html index 3d2e5363a2a2d..a9f5c35458bc8 100644 --- a/web/pandas/index.html +++ b/web/pandas/index.html @@ -74,17 +74,17 @@

Follow us

From 77b4824095735ee156a551427a56e2b6a85de861 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= <6618166+twoertwein@users.noreply.github.com> Date: Mon, 5 Feb 2024 18:14:29 -0500 Subject: [PATCH 48/50] TYP: misc Index return types (#57256) * TYP: misc Index return types * add IndexT to ignore list --- pandas/_typing.py | 1 + pandas/core/generic.py | 2 +- pandas/core/indexes/accessors.py | 5 +++-- pandas/core/indexes/base.py | 7 ++++--- pandas/core/indexes/datetimelike.py | 4 ++-- pandas/core/indexes/datetimes.py | 6 ++++-- pandas/core/indexes/multi.py | 3 ++- pandas/core/indexes/range.py | 2 +- pandas/core/indexes/timedeltas.py | 6 +++++- pyproject.toml | 2 +- 10 files changed, 24 insertions(+), 14 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 1fec41463904c..8646b7425894d 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -189,6 +189,7 @@ def __reversed__(self) -> Iterator[_T_co]: # passed in, a DataFrame is always returned. NDFrameT = TypeVar("NDFrameT", bound="NDFrame") +IndexT = TypeVar("IndexT", bound="Index") NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index") AxisInt = int diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 61fb757fafec4..bbe499aad695f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10740,7 +10740,7 @@ def _shift_with_freq(self, periods: int, axis: int, freq) -> Self: f"does not match PeriodIndex freq " f"{freq_to_period_freqstr(orig_freq.n, orig_freq.name)}" ) - new_ax = index.shift(periods) + new_ax: Index = index.shift(periods) else: new_ax = index.shift(periods, freq) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 1a24ae8530c12..a91fb0a8d718d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -5,6 +5,7 @@ from typing import ( TYPE_CHECKING, + NoReturn, cast, ) import warnings @@ -108,7 +109,7 @@ def _delegate_property_get(self, name: str): # return the result as a Series 
return Series(result, index=index, name=self.name).__finalize__(self._parent) - def _delegate_property_set(self, name: str, value, *args, **kwargs): + def _delegate_property_set(self, name: str, value, *args, **kwargs) -> NoReturn: raise ValueError( "modifications to a property of a datetimelike object are not supported. " "Change values on the original." @@ -483,7 +484,7 @@ def to_pytimedelta(self) -> np.ndarray: return self._get_values().to_pytimedelta() @property - def components(self): + def components(self) -> DataFrame: """ Return a Dataframe of the components of the Timedeltas. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87135ce9e0dd0..42613ca4c6573 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -55,6 +55,7 @@ F, IgnoreRaise, IndexLabel, + IndexT, JoinHow, Level, NaPosition, @@ -2027,7 +2028,7 @@ def sortlevel( ascending: bool | list[bool] = True, sort_remaining=None, na_position: NaPosition = "first", - ): + ) -> tuple[Self, np.ndarray]: """ For internal compatibility with the Index API. @@ -4432,7 +4433,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool): target = self._maybe_preserve_names(target, preserve_names) return target - def _maybe_preserve_names(self, target: Index, preserve_names: bool): + def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT: if preserve_names and target.nlevels == 1 and target.name != self.name: target = target.copy(deep=False) target.name = self.name @@ -5987,7 +5988,7 @@ def sort(self, *args, **kwargs): """ raise TypeError("cannot sort an Index object in-place, use sort_values instead") - def shift(self, periods: int = 1, freq=None): + def shift(self, periods: int = 1, freq=None) -> Self: """ Shift index by desired number of time frequency increments. 
diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index ae13edab3a35a..a5670536c74f7 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -273,7 +273,7 @@ def _can_partial_date_slice(self, reso: Resolution) -> bool: def _parsed_string_to_bounds(self, reso: Resolution, parsed): raise NotImplementedError - def _parse_with_reso(self, label: str): + def _parse_with_reso(self, label: str) -> tuple[datetime, Resolution]: # overridden by TimedeltaIndex try: if self.freq is None or hasattr(self.freq, "rule_code"): @@ -295,7 +295,7 @@ def _parse_with_reso(self, label: str): reso = Resolution.from_attrname(reso_str) return parsed, reso - def _get_string_slice(self, key: str): + def _get_string_slice(self, key: str) -> slice | npt.NDArray[np.intp]: # overridden by TimedeltaIndex parsed, reso = self._parse_with_reso(key) try: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c978abd8c2427..3cf3352e64f27 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -518,7 +518,9 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex: # -------------------------------------------------------------------- # Indexing Methods - def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime): + def _parsed_string_to_bounds( + self, reso: Resolution, parsed: dt.datetime + ) -> tuple[Timestamp, Timestamp]: """ Calculate datetime bounds for parsed time string and its resolution. @@ -555,7 +557,7 @@ def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime): # which localizes parsed. 
return start, end - def _parse_with_reso(self, label: str): + def _parse_with_reso(self, label: str) -> tuple[Timestamp, Resolution]: parsed, reso = super()._parse_with_reso(label) parsed = Timestamp(parsed) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0495f23508c09..f4bf4f3b2f275 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -37,6 +37,7 @@ F, IgnoreRaise, IndexLabel, + IndexT, Scalar, Self, Shape, @@ -2727,7 +2728,7 @@ def _wrap_reindex_result(self, target, indexer, preserve_names: bool): target = self._maybe_preserve_names(target, preserve_names) return target - def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: + def _maybe_preserve_names(self, target: IndexT, preserve_names: bool) -> IndexT: if ( preserve_names and target.nlevels == self.nlevels diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 16b203931c073..2edf6057442b6 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -876,7 +876,7 @@ def _difference(self, other, sort=None): def symmetric_difference( self, other, result_name: Hashable | None = None, sort=None - ): + ) -> Index: if not isinstance(other, RangeIndex) or sort is not None: return super().symmetric_difference(other, result_name, sort) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 08a265ba47648..db813b047b2bb 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -32,6 +32,7 @@ from pandas.core.indexes.extension import inherit_names if TYPE_CHECKING: + from pandas._libs import NaTType from pandas._typing import DtypeObj @@ -245,7 +246,10 @@ def get_loc(self, key): return Index.get_loc(self, key) - def _parse_with_reso(self, label: str): + # error: Return type "tuple[Timedelta | NaTType, None]" of "_parse_with_reso" + # incompatible with return type "tuple[datetime, Resolution]" in supertype + # "DatetimeIndexOpsMixin" 
+ def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # type: ignore[override] # the "with_reso" is a no-op for TimedeltaIndex parsed = Timedelta(label) return parsed, None diff --git a/pyproject.toml b/pyproject.toml index 7614ceecbd8ca..a7cb87bbca4b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -761,5 +761,5 @@ exclude_lines = [ directory = "coverage_html_report" [tool.codespell] -ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse, nin, timere, expec, expecs" +ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse, nin, timere, expec, expecs, indext" ignore-regex = 'https://([\w/\.])+' From 8baee5def5ab9a592c68f5f9b3ad0688ef329493 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:31:26 -1000 Subject: [PATCH 49/50] COMPAT: Numpy 2.0 casting compat (#57265) --- pandas/core/dtypes/cast.py | 1 + pandas/core/internals/blocks.py | 9 ++++++++- pandas/tests/indexing/test_loc.py | 16 ++-------------- pandas/tests/series/test_constructors.py | 12 +++++++++--- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4d76ce5799e7b..dfe12872c3916 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1683,6 +1683,7 @@ def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.n arr = np.asarray(arr) if np.issubdtype(arr.dtype, str): + # TODO(numpy-2.0 min): This case will raise an OverflowError above if (casted.astype(str) == arr).all(): return casted raise ValueError(f"string values cannot be losslessly cast to {dtype}") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 02296643acc3e..910de45d3e89f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1388,7 +1388,14 @@ def setitem(self, indexer, value) -> Block: if isinstance(casted, np.ndarray) and casted.ndim == 1 and len(casted) == 
1: # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615 casted = casted[0, ...] - values[indexer] = casted + try: + values[indexer] = casted + except (TypeError, ValueError) as err: + if is_list_like(casted): + raise ValueError( + "setting an array element with a sequence." + ) from err + raise return self def putmask(self, mask, new) -> list[Block]: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 193c296115479..4f70c63aeb353 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1224,13 +1224,7 @@ def test_loc_setitem_empty_append_raises(self): with pytest.raises(KeyError, match=msg): df.loc[[0, 1], "x"] = data - msg = "|".join( - [ - "cannot copy sequence with size 2 to array axis with dimension 0", - r"could not broadcast input array from shape \(2,\) into shape \(0,\)", - "Must have equal len keys and value when setting with an iterable", - ] - ) + msg = "setting an array element with a sequence." with pytest.raises(ValueError, match=msg): df.loc[0:2, "x"] = data @@ -1556,16 +1550,10 @@ def test_loc_setitem_2d_to_1d_raises(self): # float64 dtype to avoid upcast when trying to set float data ser = Series(range(2), dtype="float64") - msg = "|".join( - [ - r"shape mismatch: value array of shape \(2,2\)", - r"cannot reshape array of size 4 into shape \(2,\)", - ] - ) + msg = "setting an array element with a sequence." 
with pytest.raises(ValueError, match=msg): ser.loc[range(2)] = data - msg = r"could not broadcast input array from shape \(2,2\) into shape \(2,?\)" with pytest.raises(ValueError, match=msg): ser.loc[:] = data diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a1e08f484ebba..e17cf7491f58b 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1949,9 +1949,15 @@ def test_constructor_int64_dtype(self, any_int_dtype): def test_constructor_raise_on_lossy_conversion_of_strings(self): # GH#44923 - with pytest.raises( - ValueError, match="string values cannot be losslessly cast to int8" - ): + if not np_version_gt2: + raises = pytest.raises( + ValueError, match="string values cannot be losslessly cast to int8" + ) + else: + raises = pytest.raises( + OverflowError, match="The elements provided in the data" + ) + with raises: Series(["128"], dtype="int8") def test_constructor_dtype_timedelta_alternative_construct(self): From 17aa2ba3f9b75bb4083f469b3649e18b80a5352c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 6 Feb 2024 02:29:12 +0000 Subject: [PATCH 50/50] CoW: Remove cow branches from more tests (#57270) --- .../multiindex/test_chaining_and_caching.py | 8 +- .../tests/indexing/multiindex/test_partial.py | 16 +--- .../tests/indexing/multiindex/test_setitem.py | 30 ++----- .../indexing/test_chaining_and_caching.py | 81 +++++-------------- pandas/tests/indexing/test_iat.py | 18 ----- pandas/tests/indexing/test_iloc.py | 32 +++----- pandas/tests/indexing/test_loc.py | 30 ++----- pandas/tests/series/indexing/test_indexing.py | 32 ++------ pandas/tests/series/indexing/test_setitem.py | 17 ++-- pandas/tests/series/methods/test_align.py | 22 ++--- pandas/tests/series/methods/test_copy.py | 53 +++++------- .../series/methods/test_get_numeric_data.py | 7 +- pandas/tests/series/methods/test_rename.py | 10 +-- 
.../tests/series/methods/test_sort_values.py | 2 +- pandas/tests/series/methods/test_update.py | 16 +--- pandas/tests/series/test_constructors.py | 14 +--- 16 files changed, 102 insertions(+), 286 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index c70c0ee10afd6..43aec12055cd2 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -31,7 +31,7 @@ def test_detect_chained_assignment(): zed["eyes"]["right"].fillna(value=555, inplace=True) -def test_cache_updating(using_copy_on_write): +def test_cache_updating(): # 5216 # make sure that we don't try to set a dead cache a = np.random.default_rng(2).random((10, 3)) @@ -47,11 +47,7 @@ def test_cache_updating(using_copy_on_write): with tm.raises_chained_assignment_error(): df.loc[0]["z"].iloc[0] = 1.0 - if using_copy_on_write: - assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] - else: - result = df.loc[(0, 0), "z"] - assert result == 1 + assert df.loc[(0, 0), "z"] == df_original.loc[0, "z"] # correct setting df.loc[(0, 0), "z"] = 2 diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index b68ab18fbc9b2..dbfabf7666d25 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -119,7 +119,6 @@ def test_getitem_partial_column_select(self): def test_partial_set( self, multiindex_year_month_day_dataframe_random_data, - using_copy_on_write, ): # GH #397 ymd = multiindex_year_month_day_dataframe_random_data @@ -129,13 +128,9 @@ def test_partial_set( exp.iloc[65:85] = 0 tm.assert_frame_equal(df, exp) - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["A"].loc[2000, 4] = 1 - df.loc[(2000, 4), "A"] = 1 - else: - with tm.raises_chained_assignment_error(): - df["A"].loc[2000, 4] = 1 + with 
tm.raises_chained_assignment_error(): + df["A"].loc[2000, 4] = 1 + df.loc[(2000, 4), "A"] = 1 exp.iloc[65:85, 0] = 1 tm.assert_frame_equal(df, exp) @@ -146,10 +141,7 @@ def test_partial_set( # this works...for now with tm.raises_chained_assignment_error(): df["A"].iloc[14] = 5 - if using_copy_on_write: - assert df["A"].iloc[14] == exp["A"].iloc[14] - else: - assert df["A"].iloc[14] == 5 + assert df["A"].iloc[14] == exp["A"].iloc[14] @pytest.mark.parametrize("dtype", [int, float]) def test_getitem_intkey_leading_level( diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index d731f796637ea..1b6e1341a9c40 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -194,7 +194,7 @@ def test_multiindex_assignment(self): df.loc[4, "d"] = arr tm.assert_series_equal(df.loc[4, "d"], Series(arr, index=[8, 10], name="d")) - def test_multiindex_assignment_single_dtype(self, using_copy_on_write): + def test_multiindex_assignment_single_dtype(self): # GH3777 part 2b # single dtype arr = np.array([0.0, 1.0]) @@ -205,7 +205,6 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): index=[[4, 4, 8], [8, 10, 12]], dtype=np.int64, ) - view = df["c"].iloc[:2].values # arr can be losslessly cast to int, so this setitem is inplace # INFO(CoW-warn) this does not warn because we directly took .values @@ -215,10 +214,6 @@ def test_multiindex_assignment_single_dtype(self, using_copy_on_write): result = df.loc[4, "c"] tm.assert_series_equal(result, exp) - # extra check for inplace-ness - if not using_copy_on_write: - tm.assert_numpy_array_equal(view, exp.values) - # arr + 0.5 cannot be cast losslessly to int, so we upcast with tm.assert_produces_warning( FutureWarning, match="item of incompatible dtype" @@ -412,9 +407,7 @@ def test_setitem_change_dtype(self, multiindex_dataframe_random_data): reindexed = dft.reindex(columns=[("foo", "two")]) 
tm.assert_series_equal(reindexed["foo", "two"], s > s.median()) - def test_set_column_scalar_with_loc( - self, multiindex_dataframe_random_data, using_copy_on_write - ): + def test_set_column_scalar_with_loc(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data subset = frame.index[[1, 4, 5]] @@ -424,11 +417,8 @@ def test_set_column_scalar_with_loc( frame_original = frame.copy() col = frame["B"] col[subset] = 97 - if using_copy_on_write: - # chained setitem doesn't work with CoW - tm.assert_frame_equal(frame, frame_original) - else: - assert (frame.loc[subset, "B"] == 97).all() + # chained setitem doesn't work with CoW + tm.assert_frame_equal(frame, frame_original) def test_nonunique_assignment_1750(self): df = DataFrame( @@ -505,19 +495,13 @@ def test_setitem_enlargement_keep_index_names(self): tm.assert_frame_equal(df, expected) -def test_frame_setitem_view_direct( - multiindex_dataframe_random_data, using_copy_on_write -): +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no df = multiindex_dataframe_random_data.T - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - df["foo"].values[:] = 0 - assert (df["foo"].values != 0).all() - else: + with pytest.raises(ValueError, match="read-only"): df["foo"].values[:] = 0 - assert (df["foo"].values == 0).all() + assert (df["foo"].values != 0).all() def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 7945d88c4a7dc..718ea69960775 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -27,7 +27,7 @@ def random_text(nobs=100): class TestCaching: - def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): + def 
test_slice_consolidate_invalidate_item_cache(self): # this is chained assignment, but will 'work' with option_context("chained_assignment", None): # #3970 @@ -61,7 +61,7 @@ def test_setitem_cache_updating(self, do_ref): assert df.loc[0, "c"] == 0.0 assert df.loc[7, "c"] == 1.0 - def test_setitem_cache_updating_slices(self, using_copy_on_write): + def test_setitem_cache_updating_slices(self): # GH 7084 # not updating cache on series setting with slices expected = DataFrame( @@ -85,15 +85,11 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write): out_original = out.copy() for ix, row in df.iterrows(): v = out[row["C"]][six:eix] + row["D"] - with tm.raises_chained_assignment_error((ix == 0) or using_copy_on_write): + with tm.raises_chained_assignment_error(): out[row["C"]][six:eix] = v - if not using_copy_on_write: - tm.assert_frame_equal(out, expected) - tm.assert_series_equal(out["A"], expected["A"]) - else: - tm.assert_frame_equal(out, out_original) - tm.assert_series_equal(out["A"], out_original["A"]) + tm.assert_frame_equal(out, out_original) + tm.assert_series_equal(out["A"], out_original["A"]) out = DataFrame({"A": [0, 0, 0]}, index=date_range("5/7/2014", "5/9/2014")) for ix, row in df.iterrows(): @@ -102,7 +98,7 @@ def test_setitem_cache_updating_slices(self, using_copy_on_write): tm.assert_frame_equal(out, expected) tm.assert_series_equal(out["A"], expected["A"]) - def test_altering_series_clears_parent_cache(self, using_copy_on_write): + def test_altering_series_clears_parent_cache(self): # GH #33675 df = DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) ser = df["A"] @@ -116,49 +112,36 @@ def test_altering_series_clears_parent_cache(self, using_copy_on_write): class TestChaining: - def test_setitem_chained_setfault(self, using_copy_on_write): + def test_setitem_chained_setfault(self): # GH6026 data = ["right", "left", "left", "left", "right", "left", "timeout"] - mdata = ["right", "left", "left", "left", "right", "left", 
"none"] df = DataFrame({"response": np.array(data)}) mask = df.response == "timeout" with tm.raises_chained_assignment_error(): df.response[mask] = "none" - if using_copy_on_write: - tm.assert_frame_equal(df, DataFrame({"response": data})) - else: - tm.assert_frame_equal(df, DataFrame({"response": mdata})) + tm.assert_frame_equal(df, DataFrame({"response": data})) recarray = np.rec.fromarrays([data], names=["response"]) df = DataFrame(recarray) mask = df.response == "timeout" with tm.raises_chained_assignment_error(): df.response[mask] = "none" - if using_copy_on_write: - tm.assert_frame_equal(df, DataFrame({"response": data})) - else: - tm.assert_frame_equal(df, DataFrame({"response": mdata})) + tm.assert_frame_equal(df, DataFrame({"response": data})) df = DataFrame({"response": data, "response1": data}) df_original = df.copy() mask = df.response == "timeout" with tm.raises_chained_assignment_error(): df.response[mask] = "none" - if using_copy_on_write: - tm.assert_frame_equal(df, df_original) - else: - tm.assert_frame_equal(df, DataFrame({"response": mdata, "response1": data})) + tm.assert_frame_equal(df, df_original) # GH 6056 expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) df = DataFrame({"A": np.array(["foo", "bar", "bah", "foo", "bar"])}) with tm.raises_chained_assignment_error(): df["A"].iloc[0] = np.nan - if using_copy_on_write: - expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) - else: - expected = DataFrame({"A": [np.nan, "bar", "bah", "foo", "bar"]}) + expected = DataFrame({"A": ["foo", "bar", "bah", "foo", "bar"]}) result = df.head() tm.assert_frame_equal(result, expected) @@ -169,10 +152,9 @@ def test_setitem_chained_setfault(self, using_copy_on_write): tm.assert_frame_equal(result, expected) @pytest.mark.arm_slow - def test_detect_chained_assignment(self, using_copy_on_write): + def test_detect_chained_assignment(self): with option_context("chained_assignment", "raise"): # work with the chain - expected = 
DataFrame([[-5, 1], [-6, 3]], columns=list("AB")) df = DataFrame( np.arange(4).reshape(2, 2), columns=list("AB"), dtype="int64" ) @@ -182,10 +164,7 @@ def test_detect_chained_assignment(self, using_copy_on_write): df["A"][0] = -5 with tm.raises_chained_assignment_error(): df["A"][1] = -6 - if using_copy_on_write: - tm.assert_frame_equal(df, df_original) - else: - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, df_original) @pytest.mark.arm_slow def test_detect_chained_assignment_raises(self): @@ -340,9 +319,7 @@ def test_detect_chained_assignment_warnings_errors(self): df.loc[0]["A"] = 111 @pytest.mark.parametrize("rhs", [3, DataFrame({0: [1, 2, 3, 4]})]) - def test_detect_chained_assignment_warning_stacklevel( - self, rhs, using_copy_on_write - ): + def test_detect_chained_assignment_warning_stacklevel(self, rhs): # GH#42570 df = DataFrame(np.arange(25).reshape(5, 5)) df_original = df.copy() @@ -379,7 +356,7 @@ def test_cache_updating(self): assert "Hello Friend" in df["A"].index assert "Hello Friend" in df["B"].index - def test_cache_updating2(self, using_copy_on_write): + def test_cache_updating2(self): # 10264 df = DataFrame( np.zeros((5, 5), dtype="int64"), @@ -388,26 +365,11 @@ def test_cache_updating2(self, using_copy_on_write): ) df["f"] = 0 df_orig = df.copy() - if using_copy_on_write: - with pytest.raises(ValueError, match="read-only"): - df.f.values[3] = 1 - tm.assert_frame_equal(df, df_orig) - return - - df.f.values[3] = 1 - - df.f.values[3] = 2 - expected = DataFrame( - np.zeros((5, 6), dtype="int64"), - columns=["a", "b", "c", "d", "e", "f"], - index=range(5), - ) - expected.at[3, "f"] = 2 - tm.assert_frame_equal(df, expected) - expected = Series([0, 0, 0, 2, 0], name="f") - tm.assert_series_equal(df.f, expected) + with pytest.raises(ValueError, match="read-only"): + df.f.values[3] = 1 + tm.assert_frame_equal(df, df_orig) - def test_iloc_setitem_chained_assignment(self, using_copy_on_write): + def 
test_iloc_setitem_chained_assignment(self): # GH#3970 with option_context("chained_assignment", None): df = DataFrame({"aa": range(5), "bb": [2.2] * 5}) @@ -424,10 +386,7 @@ def test_iloc_setitem_chained_assignment(self, using_copy_on_write): with tm.raises_chained_assignment_error(): df["bb"].iloc[0] = 0.15 - if not using_copy_on_write: - assert df["bb"].iloc[0] == 0.15 - else: - assert df["bb"].iloc[0] == 2.2 + assert df["bb"].iloc[0] == 2.2 def test_getitem_loc_assignment_slice_state(self): # GH 13569 diff --git a/pandas/tests/indexing/test_iat.py b/pandas/tests/indexing/test_iat.py index 4497c16efdfda..bb9252a50eb2a 100644 --- a/pandas/tests/indexing/test_iat.py +++ b/pandas/tests/indexing/test_iat.py @@ -28,21 +28,3 @@ def test_iat_getitem_series_with_period_index(): expected = ser[index[0]] result = ser.iat[0] assert expected == result - - -def test_iat_setitem_item_cache_cleared(indexer_ial, using_copy_on_write): - # GH#45684 - data = {"x": np.arange(8, dtype=np.int64), "y": np.int64(0)} - df = DataFrame(data).copy() - ser = df["y"] - - # previously this iat setting would split the block and fail to clear - # the item_cache. 
- indexer_ial(df)[7, 0] = 9999 - - indexer_ial(df)[7, 1] = 1234 - - assert df.iat[7, 1] == 1234 - if not using_copy_on_write: - assert ser.iloc[-1] == 1234 - assert df.iloc[-1, -1] == 1234 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 5453c8be0e832..8650a1afb383d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -105,9 +105,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer_li, key): expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)}) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_ea_inplace( - self, frame_or_series, index_or_series_or_array, using_copy_on_write - ): + def test_iloc_setitem_ea_inplace(self, frame_or_series, index_or_series_or_array): # GH#38952 Case with not setting a full column # IntegerArray without NAs arr = array([1, 2, 3, 4]) @@ -128,11 +126,8 @@ def test_iloc_setitem_ea_inplace( # Check that we are actually in-place if frame_or_series is Series: - if using_copy_on_write: - assert obj.values is not values - assert np.shares_memory(obj.values, values) - else: - assert obj.values is values + assert obj.values is not values + assert np.shares_memory(obj.values, values) else: assert np.shares_memory(obj[0].values, values) @@ -843,7 +838,7 @@ def test_iloc_empty_list_indexer_is_ok(self): df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object(self, using_copy_on_write): + def test_identity_slice_returns_new_object(self): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.iloc[:] @@ -855,10 +850,7 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig # depending on CoW original_df.loc[:, "a"] = [4, 4, 4] - if using_copy_on_write: - assert (sliced_df["a"] == [1, 2, 3]).all() - else: - assert (sliced_df["a"] == 4).all() + assert 
(sliced_df["a"] == [1, 2, 3]).all() original_series = Series([1, 2, 3, 4, 5, 6]) sliced_series = original_series.iloc[:] @@ -866,11 +858,8 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): # should also be a shallow copy original_series[:3] = [7, 8, 9] - if using_copy_on_write: - # shallow copy not updated (CoW) - assert all(sliced_series[:3] == [1, 2, 3]) - else: - assert all(sliced_series[:3] == [7, 8, 9]) + # shallow copy not updated (CoW) + assert all(sliced_series[:3] == [1, 2, 3]) def test_indexing_zerodim_np_array(self): # GH24919 @@ -1414,7 +1403,7 @@ def test_frame_iloc_setitem_callable(self): class TestILocSeries: - def test_iloc(self, using_copy_on_write): + def test_iloc(self): ser = Series( np.random.default_rng(2).standard_normal(10), index=list(range(0, 20, 2)) ) @@ -1434,10 +1423,7 @@ def test_iloc(self, using_copy_on_write): with tm.assert_produces_warning(None): # GH#45324 make sure we aren't giving a spurious FutureWarning result[:] = 0 - if using_copy_on_write: - tm.assert_series_equal(ser, ser_original) - else: - assert (ser.iloc[1:3] == 0).all() + tm.assert_series_equal(ser, ser_original) # list of integers result = ser.iloc[[0, 2, 3, 4, 5]] diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 4f70c63aeb353..f263f92b4f0eb 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1089,7 +1089,7 @@ def test_loc_empty_list_indexer_is_ok(self): df.loc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) - def test_identity_slice_returns_new_object(self, using_copy_on_write): + def test_identity_slice_returns_new_object(self): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) @@ -1104,17 +1104,11 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): # Setting using .loc[:, "a"] sets inplace so alters both sliced and orig # depending on CoW original_df.loc[:, "a"] = [4, 4, 4] - if using_copy_on_write: - assert 
(sliced_df["a"] == [1, 2, 3]).all() - else: - assert (sliced_df["a"] == 4).all() + assert (sliced_df["a"] == [1, 2, 3]).all() # These should not return copies df = DataFrame(np.random.default_rng(2).standard_normal((10, 4))) - if using_copy_on_write: - assert df[0] is not df.loc[:, 0] - else: - assert df[0] is df.loc[:, 0] + assert df[0] is not df.loc[:, 0] # Same tests for Series original_series = Series([1, 2, 3, 4, 5, 6]) @@ -1123,17 +1117,10 @@ def test_identity_slice_returns_new_object(self, using_copy_on_write): assert original_series[:] is not original_series original_series[:3] = [7, 8, 9] - if using_copy_on_write: - assert all(sliced_series[:3] == [1, 2, 3]) - else: - assert all(sliced_series[:3] == [7, 8, 9]) + assert all(sliced_series[:3] == [1, 2, 3]) - def test_loc_copy_vs_view(self, request, using_copy_on_write): + def test_loc_copy_vs_view(self, request): # GH 15631 - - if not using_copy_on_write: - mark = pytest.mark.xfail(reason="accidental fix reverted - GH37497") - request.applymarker(mark) x = DataFrame(zip(range(3), range(3)), columns=["a", "b"]) y = x.copy() @@ -2634,7 +2621,7 @@ def test_loc_setitem_boolean_and_column(self, float_frame): expected = DataFrame(values, index=expected.index, columns=expected.columns) tm.assert_frame_equal(float_frame, expected) - def test_loc_setitem_ndframe_values_alignment(self, using_copy_on_write): + def test_loc_setitem_ndframe_values_alignment(self): # GH#45501 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) df.loc[[False, False, True], ["a"]] = DataFrame( @@ -2658,10 +2645,7 @@ def test_loc_setitem_ndframe_values_alignment(self, using_copy_on_write): df_orig = df.copy() ser = df["a"] ser.loc[[False, False, True]] = Series([10, 11, 12], index=[2, 1, 0]) - if using_copy_on_write: - tm.assert_frame_equal(df, df_orig) - else: - tm.assert_frame_equal(df, expected) + tm.assert_frame_equal(df, df_orig) def test_loc_indexer_empty_broadcast(self): # GH#51450 diff --git 
a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 50c167f3f3a28..5c36877e5ac86 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -101,16 +101,12 @@ def test_basic_getitem_dt64tz_values(): assert result == expected -def test_getitem_setitem_ellipsis(using_copy_on_write): +def test_getitem_setitem_ellipsis(): s = Series(np.random.default_rng(2).standard_normal(10)) result = s[...] tm.assert_series_equal(result, s) - s[...] = 5 - if not using_copy_on_write: - assert (result == 5).all() - @pytest.mark.parametrize( "result_1, duplicate_item, expected_1", @@ -242,7 +238,7 @@ def test_basic_getitem_setitem_corner(datetime_series): datetime_series[[5, [None, None]]] = 2 -def test_slice(string_series, object_series, using_copy_on_write): +def test_slice(string_series, object_series): original = string_series.copy() numSlice = string_series[10:20] numSliceEnd = string_series[-10:] @@ -261,11 +257,8 @@ def test_slice(string_series, object_series, using_copy_on_write): sl = string_series[10:20] sl[:] = 0 - if using_copy_on_write: - # Doesn't modify parent (CoW) - tm.assert_series_equal(string_series, original) - else: - assert (string_series[10:20] == 0).all() + # Doesn't modify parent (CoW) + tm.assert_series_equal(string_series, original) def test_timedelta_assignment(): @@ -282,7 +275,7 @@ def test_timedelta_assignment(): tm.assert_series_equal(s, expected) -def test_underlying_data_conversion(using_copy_on_write): +def test_underlying_data_conversion(): # GH 4080 df = DataFrame({c: [1, 2, 3] for c in ["a", "b", "c"]}) return_value = df.set_index(["a", "b", "c"], inplace=True) @@ -292,18 +285,9 @@ def test_underlying_data_conversion(using_copy_on_write): df_original = df.copy() df - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["val"].update(s) - expected = df_original - else: - with tm.assert_produces_warning(FutureWarning, 
match="inplace method"): - df["val"].update(s) - expected = DataFrame( - {"a": [1, 2, 3], "b": [1, 2, 3], "c": [1, 2, 3], "val": [0, 1, 0]} - ) - return_value = expected.set_index(["a", "b", "c"], inplace=True) - assert return_value is None + with tm.raises_chained_assignment_error(): + df["val"].update(s) + expected = df_original tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 85ffb0f8fe647..6be325073bb67 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -437,7 +437,7 @@ def test_setitem_mask_smallint_no_upcast(self): class TestSetitemViewCopySemantics: - def test_setitem_invalidates_datetime_index_freq(self, using_copy_on_write): + def test_setitem_invalidates_datetime_index_freq(self): # GH#24096 altering a datetime64tz Series inplace invalidates the # `freq` attribute on the underlying DatetimeIndex @@ -445,10 +445,7 @@ def test_setitem_invalidates_datetime_index_freq(self, using_copy_on_write): ts = dti[1] ser = Series(dti) assert ser._values is not dti - if using_copy_on_write: - assert ser._values._ndarray.base is dti._data._ndarray.base - else: - assert ser._values._ndarray.base is not dti._data._ndarray.base + assert ser._values._ndarray.base is dti._data._ndarray.base assert dti.freq == "D" ser.iloc[1] = NaT assert ser._values.freq is None @@ -459,18 +456,14 @@ def test_setitem_invalidates_datetime_index_freq(self, using_copy_on_write): assert dti[1] == ts assert dti.freq == "D" - def test_dt64tz_setitem_does_not_mutate_dti(self, using_copy_on_write): + def test_dt64tz_setitem_does_not_mutate_dti(self): # GH#21907, GH#24096 dti = date_range("2016-01-01", periods=10, tz="US/Pacific") ts = dti[0] ser = Series(dti) assert ser._values is not dti - if using_copy_on_write: - assert ser._values._ndarray.base is dti._data._ndarray.base - assert ser._mgr.arrays[0]._ndarray.base is dti._data._ndarray.base - 
else: - assert ser._values._ndarray.base is not dti._data._ndarray.base - assert ser._mgr.arrays[0]._ndarray.base is not dti._data._ndarray.base + assert ser._values._ndarray.base is dti._data._ndarray.base + assert ser._mgr.arrays[0]._ndarray.base is dti._data._ndarray.base assert ser._mgr.arrays[0] is not dti diff --git a/pandas/tests/series/methods/test_align.py b/pandas/tests/series/methods/test_align.py index cb60cd2e5bcf3..df3dd6f4d8ab0 100644 --- a/pandas/tests/series/methods/test_align.py +++ b/pandas/tests/series/methods/test_align.py @@ -89,7 +89,7 @@ def test_align_fill_method( tm.assert_series_equal(ab, eb) -def test_align_nocopy(datetime_series, using_copy_on_write): +def test_align_nocopy(datetime_series): b = datetime_series[:5].copy() # do copy @@ -102,10 +102,7 @@ def test_align_nocopy(datetime_series, using_copy_on_write): a = datetime_series.copy() ra, _ = a.align(b, join="left", copy=False) ra[:5] = 5 - if using_copy_on_write: - assert not (a[:5] == 5).any() - else: - assert (a[:5] == 5).all() + assert not (a[:5] == 5).any() # do copy a = datetime_series.copy() @@ -119,20 +116,13 @@ def test_align_nocopy(datetime_series, using_copy_on_write): b = datetime_series[:5].copy() _, rb = a.align(b, join="right", copy=False) rb[:2] = 5 - if using_copy_on_write: - assert not (b[:2] == 5).any() - else: - assert (b[:2] == 5).all() + assert not (b[:2] == 5).any() -def test_align_same_index(datetime_series, using_copy_on_write): +def test_align_same_index(datetime_series): a, b = datetime_series.align(datetime_series, copy=False) - if not using_copy_on_write: - assert a.index is datetime_series.index - assert b.index is datetime_series.index - else: - assert a.index.is_(datetime_series.index) - assert b.index.is_(datetime_series.index) + assert a.index.is_(datetime_series.index) + assert b.index.is_(datetime_series.index) a, b = datetime_series.align(datetime_series, copy=True) assert a.index is not datetime_series.index diff --git 
a/pandas/tests/series/methods/test_copy.py b/pandas/tests/series/methods/test_copy.py index ea439fb5a3263..ad5417d330d51 100644 --- a/pandas/tests/series/methods/test_copy.py +++ b/pandas/tests/series/methods/test_copy.py @@ -10,7 +10,7 @@ class TestCopy: @pytest.mark.parametrize("deep", ["default", None, False, True]) - def test_copy(self, deep, using_copy_on_write): + def test_copy(self, deep): ser = Series(np.arange(10), dtype="float64") # default deep is True @@ -19,28 +19,22 @@ def test_copy(self, deep, using_copy_on_write): else: ser2 = ser.copy(deep=deep) - if using_copy_on_write: - # INFO(CoW) a shallow copy doesn't yet copy the data - # but parent will not be modified (CoW) - if deep is None or deep is False: - assert np.may_share_memory(ser.values, ser2.values) - else: - assert not np.may_share_memory(ser.values, ser2.values) + # INFO(CoW) a shallow copy doesn't yet copy the data + # but parent will not be modified (CoW) + if deep is None or deep is False: + assert np.may_share_memory(ser.values, ser2.values) + else: + assert not np.may_share_memory(ser.values, ser2.values) ser2[::2] = np.nan - if deep is not False or using_copy_on_write: - # Did not modify original Series - assert np.isnan(ser2[0]) - assert not np.isnan(ser[0]) - else: - # we DID modify the original Series - assert np.isnan(ser2[0]) - assert np.isnan(ser[0]) + # Did not modify original Series + assert np.isnan(ser2[0]) + assert not np.isnan(ser[0]) @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning") @pytest.mark.parametrize("deep", ["default", None, False, True]) - def test_copy_tzaware(self, deep, using_copy_on_write): + def test_copy_tzaware(self, deep): # GH#11794 # copy of tz-aware expected = Series([Timestamp("2012/01/01", tz="UTC")]) @@ -53,25 +47,18 @@ def test_copy_tzaware(self, deep, using_copy_on_write): else: ser2 = ser.copy(deep=deep) - if using_copy_on_write: - # INFO(CoW) a shallow copy doesn't yet copy the data - # but parent will not be modified 
(CoW) - if deep is None or deep is False: - assert np.may_share_memory(ser.values, ser2.values) - else: - assert not np.may_share_memory(ser.values, ser2.values) + # INFO(CoW) a shallow copy doesn't yet copy the data + # but parent will not be modified (CoW) + if deep is None or deep is False: + assert np.may_share_memory(ser.values, ser2.values) + else: + assert not np.may_share_memory(ser.values, ser2.values) ser2[0] = Timestamp("1999/01/01", tz="UTC") - # default deep is True - if deep is not False or using_copy_on_write: - # Did not modify original Series - tm.assert_series_equal(ser2, expected2) - tm.assert_series_equal(ser, expected) - else: - # we DID modify the original Series - tm.assert_series_equal(ser2, expected2) - tm.assert_series_equal(ser, expected2) + # Did not modify original Series + tm.assert_series_equal(ser2, expected2) + tm.assert_series_equal(ser, expected) def test_copy_name(self, datetime_series): result = datetime_series.copy() diff --git a/pandas/tests/series/methods/test_get_numeric_data.py b/pandas/tests/series/methods/test_get_numeric_data.py index 11dc6d5c57162..f25583904377a 100644 --- a/pandas/tests/series/methods/test_get_numeric_data.py +++ b/pandas/tests/series/methods/test_get_numeric_data.py @@ -7,7 +7,7 @@ class TestGetNumericData: - def test_get_numeric_data_preserve_dtype(self, using_copy_on_write): + def test_get_numeric_data_preserve_dtype(self): # get the numeric data obj = Series([1, 2, 3]) result = obj._get_numeric_data() @@ -15,10 +15,7 @@ def test_get_numeric_data_preserve_dtype(self, using_copy_on_write): # returned object is a shallow copy result.iloc[0] = 0 - if using_copy_on_write: - assert obj.iloc[0] == 1 - else: - assert obj.iloc[0] == 0 + assert obj.iloc[0] == 1 obj = Series([1, "2", 3.0]) result = obj._get_numeric_data() diff --git a/pandas/tests/series/methods/test_rename.py b/pandas/tests/series/methods/test_rename.py index e59389ab069d3..c67298b777f6d 100644 --- 
a/pandas/tests/series/methods/test_rename.py +++ b/pandas/tests/series/methods/test_rename.py @@ -169,15 +169,11 @@ def test_rename_error_arg(self): with pytest.raises(KeyError, match=match): ser.rename({2: 9}, errors="raise") - def test_rename_copy_false(self, using_copy_on_write): + def test_rename_copy_false(self): # GH 46889 ser = Series(["foo", "bar"]) ser_orig = ser.copy() shallow_copy = ser.rename({1: 9}, copy=False) ser[0] = "foobar" - if using_copy_on_write: - assert ser_orig[0] == shallow_copy[0] - assert ser_orig[1] == shallow_copy[9] - else: - assert ser[0] == shallow_copy[0] - assert ser[1] == shallow_copy[9] + assert ser_orig[0] == shallow_copy[0] + assert ser_orig[1] == shallow_copy[9] diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index bd548eb80e182..995d9d176fc2b 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -10,7 +10,7 @@ class TestSeriesSortValues: - def test_sort_values(self, datetime_series, using_copy_on_write): + def test_sort_values(self, datetime_series): # check indexes are reordered corresponding with the values ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"]) expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"]) diff --git a/pandas/tests/series/methods/test_update.py b/pandas/tests/series/methods/test_update.py index 191aa36ad5d41..1d29e116be5c2 100644 --- a/pandas/tests/series/methods/test_update.py +++ b/pandas/tests/series/methods/test_update.py @@ -14,7 +14,7 @@ class TestUpdate: - def test_update(self, using_copy_on_write): + def test_update(self): s = Series([1.5, np.nan, 3.0, 4.0, np.nan]) s2 = Series([np.nan, 3.5, np.nan, 5.0]) s.update(s2) @@ -29,17 +29,9 @@ def test_update(self, using_copy_on_write): df["c"] = df["c"].astype(object) df_orig = df.copy() - if using_copy_on_write: - with tm.raises_chained_assignment_error(): - df["c"].update(Series(["foo"], index=[0])) - expected = df_orig - 
else: - with tm.assert_produces_warning(FutureWarning, match="inplace method"): - df["c"].update(Series(["foo"], index=[0])) - expected = DataFrame( - [[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"] - ) - expected["c"] = expected["c"].astype(object) + with tm.raises_chained_assignment_error(): + df["c"].update(Series(["foo"], index=[0])) + expected = df_orig tm.assert_frame_equal(df, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e17cf7491f58b..b00074c04257e 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -622,15 +622,12 @@ def test_constructor_maskedarray_hardened(self): expected = Series([np.nan, np.nan, np.nan]) tm.assert_series_equal(result, expected) - def test_series_ctor_plus_datetimeindex(self, using_copy_on_write): + def test_series_ctor_plus_datetimeindex(self): rng = date_range("20090415", "20090519", freq="B") data = {k: 1 for k in rng} result = Series(data, index=rng) - if using_copy_on_write: - assert result.index.is_(rng) - else: - assert result.index is rng + assert result.index.is_(rng) def test_constructor_default_index(self): s = Series([0, 1, 2]) @@ -891,16 +888,13 @@ def test_constructor_invalid_coerce_ints_with_float_nan(self, any_int_numpy_dtyp with pytest.raises(IntCastingNaNError, match=msg): Series(np.array(vals), dtype=any_int_numpy_dtype) - def test_constructor_dtype_no_cast(self, using_copy_on_write): + def test_constructor_dtype_no_cast(self): # see gh-1572 s = Series([1, 2, 3]) s2 = Series(s, dtype=np.int64) s2[1] = 5 - if using_copy_on_write: - assert s[1] == 2 - else: - assert s[1] == 5 + assert s[1] == 2 def test_constructor_datelike_coercion(self): # GH 9477