diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f44ba8..da1d7c1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: # fail-fast: false matrix: platform: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.8, 3.12] # to minimise complexity we only test a min and a max version + python-version: [3.9, 3.12] # to minimise complexity we only test a min and a max version # you can separate the tox-envs in different .yml files # see version 0.10.1 # https://github.com/coroa/pandas-indexing/releases/tag/v0.10.1 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index cc5429b..d3c9961 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,13 @@ Changelog ========= +* Bumps minimum python version to 3.9 (which is close to EOL, anyway) +* Improve :mod:`~selectors` to arbitrarily interact with boolean Series, numpy arrays + and callables, i.e. + 1. ``pd.Series([True, False]) & isin(model="a")`` produces the same result as + ``isin(model="a") & pd.Series([True, False])`` did earlier. + 2. ``isin(model="a") & (lambda s: s > 2)`` is now supported as well. 
+* Fix a testing incompatibility introduced by a recent attrs update (24.1.0) * Load pint and pint-pandas packages only on first use v0.5.1 (2024-05-20) diff --git a/docs/notebooks/introduction.ipynb b/docs/notebooks/introduction.ipynb index dc4c176..5de95f0 100644 --- a/docs/notebooks/introduction.ipynb +++ b/docs/notebooks/introduction.ipynb @@ -2749,12 +2749,12 @@ "id": "7e1d4f00", "metadata": {}, "source": [ - "````{warning}\n", - "It is currently impossible to use a pandas boolean series **in front of** a selector; ie.\n", + "````{note}\n", + "It is only possible from version 0.5.2 to use a pandas boolean series **in front of** a selector; ie.\n", "```python\n", "(capacity[2030] > 250) & isin(variable=[\"Coal\", \"Gas\", \"Nuclear\"], unit=\"GW\")\n", "```\n", - "will **fail**, it needs to be\n", + "works, as you would expect it, in the same way as\n", "```python\n", "isin(variable=[\"Coal\", \"Gas\", \"Nuclear\"], unit=\"GW\") & (capacity[2030] > 250)\n", "```\n", @@ -3142,8 +3142,20 @@ { "data": { "text/plain": [ - "array([False, False, False, False, False, False, False, False, True,\n", - " True, True, True])" + "model scenario region unit carrier fuel year\n", + "REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity Coal 2030 False\n", + " 2040 False\n", + " 2050 False\n", + " 2060 False\n", + " Gas 2030 False\n", + " 2040 False\n", + " 2050 False\n", + " 2060 False\n", + " Nuclear 2030 True\n", + " 2040 True\n", + " 2050 True\n", + " 2060 True\n", + "dtype: bool" ] }, "execution_count": 18, @@ -3244,10 +3256,10 @@ { "data": { "text/plain": [ - "fuel year model scenario region unit carrier \n", - "Coal 2030 REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity 182.0149\n", - "Nuclear 2040 REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity 214.4376\n", - " 2050 REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity 156.7766\n", + "model scenario region unit carrier fuel year\n", + 
"REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity Coal 2030 182.0149\n", + " Nuclear 2040 214.4376\n", + " 2050 156.7766\n", "dtype: float64" ] }, @@ -6496,7 +6508,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.12.2" }, "vscode": { "interpreter": { diff --git a/pyproject.toml b/pyproject.toml index f06e09e..7139513 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ classifiers = [ 'Operating System :: POSIX', 'Operating System :: MacOS', 'Operating System :: Microsoft', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', @@ -36,7 +35,7 @@ dependencies = [ 'deprecated', "attrs", ] -requires-python = ">=3.8, <4" +requires-python = ">=3.9, <4" dynamic = ["version"] diff --git a/src/pandas_indexing/core.py b/src/pandas_indexing/core.py index 9390ffb..2f7ced8 100644 --- a/src/pandas_indexing/core.py +++ b/src/pandas_indexing/core.py @@ -1044,7 +1044,7 @@ def aggregatelevel( agg_func: str = "sum", axis: Axis = 0, dropna: bool = True, - mode: Literal["replace", "add", "return"] = "replace", + mode: Literal["replace", "append", "return"] = "replace", **levels: Dict[str, Sequence[Any]], ) -> T: """Aggregate labels on one or multiple levels together. 
diff --git a/src/pandas_indexing/selectors.py b/src/pandas_indexing/selectors.py index e65f4d2..70e270c 100644 --- a/src/pandas_indexing/selectors.py +++ b/src/pandas_indexing/selectors.py @@ -19,8 +19,9 @@ def maybe_const(x): class Selector: - # Tell numpy that we want precedence + # Tell numpy and pandas that we want precedence __array_ufunc__ = None + __pandas_priority__ = 5000 def __invert__(self): return Not(self) @@ -49,8 +50,8 @@ class BinOp(Selector): class Const(Selector): val: Any - def __call__(self, _): - return self.val + def __call__(self, df): + return self.val(df) if callable(self.val) else self.val @define @@ -70,7 +71,7 @@ class Not(Selector): a: Selector def __call__(self, df): - return ~self.a.__call__(df) + return ~self.a(df) class Special(Selector): diff --git a/src/pandas_indexing/utils.py b/src/pandas_indexing/utils.py index b81e409..d74c976 100644 --- a/src/pandas_indexing/utils.py +++ b/src/pandas_indexing/utils.py @@ -6,8 +6,9 @@ import importlib import re from types import ModuleType -from typing import Union +from typing import Any, Union +from attrs import define from pandas import DataFrame, Index, Series from pandas.util._decorators import doc # noqa: F401 @@ -147,3 +148,14 @@ def __getattr__(self, item): def __dir__(self): module = self._load() return dir(module) + + +@define +class EqualIdentity: + __array_ufunc__ = None + __pandas_priority__ = 5000 + + obj: Any + + def __eq__(self, other): + return self.obj is other diff --git a/tests/test_core.py b/tests/test_core.py index f3e4452..f320355 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -2,7 +2,6 @@ Performs general tests. 
""" -import sys from re import escape from textwrap import dedent @@ -497,26 +496,22 @@ def test_semijoin(mdf, mseries): ), ) - # Python 3.12 changes the level order - level_order = ( - ["new", "str", "num"] - if sys.version_info >= (3, 12, 0) - else ["str", "num", "new"] - ) - # Right-join assert_frame_equal( semijoin(mdf, index, how="right"), DataFrame( {col: r_[mdf[col].values[1:3], nan] for col in mdf}, - index=index.reorder_levels(level_order), + index=index.reorder_levels(["new", "str", "num"]), ), ) # Right-join on series assert_series_equal( semijoin(mseries, index, how="right"), - Series(r_[mseries.values[1:3], nan], index=index.reorder_levels(level_order)), + Series( + r_[mseries.values[1:3], nan], + index=index.reorder_levels(["new", "str", "num"]), + ), ) diff --git a/tests/test_selectors.py b/tests/test_selectors.py index ac474d9..7a0c2f0 100644 --- a/tests/test_selectors.py +++ b/tests/test_selectors.py @@ -15,6 +15,7 @@ Not, Or, ) +from pandas_indexing.utils import EqualIdentity def test_isin_mseries(mseries: Series): @@ -47,11 +48,14 @@ def test_isin_operations(mdf: DataFrame): s_b = Series([False, True, False], mdf.index) sel = isin(str="bar") | s_b - assert sel == Or(Isin(dict(str="bar")), Const(s_b)) + assert sel == Or(Isin(dict(str="bar")), Const(EqualIdentity(s_b))) - with pytest.raises(AttributeError): - # Series then selector does not work yet - s_b & isin(str="bar") + sel = s_b & isin(str="bar") + assert sel == And(Isin(dict(str="bar")), Const(EqualIdentity(s_b))) + + assert_frame_equal( + mdf.loc[isin(str="foo") & (lambda df: df.two >= 2)], mdf.iloc[[1]] + ) def test_ismatch_single(sdf: DataFrame):