coroa · coroa · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024 · Aug 23, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -13,7 +13,7 @@ jobs:
       # fail-fast: false
       matrix:
         platform: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: [3.8, 3.12] # to minimise complexity we only test a min and a max version
+        python-version: [3.9, 3.12] # to minimise complexity we only test a min and a max version
         # you can separate the tox-envs in different .yml files
         # see version 0.10.1
         # https://github.com/coroa/pandas-indexing/releases/tag/v0.10.1

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -3,6 +3,13 @@
 Changelog
 =========
 
+* Bump minimum Python version to 3.9 (which is close to EOL, anyway)
+* Improve :mod:`~selectors` to arbitrarily interact with boolean Series, numpy arrays
+  and callables, i.e.
+  1. ``pd.Series([True, False]) & isin(model="a")`` produces the same result as
+     ``isin(model="a") & pd.Series([True, False])`` did earlier.
+  2. ``isin(model="a") & (lambda s: s > 2)`` is now supported as well.
+* Fix a testing incompatibility introduced by a recent attrs update (24.1.0)
 * Load pint and pint-pandas packages only on first use
 
 v0.5.1 (2024-05-20)

diff --git a/docs/notebooks/introduction.ipynb b/docs/notebooks/introduction.ipynb
@@ -2749,12 +2749,12 @@
    "id": "7e1d4f00",
    "metadata": {},
    "source": [
-    "````{warning}\n",
-    "It is currently impossible to use a pandas boolean series **in front of** a selector; ie.\n",
+    "````{note}\n",
+    "From version 0.5.2 onwards, it is possible to use a pandas boolean series **in front of** a selector; i.e.\n",
     "```python\n",
     "(capacity[2030] > 250) & isin(variable=[\"Coal\", \"Gas\", \"Nuclear\"], unit=\"GW\")\n",
     "```\n",
-    "will **fail**, it needs to be\n",
+    "works, as you would expect, in the same way as\n",
     "```python\n",
     "isin(variable=[\"Coal\", \"Gas\", \"Nuclear\"], unit=\"GW\") & (capacity[2030] > 250)\n",
     "```\n",
@@ -3142,8 +3142,20 @@
     {
      "data": {
       "text/plain": [
-       "array([False, False, False, False, False, False, False, False,  True,\n",
-       "        True,  True,  True])"
+       "model                  scenario                      region  unit  carrier      fuel     year\n",
+       "REMIND-MAgPIE 2.1-4.3  DeepElec_SSP2_HighRE_Budg900  World   GW    Electricity  Coal     2030    False\n",
+       "                                                                                         2040    False\n",
+       "                                                                                         2050    False\n",
+       "                                                                                         2060    False\n",
+       "                                                                                Gas      2030    False\n",
+       "                                                                                         2040    False\n",
+       "                                                                                         2050    False\n",
+       "                                                                                         2060    False\n",
+       "                                                                                Nuclear  2030     True\n",
+       "                                                                                         2040     True\n",
+       "                                                                                         2050     True\n",
+       "                                                                                         2060     True\n",
+       "dtype: bool"
       ]
      },
      "execution_count": 18,
@@ -3244,10 +3256,10 @@
     {
      "data": {
       "text/plain": [
-       "fuel     year  model                  scenario                      region  unit  carrier    \n",
-       "Coal     2030  REMIND-MAgPIE 2.1-4.3  DeepElec_SSP2_HighRE_Budg900  World   GW    Electricity    182.0149\n",
-       "Nuclear  2040  REMIND-MAgPIE 2.1-4.3  DeepElec_SSP2_HighRE_Budg900  World   GW    Electricity    214.4376\n",
-       "         2050  REMIND-MAgPIE 2.1-4.3  DeepElec_SSP2_HighRE_Budg900  World   GW    Electricity    156.7766\n",
+       "model                  scenario                      region  unit  carrier      fuel     year\n",
+       "REMIND-MAgPIE 2.1-4.3  DeepElec_SSP2_HighRE_Budg900  World   GW    Electricity  Coal     2030    182.0149\n",
+       "                                                                                Nuclear  2040    214.4376\n",
+       "                                                                                         2050    156.7766\n",
        "dtype: float64"
       ]
      },
@@ -6496,7 +6508,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.0"
+   "version": "3.12.2"
   },
   "vscode": {
    "interpreter": {

diff --git a/pyproject.toml b/pyproject.toml
@@ -17,7 +17,6 @@ classifiers = [
   'Operating System :: POSIX',
   'Operating System :: MacOS',
   'Operating System :: Microsoft',
-  'Programming Language :: Python :: 3.8',
   'Programming Language :: Python :: 3.9',
   'Programming Language :: Python :: 3.10',
   'Programming Language :: Python :: 3.11',
@@ -36,7 +35,7 @@ dependencies = [
   'deprecated',
   "attrs",
 ]
-requires-python = ">=3.8, <4"
+requires-python = ">=3.9, <4"
 
 dynamic = ["version"]
 

diff --git a/src/pandas_indexing/core.py b/src/pandas_indexing/core.py
@@ -1044,7 +1044,7 @@ def aggregatelevel(
     agg_func: str = "sum",
     axis: Axis = 0,
     dropna: bool = True,
-    mode: Literal["replace", "add", "return"] = "replace",
+    mode: Literal["replace", "append", "return"] = "replace",
     **levels: Dict[str, Sequence[Any]],
 ) -> T:
     """Aggregate labels on one or multiple levels together.

diff --git a/src/pandas_indexing/selectors.py b/src/pandas_indexing/selectors.py
@@ -19,8 +19,9 @@ def maybe_const(x):
 
 
 class Selector:
-    # Tell numpy that we want precedence
+    # Tell numpy and pandas that we want precedence
     __array_ufunc__ = None
+    __pandas_priority__ = 5000
 
     def __invert__(self):
         return Not(self)
@@ -49,8 +50,8 @@ class BinOp(Selector):
 class Const(Selector):
     val: Any
 
-    def __call__(self, _):
-        return self.val
+    def __call__(self, df):
+        return self.val(df) if callable(self.val) else self.val
 
 
 @define
@@ -70,7 +71,7 @@ class Not(Selector):
     a: Selector
 
     def __call__(self, df):
-        return ~self.a.__call__(df)
+        return ~self.a(df)
 
 
 class Special(Selector):

diff --git a/src/pandas_indexing/utils.py b/src/pandas_indexing/utils.py
@@ -6,8 +6,9 @@
 import importlib
 import re
 from types import ModuleType
-from typing import Union
+from typing import Any, Union
 
+from attrs import define
 from pandas import DataFrame, Index, Series
 from pandas.util._decorators import doc  # noqa: F401
 
@@ -147,3 +148,14 @@ def __getattr__(self, item):
     def __dir__(self):
         module = self._load()
         return dir(module)
+
+
+@define
+class EqualIdentity:
+    __array_ufunc__ = None
+    __pandas_priority__ = 5000
+
+    obj: Any
+
+    def __eq__(self, other):
+        return self.obj is other
diff --git a/tests/test_core.py b/tests/test_core.py
@@ -2,7 +2,6 @@
 Performs general tests.
 """
 
-import sys
 from re import escape
 from textwrap import dedent
 
@@ -497,26 +496,22 @@ def test_semijoin(mdf, mseries):
         ),
     )
 
-    # Python 3.12 changes the level order
-    level_order = (
-        ["new", "str", "num"]
-        if sys.version_info >= (3, 12, 0)
-        else ["str", "num", "new"]
-    )
-
     # Right-join
     assert_frame_equal(
         semijoin(mdf, index, how="right"),
         DataFrame(
             {col: r_[mdf[col].values[1:3], nan] for col in mdf},
-            index=index.reorder_levels(level_order),
+            index=index.reorder_levels(["new", "str", "num"]),
         ),
     )
 
     # Right-join on series
     assert_series_equal(
         semijoin(mseries, index, how="right"),
-        Series(r_[mseries.values[1:3], nan], index=index.reorder_levels(level_order)),
+        Series(
+            r_[mseries.values[1:3], nan],
+            index=index.reorder_levels(["new", "str", "num"]),
+        ),
     )
 
 

diff --git a/tests/test_selectors.py b/tests/test_selectors.py
@@ -15,6 +15,7 @@
     Not,
     Or,
 )
+from pandas_indexing.utils import EqualIdentity
 
 
 def test_isin_mseries(mseries: Series):
@@ -47,11 +48,14 @@ def test_isin_operations(mdf: DataFrame):
 
     s_b = Series([False, True, False], mdf.index)
     sel = isin(str="bar") | s_b
-    assert sel == Or(Isin(dict(str="bar")), Const(s_b))
+    assert sel == Or(Isin(dict(str="bar")), Const(EqualIdentity(s_b)))
 
-    with pytest.raises(AttributeError):
-        # Series then selector does not work yet
-        s_b & isin(str="bar")
+    sel = s_b & isin(str="bar")
+    assert sel == And(Isin(dict(str="bar")), Const(EqualIdentity(s_b)))
+
+    assert_frame_equal(
+        mdf.loc[isin(str="foo") & (lambda df: df.two >= 2)], mdf.iloc[[1]]
+    )
 
 
 def test_ismatch_single(sdf: DataFrame):