Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(selectors): Allow mixing selectors with Series and callables #59

Merged
merged 7 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
# fail-fast: false
matrix:
platform: [ubuntu-latest, macos-latest, windows-latest]
python-version: [3.8, 3.12] # to minimise complexity we only test a min and a max version
python-version: [3.9, 3.12] # to minimise complexity we only test a min and a max version
# you can separate the tox-envs in different .yml files
# see version 0.10.1
# https://github.com/coroa/pandas-indexing/releases/tag/v0.10.1
Expand Down
7 changes: 7 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
Changelog
=========

* Bumps minimum python version to 3.9 (which is close to EOL, anyway)
* Improve :mod:`~selectors` to arbitrarily interact with boolean Series, numpy arrays
and callables, i.e.
1. ``pd.Series([True, False]) & isin(model="a")`` produces the same result as
``isin(model="a") & pd.Series([True, False])`` did earlier.
2. ``isin(model="a") & (lambda s: s > 2)`` is now supported as well.
* Fix a testing incompatibility introduced by a recent attrs update (24.1.0)
* Load pint and pint-pandas packages only on first use

v0.5.1 (2024-05-20)
Expand Down
32 changes: 22 additions & 10 deletions docs/notebooks/introduction.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2749,12 +2749,12 @@
"id": "7e1d4f00",
"metadata": {},
"source": [
"````{warning}\n",
"It is currently impossible to use a pandas boolean series **in front of** a selector; ie.\n",
"````{note}\n",
"It is only possible from version 0.5.2 to use a pandas boolean series **in front of** a selector; ie.\n",
"```python\n",
"(capacity[2030] > 250) & isin(variable=[\"Coal\", \"Gas\", \"Nuclear\"], unit=\"GW\")\n",
"```\n",
"will **fail**, it needs to be\n",
"works, as you would expect it, in the same way as\n",
"```python\n",
"isin(variable=[\"Coal\", \"Gas\", \"Nuclear\"], unit=\"GW\") & (capacity[2030] > 250)\n",
"```\n",
Expand Down Expand Up @@ -3142,8 +3142,20 @@
{
"data": {
"text/plain": [
"array([False, False, False, False, False, False, False, False, True,\n",
" True, True, True])"
"model scenario region unit carrier fuel year\n",
"REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity Coal 2030 False\n",
" 2040 False\n",
" 2050 False\n",
" 2060 False\n",
" Gas 2030 False\n",
" 2040 False\n",
" 2050 False\n",
" 2060 False\n",
" Nuclear 2030 True\n",
" 2040 True\n",
" 2050 True\n",
" 2060 True\n",
"dtype: bool"
]
},
"execution_count": 18,
Expand Down Expand Up @@ -3244,10 +3256,10 @@
{
"data": {
"text/plain": [
"fuel year model scenario region unit carrier \n",
"Coal 2030 REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity 182.0149\n",
"Nuclear 2040 REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity 214.4376\n",
" 2050 REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity 156.7766\n",
"model scenario region unit carrier fuel year\n",
"REMIND-MAgPIE 2.1-4.3 DeepElec_SSP2_HighRE_Budg900 World GW Electricity Coal 2030 182.0149\n",
" Nuclear 2040 214.4376\n",
" 2050 156.7766\n",
"dtype: float64"
]
},
Expand Down Expand Up @@ -6496,7 +6508,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
"version": "3.12.2"
},
"vscode": {
"interpreter": {
Expand Down
3 changes: 1 addition & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ classifiers = [
'Operating System :: POSIX',
'Operating System :: MacOS',
'Operating System :: Microsoft',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
Expand All @@ -36,7 +35,7 @@ dependencies = [
'deprecated',
"attrs",
]
requires-python = ">=3.8, <4"
requires-python = ">=3.9, <4"

dynamic = ["version"]

Expand Down
2 changes: 1 addition & 1 deletion src/pandas_indexing/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,7 @@ def aggregatelevel(
agg_func: str = "sum",
axis: Axis = 0,
dropna: bool = True,
mode: Literal["replace", "add", "return"] = "replace",
mode: Literal["replace", "append", "return"] = "replace",
**levels: Dict[str, Sequence[Any]],
) -> T:
"""Aggregate labels on one or multiple levels together.
Expand Down
9 changes: 5 additions & 4 deletions src/pandas_indexing/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ def maybe_const(x):


class Selector:
# Tell numpy that we want precedence
# Tell numpy and pandas that we want precedence
__array_ufunc__ = None
__pandas_priority__ = 5000

def __invert__(self):
return Not(self)
Expand Down Expand Up @@ -49,8 +50,8 @@ class BinOp(Selector):
class Const(Selector):
val: Any

def __call__(self, _):
return self.val
def __call__(self, df):
return self.val(df) if callable(self.val) else self.val


@define
Expand All @@ -70,7 +71,7 @@ class Not(Selector):
a: Selector

def __call__(self, df):
return ~self.a.__call__(df)
return ~self.a(df)


class Special(Selector):
Expand Down
14 changes: 13 additions & 1 deletion src/pandas_indexing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import importlib
import re
from types import ModuleType
from typing import Union
from typing import Any, Union

from attrs import define
from pandas import DataFrame, Index, Series
from pandas.util._decorators import doc # noqa: F401

Expand Down Expand Up @@ -147,3 +148,14 @@ def __getattr__(self, item):
def __dir__(self):
module = self._load()
return dir(module)


@define
class EqualIdentity:
    """Wrap an object so that ``==`` on the wrapper is an identity check.

    ``EqualIdentity(obj) == other`` evaluates ``obj is other`` and therefore
    always yields a plain ``bool``, regardless of how ``obj`` itself
    implements equality.
    """

    # The wrapped object whose identity is compared in ``__eq__``.
    obj: Any

    # ``__array_ufunc__ = None`` opts this class out of NumPy's ufunc
    # handling, and the ``__pandas_priority__`` value is the pandas
    # counterpart for its own objects.
    # NOTE(review): presumably set so that a comparison written with a numpy
    # array or pandas object on the left-hand side is deferred to ``__eq__``
    # below instead of being evaluated element-wise -- confirm.
    __array_ufunc__ = None
    __pandas_priority__ = 5000

    def __eq__(self, other):
        """Return ``True`` only if ``other`` is the wrapped object itself."""
        wrapped = self.obj
        return wrapped is other
15 changes: 5 additions & 10 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Performs general tests.
"""

import sys
from re import escape
from textwrap import dedent

Expand Down Expand Up @@ -497,26 +496,22 @@ def test_semijoin(mdf, mseries):
),
)

# Python 3.12 changes the level order
level_order = (
["new", "str", "num"]
if sys.version_info >= (3, 12, 0)
else ["str", "num", "new"]
)

# Right-join
assert_frame_equal(
semijoin(mdf, index, how="right"),
DataFrame(
{col: r_[mdf[col].values[1:3], nan] for col in mdf},
index=index.reorder_levels(level_order),
index=index.reorder_levels(["new", "str", "num"]),
),
)

# Right-join on series
assert_series_equal(
semijoin(mseries, index, how="right"),
Series(r_[mseries.values[1:3], nan], index=index.reorder_levels(level_order)),
Series(
r_[mseries.values[1:3], nan],
index=index.reorder_levels(["new", "str", "num"]),
),
)


Expand Down
12 changes: 8 additions & 4 deletions tests/test_selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
Not,
Or,
)
from pandas_indexing.utils import EqualIdentity


def test_isin_mseries(mseries: Series):
Expand Down Expand Up @@ -47,11 +48,14 @@ def test_isin_operations(mdf: DataFrame):

s_b = Series([False, True, False], mdf.index)
sel = isin(str="bar") | s_b
assert sel == Or(Isin(dict(str="bar")), Const(s_b))
assert sel == Or(Isin(dict(str="bar")), Const(EqualIdentity(s_b)))

with pytest.raises(AttributeError):
# Series then selector does not work yet
s_b & isin(str="bar")
sel = s_b & isin(str="bar")
assert sel == And(Isin(dict(str="bar")), Const(EqualIdentity(s_b)))

assert_frame_equal(
mdf.loc[isin(str="foo") & (lambda df: df.two >= 2)], mdf.iloc[[1]]
)


def test_ismatch_single(sdf: DataFrame):
Expand Down