From 9b482ab1c4eab8ea9a272b5826d83969db951f75 Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Wed, 26 Jul 2023 01:09:05 +0200 Subject: [PATCH 1/3] Add fill_value argument to semijoin --- src/pandas_indexing/accessors.py | 12 +++++++++++- src/pandas_indexing/core.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/pandas_indexing/accessors.py b/src/pandas_indexing/accessors.py index 0fd4552..59a3046 100644 --- a/src/pandas_indexing/accessors.py +++ b/src/pandas_indexing/accessors.py @@ -15,6 +15,7 @@ import pandas as pd from deprecated.sphinx import deprecated from pandas import DataFrame, Index, MultiIndex, Series +from pandas.api.extensions import no_default from . import arithmetics from .core import ( @@ -131,8 +132,17 @@ def semijoin( level: Union[str, int, None] = None, sort: bool = False, axis: Axis = 0, + fill_value: Any = no_default, ) -> Union[DataFrame, Series]: - return semijoin(self._obj, other, how=how, level=level, sort=sort, axis=axis) + return semijoin( + self._obj, + other, + how=how, + level=level, + sort=sort, + axis=axis, + fill_value=fill_value, + ) def multiply(self, other, **align_kwds): return arithmetics.multiply(self._obj, other, **align_kwds) diff --git a/src/pandas_indexing/core.py b/src/pandas_indexing/core.py index d3dcae0..eec692b 100644 --- a/src/pandas_indexing/core.py +++ b/src/pandas_indexing/core.py @@ -22,6 +22,7 @@ import pandas as pd from deprecated import deprecated from pandas import DataFrame, Index, MultiIndex, Series +from pandas.api.extensions import no_default from pandas.core.indexes.frozen import FrozenList from .types import Axis, Data, S, T @@ -461,6 +462,7 @@ def semijoin( level: Union[str, int, None] = None, sort: bool = False, axis: Axis = 0, + fill_value: Any = no_default, ) -> S: """Semijoin ``data`` by index ``other``. @@ -477,6 +479,8 @@ def semijoin( Whether to sort the index axis : {{0, 1, "index", "columns"}} Axis on which to join + fill_value + Value for filling gaps introduced by right or outer joins Returns ------- @@ -516,11 +520,13 @@ def semijoin( data = frame_or_series.iloc[:, left_idx] index = data.columns if any_missing: - data = data.where(pd.Series(left_idx != -1, index), axis=axis) + data = data.where( + pd.Series(left_idx != -1, index), other=fill_value, axis=axis + ) elif isinstance(frame_or_series, Series): data = frame_or_series.iloc[left_idx] if any_missing: - data = data.where(left_idx != -1) + data = data.where(left_idx != -1, other=fill_value) else: raise TypeError( f"frame_or_series must derive from DataFrame or Series, but is {type(frame_or_series)}" From b3ff5114b73dc002399400f0a6d6c2e8d769c172 Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Wed, 26 Jul 2023 01:18:05 +0200 Subject: [PATCH 2/3] Defer loading of openscm_units --- src/pandas_indexing/units.py | 15 +++------------ tests/test_units.py | 6 +++++- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/pandas_indexing/units.py b/src/pandas_indexing/units.py index 5c935da..35c2598 100644 --- a/src/pandas_indexing/units.py +++ b/src/pandas_indexing/units.py @@ -51,7 +51,6 @@ -------- pint.set_application_registry """ - from typing import Callable, Mapping, Optional, Union from pandas import DataFrame, Series @@ -73,12 +72,6 @@ except ImportError: has_pint = False -try: - import openscm_units - - has_openscm_units = True -except ImportError: - has_openscm_units = False from .core import assignlevel, uniquelevel from .types import Axis, Data @@ -306,12 +299,12 @@ def _convert_unit(df, old_unit=None): _openscm_registry = None -def get_openscm_registry(add_co2e: bool = True) -> "openscm_units.ScmUnitRegistry": +def get_openscm_registry(add_co2e: bool = True): global _openscm_registry if _openscm_registry is not None: return _openscm_registry - assert has_openscm_units, INSTALL_PACKAGE_WARNING.format(package="openscm-units") + import openscm_units if add_co2e: _openscm_registry = openscm_units.ScmUnitRegistry() @@ -324,9 +317,7 @@ def get_openscm_registry(add_co2e: bool = True) -> "openscm_units.ScmUnitRegistr return _openscm_registry -def set_openscm_registry_as_default( - add_co2e: bool = True, -) -> "openscm_units.ScmUnitRegistry": +def set_openscm_registry_as_default(add_co2e: bool = True): unit_registry = get_openscm_registry(add_co2e=add_co2e) assert has_pint, INSTALL_PACKAGE_WARNING.format(package="pint") diff --git a/tests/test_units.py b/tests/test_units.py index bac540f..3222e4c 100644 --- a/tests/test_units.py +++ b/tests/test_units.py @@ -1,10 +1,14 @@ +from importlib.util import find_spec + import pytest from pandas import DataFrame, Series from pandas.testing import assert_frame_equal, assert_series_equal from pandas_indexing import assignlevel, convert_unit, set_openscm_registry_as_default -from pandas_indexing.units import has_openscm_units, has_pint, has_pint_pandas, is_unit +from pandas_indexing.units import has_pint, has_pint_pandas, is_unit + +has_openscm_units = bool(find_spec("openscm_units")) needs_pint = pytest.mark.skipif(not has_pint, reason="Needs pint package") needs_openscm_units = pytest.mark.skipif( From 388e1460ab9e12a6cbca06745f556884a18484c9 Mon Sep 17 00:00:00 2001 From: Jonas Hoersch Date: Wed, 26 Jul 2023 01:21:58 +0200 Subject: [PATCH 3/3] Revise arithmetics - Add unit aware methods `unitadd`, `unitdiv`, ... - Add assign argument to integrate level modifications - Align properly in op([Series], [DataFrame]) order --- src/pandas_indexing/accessors.py | 53 +++++++++++---- src/pandas_indexing/arithmetics.py | 103 ++++++++++++++++++++--------- 2 files changed, 114 insertions(+), 42 deletions(-) diff --git a/src/pandas_indexing/accessors.py b/src/pandas_indexing/accessors.py index 59a3046..2f33727 100644 --- a/src/pandas_indexing/accessors.py +++ b/src/pandas_indexing/accessors.py @@ -144,18 +144,6 @@ def semijoin( fill_value=fill_value, ) - def multiply(self, other, **align_kwds): - return arithmetics.multiply(self._obj, other, **align_kwds) - - def divide(self, other, **align_kwds): - return arithmetics.divide(self._obj, other, **align_kwds) - - def add(self, other, **align_kwds): - return arithmetics.add(self._obj, other, **align_kwds) - - def subtract(self, other, **align_kwds): - return arithmetics.subtract(self._obj, other, **align_kwds) - @doc(quantify, data="", example_call="s.pix.quantify()") def quantify( self, @@ -205,6 +193,47 @@ def aggregate( ) +def _create_forward_binop(op): + def forward_binop( + self, + other: Data, + assign: Optional[Dict[str, Any]] = None, + axis: Optional[Axis] = None, + **align_kwargs: Any, + ): + return getattr(arithmetics, op)( + self._obj, other, assign=assign, axis=axis, **align_kwargs + ) + + return forward_binop + + +def _create_forward_unitbinop(op): + def forward_unitbinop( + self, + other: Data, + level: str = "unit", + assign: Optional[Dict[str, Any]] = None, + axis: Optional[Axis] = None, + **align_kwargs: Any, + ): + return getattr(arithmetics, f"unit{op}")( + self._obj, other, level=level, assign=assign, axis=axis, **align_kwargs + ) + + return forward_unitbinop + + +for op in arithmetics.ARITHMETIC_BINOPS: + forward_binop = _create_forward_binop(op) + forward_unitbinop = _create_forward_unitbinop(op) + setattr(_DataPixAccessor, op, forward_binop) + setattr(_DataPixAccessor, f"unit{op}", forward_unitbinop) + for alt in arithmetics.ALTERNATIVE_NAMES.get(op, []): + setattr(_DataPixAccessor, alt, forward_binop) + setattr(_DataPixAccessor, f"unit{alt}", forward_unitbinop) + + @pd.api.extensions.register_dataframe_accessor("pix") class DataFramePixAccessor(_DataPixAccessor): pass diff --git a/src/pandas_indexing/arithmetics.py b/src/pandas_indexing/arithmetics.py index 3c00594..775f722 100644 --- a/src/pandas_indexing/arithmetics.py +++ b/src/pandas_indexing/arithmetics.py @@ -14,11 +14,21 @@ pandas.DataFrame.align """ -from typing import Any, Mapping, Tuple +import operator +from typing import Any, Dict, Optional from pandas import DataFrame, Series +from pandas.core.ops import ARITHMETIC_BINOPS -from .types import Data +from .core import assignlevel, uniquelevel +from .types import Axis, Data + + +ALTERNATIVE_NAMES = { + "truediv": ["div", "divide"], + "mul": ["multiply"], + "sub": ["subtract"], +} def _needs_axis(df: Data, other: Data) -> bool: @@ -27,31 +37,64 @@ def _needs_axis(df: Data, other: Data) -> bool: ) -def _prepare_op( - df: Data, other: Data, kwargs: Mapping[str, Any] -) -> Tuple[Data, Data, Mapping[str, Any]]: - kwargs.setdefault("copy", True) - if _needs_axis(df, other): - kwargs.setdefault("axis", 0) - df, other = df.align(other, **kwargs) - return df, other, kwargs - - -def add(df: Data, other: Data, **align_kwargs: Any) -> Data: - df, other, align_kwargs = _prepare_op(df, other, align_kwargs) - return df.add(other, axis=align_kwargs.get("axis", 0)) - - -def divide(df: Data, other: Data, **align_kwargs: Any) -> Data: - df, other, align_kwargs = _prepare_op(df, other, align_kwargs) - return df.div(other, axis=align_kwargs.get("axis", 0)) - - -def multiply(df: Data, other: Data, **align_kwargs: Any) -> Data: - df, other, align_kwargs = _prepare_op(df, other, align_kwargs) - return df.mul(other, axis=align_kwargs.get("axis", 0)) - - -def subtract(df: Data, other: Data, **align_kwargs: Any) -> Data: - df, other, align_kwargs = _prepare_op(df, other, align_kwargs) - return df.sub(other, axis=align_kwargs.get("axis", 0)) +def _create_binop(op: str): + def binop( + df: Data, + other: Data, + assign: Optional[Dict[str, Any]] = None, + axis: Optional[Axis] = None, + **align_kwargs: Any, + ): + if assign is not None: + df = assignlevel(df, **assign) + other = assignlevel(other, **assign) + + align_kwargs.setdefault("copy", False) + if _needs_axis(df, other): + axis = 0 + if isinstance(df, Series) and isinstance(other, DataFrame): + if align_kwargs.get("join") in ("left", "right"): + align_kwargs["join"] = {"left": "right", "right": "left"}[ + align_kwargs["join"] + ] + other, df = other.align(df, axis=axis, **align_kwargs) + else: + df, other = df.align(other, axis=axis, **align_kwargs) + + return getattr(df, op)(other, axis=axis) + + return binop + + +def _create_unitbinop(op, binop): + def unitbinop( + df: Data, + other: Data, + level: str = "unit", + assign: Optional[Dict[str, Any]] = None, + axis: Optional[Axis] = None, + **align_kwargs: Any, + ): + df_unit = uniquelevel(df, level, axis=axis).item() + other_unit = uniquelevel(other, level, axis=axis).item() + + import pint + + ur = pint.get_application_registry() + quantity = getattr(operator, op)(ur(df_unit), ur(other_unit)).to_reduced_units() + + if assign is None: + assign = dict() + assign = {level: f"{quantity.units:~}"} | assign + + return binop(df, other, assign=assign, axis=axis, **align_kwargs) * quantity.m + + return unitbinop + + +for op in ARITHMETIC_BINOPS: + binop = _create_binop(op) + unitbinop = _create_unitbinop(op, binop) + globals().update({op: binop, f"unit{op}": unitbinop}) + for alt in ALTERNATIVE_NAMES.get(op, []): + globals().update({alt: binop, f"unit{alt}": unitbinop})