Skip to content

Commit

Permalink
feat(core): Add add_zeros_like function
Browse files Browse the repository at this point in the history
  • Loading branch information
coroa committed Apr 3, 2024
1 parent 1ff095c commit 93bba4e
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 1 deletion.
1 change: 0 additions & 1 deletion requirements.txt

This file was deleted.

11 changes: 11 additions & 0 deletions src/pandas_indexing/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from . import arithmetics
from .core import (
add_zeros_like,
aggregatelevel,
antijoin,
assignlevel,
Expand Down Expand Up @@ -208,6 +209,16 @@ def aggregate(
self._obj, agg_func=agg_func, axis=axis, dropna=dropna, mode=mode, **levels
)

# Accessor counterpart of the module-level `add_zeros_like` imported from
# `.core`. The `@doc` decorator is handed that function together with an empty
# `data` entry (the accessor supplies the data itself via `self._obj`), so no
# separate docstring is written here.
@doc(add_zeros_like, data="")
def add_zeros_like(
    self,
    reference: Union[MultiIndex, DataFrame, Series],
    /,
    derive: Optional[Dict[str, MultiIndex]] = None,
    **levels: Sequence[str],
):
    # `reference` is positional-only here (`/`), while `derive` and the level
    # labels are forwarded as keywords.
    # Inside the body the name `add_zeros_like` resolves to the module-level
    # import, not to this method, so this is a plain delegation on the
    # wrapped object and not a recursive call.
    return add_zeros_like(self._obj, reference=reference, derive=derive, **levels)


def _create_forward_binop(op):
def forward_binop(
Expand Down
61 changes: 61 additions & 0 deletions src/pandas_indexing/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,3 +1075,64 @@ def has_any_label(index: MultiIndex, level: str, labels: Sequence[Any]):
raise ValueError(
f'mode must be "replace", "append" or "return", but is "{mode}"'
)


@doc(
    data="""
    data : Data
        Series or DataFrame to extend with zeros\
    """
)
def add_zeros_like(
    data: T,
    reference: Union[MultiIndex, DataFrame, Series],
    *,
    derive: Optional[Dict[str, MultiIndex]] = None,
    **levels: Sequence[str],
) -> T:
    """Add explicit `levels` to `data` as 0 values.

    Remaining levels in `data` not found in `levels` or `derive` are taken from
    `reference` (or its index).

    Parameters
    ----------\
    {data}
    reference : MultiIndex, DataFrame or Series
        expected level labels (like model, scenario combinations); for a
        DataFrame or Series its index is used
    derive : dict, optional
        derive labels in a level from a multiindex with allowed combinations
    **levels : [str]
        which labels should be added to `data`; if any of the given label
        lists is empty, `data` is returned unchanged

    Returns
    -------
    Series or DataFrame
        unsorted data with additional zero data, of the same kind as `data`
    """

    # Nothing can be added if one of the requested levels has no labels.
    # `len(...) == 0` is kept instead of a truthiness test, which would be
    # ambiguous for numpy arrays or pandas indexes passed as labels.
    if any(len(labels) == 0 for labels in levels.values()):
        return data

    if isinstance(reference, (Series, DataFrame)):
        reference = reference.index

    if derive is None:
        derive = {}

    # Levels that are neither given explicitly nor derived are taken from the
    # unique combinations found in `reference`.
    target_levels = data.index.names
    index = reference.pix.unique(
        target_levels.difference(levels.keys()).difference(derive.keys())
    )

    # For every combination of explicit labels: assign them to the reference
    # combinations, left-join each `derive` index to fill the derived levels
    # and bring the levels into the order used by `data`.
    zero_index = concat(
        reduce(
            lambda ind, d: ind.join(d, how="left"),
            derive.values(),
            index.pix.assign(**dict(zip(levels.keys(), labels))),
        ).reorder_levels(target_levels)
        for labels in product(*levels.values())
    )
    # Only keep entries which are not already present in `data`
    zero_index = antijoin(zero_index, data.index)

    # Build the zero filler with the same kind of object as `data`; a Series
    # has no `columns`, so it needs its own branch.
    if isinstance(data, Series):
        zeros = pd.Series(0, index=zero_index, name=data.name)
    else:
        zeros = pd.DataFrame(0, index=zero_index, columns=data.columns)

    return concat([data, zeros])
13 changes: 13 additions & 0 deletions tests/test_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from pandas import DataFrame, Index, MultiIndex
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal

import pandas_indexing # noqa: F401


def test_assign_index(midx: MultiIndex):
"""
Expand Down Expand Up @@ -138,3 +140,14 @@ def test_aggregate(mdf):
MultiIndex.from_tuples([("bar", 3), ("foo", "new")], names=["str", "num"]),
),
)


def test_add_zeros_like(mdf):
    """
    Check that the ``pix.add_zeros_like`` accessor fills `mdf` up to a reference index.
    """
    level_values = [
        ["foo", "foo", "bar", "baz"],
        [1, 2, 3, 4],
        ["a", "b", "c", "d"],
    ]
    reference = MultiIndex.from_arrays(level_values, names=["str", "num", "new"])

    result = mdf.pix.add_zeros_like(reference)

    # The expectation is built from the reference without its `new` level,
    # with 0 as fill value for entries that `mdf` does not have.
    expected = mdf.reindex(reference.droplevel("new"), fill_value=0)
    assert_frame_equal(result, expected)
42 changes: 42 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal

from pandas_indexing.core import (
add_zeros_like,
aggregatelevel,
antijoin,
assignlevel,
Expand Down Expand Up @@ -612,3 +613,44 @@ def test_aggregatelevel(mdf):

with pytest.raises(ValueError):
aggregatelevel(mdf, num=dict(new=[1, 2]), mode="bla")


def test_add_zeros_like(mdf):
    """
    Check ``add_zeros_like`` with index and Series references, explicit
    level labels and derived levels.
    """
    reference = MultiIndex.from_arrays(
        [
            ["foo", "foo", "bar", "baz"],
            [1, 2, 3, 4],
            ["a", "b", "c", "d"],
        ],
        names=["str", "num", "new"],
    )

    # Reference passed directly as a MultiIndex
    result = add_zeros_like(mdf, reference)
    expected = mdf.reindex(reference.droplevel("new"), fill_value=0)
    assert_frame_equal(result, expected)

    # Reference passed as a Series carrying the same index
    result = add_zeros_like(mdf, Series(0, reference))
    expected = mdf.reindex(reference.droplevel("new"), fill_value=0)
    assert_frame_equal(result, expected)

    # An empty label list returns the data as it is
    assert_frame_equal(add_zeros_like(mdf, reference, blub=[]), mdf)

    # Explicit labels for the `num` level
    missing = MultiIndex.from_arrays(
        [["bar", "baz", "foo", "baz"], [2, 2, 3, 3]], names=["str", "num"]
    )
    result = add_zeros_like(mdf, reference, num=[2, 3])
    expected = mdf.reindex(mdf.index.append(missing), fill_value=0)
    assert_frame_equal(result, expected)

    def with_first(obj):
        # Attach a `first` level holding the first character of the `str` level
        if isinstance(obj, Index):
            idx = obj
        else:
            idx = obj.index
        return assignlevel(obj, first=projectlevel(idx, "str").str[:1])

    # Explicit `num` labels combined with a `first` level derived from `str`
    mdf_w_first = with_first(mdf)
    result = add_zeros_like(
        mdf_w_first,
        reference,
        num=[2, 3],
        derive=dict(first=with_first(Index(["foo", "bar", "baz"], name="str"))),
    )
    expected = mdf_w_first.reindex(
        mdf_w_first.index.append(with_first(missing)), fill_value=0
    )
    assert_frame_equal(result, expected)

0 comments on commit 93bba4e

Please sign in to comment.