Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(core): Add add_zeros_like function #51

Merged
merged 2 commits into from
Apr 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
Changelog
=========

v0.4.2 (2024-04-03)
------------------------------------------------------------
* Add :func:`~core.add_zeros_like` for adding explicit `levels` as 0 values :pull:`51`

v0.4.1 (2023-03-20)
------------------------------------------------------------
* Add :func:`~core.antijoin` for performing anti-joins :pull:`48`
Expand Down
1 change: 0 additions & 1 deletion requirements.txt

This file was deleted.

11 changes: 11 additions & 0 deletions src/pandas_indexing/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from . import arithmetics
from .core import (
add_zeros_like,
aggregatelevel,
antijoin,
assignlevel,
Expand Down Expand Up @@ -208,6 +209,16 @@ def aggregate(
self._obj, agg_func=agg_func, axis=axis, dropna=dropna, mode=mode, **levels
)

@doc(add_zeros_like, data="")
def add_zeros_like(
    self,
    reference: Union[MultiIndex, DataFrame, Series],
    /,
    derive: Optional[Dict[str, MultiIndex]] = None,
    **levels: Sequence[str],
):
    # Accessor entry point: forwards to the module-level ``add_zeros_like``
    # with the wrapped object as ``data``. The documentation is provided by
    # the ``@doc`` decorator, hence no docstring of its own here.
    extended = add_zeros_like(
        self._obj,
        reference=reference,
        derive=derive,
        **levels,
    )
    return extended


def _create_forward_binop(op):
def forward_binop(
Expand Down
61 changes: 61 additions & 0 deletions src/pandas_indexing/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,3 +1075,64 @@ def has_any_label(index: MultiIndex, level: str, labels: Sequence[Any]):
raise ValueError(
f'mode must be "replace", "append" or "return", but is "{mode}"'
)


@doc(
    data="""
    data : Data
        Series or DataFrame to extend with zeros\
    """
)
def add_zeros_like(
    data: T,
    reference: Union[MultiIndex, DataFrame, Series],
    *,
    derive: Optional[Dict[str, MultiIndex]] = None,
    **levels: Sequence[str],
) -> T:
    """Add explicit `levels` to `data` as 0 values.

    Remaining levels in `data` not found in `levels` or `derive` are taken from
    `reference` (or its index).

    Parameters
    ----------\
    {data}
    reference : Index
        expected level labels (like model, scenario combinations)
    derive : dict
        derive labels in a level from a multiindex with allowed combinations
    **levels : [str]
        which labels should be added to df

    Returns
    -------
    Series or DataFrame
        unsorted data with additional zero data, same kind as `data`
    """

    # An empty label list makes the product below empty, so there is nothing
    # to add. `len(...) == 0` is used on purpose: labels may be array-like,
    # for which plain truthiness is ambiguous.
    if any(len(labels) == 0 for labels in levels.values()):
        return data

    if isinstance(reference, (Series, DataFrame)):
        reference = reference.index

    if derive is None:
        derive = {}

    # Levels that are neither given explicitly nor derived come from `reference`
    target_levels = data.index.names
    index = reference.pix.unique(
        target_levels.difference(levels.keys()).difference(derive.keys())
    )

    # One candidate index per combination of the requested labels; the derived
    # levels are joined on afterwards and the level order aligned with `data`.
    zero_index = concat(
        reduce(
            lambda ind, d: ind.join(d, how="left"),
            derive.values(),
            index.pix.assign(**dict(zip(levels.keys(), labels))),
        ).reorder_levels(target_levels)
        for labels in product(*levels.values())
    )
    # Only add entries which are not already present in `data`
    zero_index = antijoin(zero_index, data.index)

    # Build the zero block with the same kind as `data`; a Series has no
    # `columns`, so it needs its own branch.
    if isinstance(data, Series):
        zeros = pd.Series(0, index=zero_index, name=data.name)
    else:
        zeros = pd.DataFrame(0, index=zero_index, columns=data.columns)

    return concat([data, zeros])
13 changes: 13 additions & 0 deletions tests/test_accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from pandas import DataFrame, Index, MultiIndex
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal

import pandas_indexing # noqa: F401


def test_assign_index(midx: MultiIndex):
"""
Expand Down Expand Up @@ -138,3 +140,14 @@ def test_aggregate(mdf):
MultiIndex.from_tuples([("bar", 3), ("foo", "new")], names=["str", "num"]),
),
)


def test_add_zeros_like(mdf):
    """The ``pix`` accessor fills combinations from the reference with zeros."""
    level_values = [
        ["foo", "foo", "bar", "baz"],
        [1, 2, 3, 4],
        ["a", "b", "c", "d"],
    ]
    reference = MultiIndex.from_arrays(level_values, names=["str", "num", "new"])

    result = mdf.pix.add_zeros_like(reference)

    # The extra "new" level is not part of the data, so it is dropped for the
    # expected index
    expected = mdf.reindex(reference.droplevel("new"), fill_value=0)
    assert_frame_equal(result, expected)
42 changes: 42 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas.testing import assert_frame_equal, assert_index_equal, assert_series_equal

from pandas_indexing.core import (
add_zeros_like,
aggregatelevel,
antijoin,
assignlevel,
Expand Down Expand Up @@ -612,3 +613,44 @@ def test_aggregatelevel(mdf):

with pytest.raises(ValueError):
aggregatelevel(mdf, num=dict(new=[1, 2]), mode="bla")


def test_add_zeros_like(mdf):
    """Exercise add_zeros_like with index/series references, levels and derive."""
    reference = MultiIndex.from_arrays(
        [["foo", "foo", "bar", "baz"], [1, 2, 3, 4], ["a", "b", "c", "d"]],
        names=["str", "num", "new"],
    )

    # A MultiIndex and a Series carrying that index are equivalent references
    for ref in (reference, Series(0, reference)):
        filled = add_zeros_like(mdf, ref)
        expected = mdf.reindex(reference.droplevel("new"), fill_value=0)
        assert_frame_equal(filled, expected)

    # An empty list of labels leaves the data untouched
    unchanged = add_zeros_like(mdf, reference, blub=[])
    assert_frame_equal(unchanged, mdf)

    # Explicit labels for the "num" level
    missing = MultiIndex.from_arrays(
        [["bar", "baz", "foo", "baz"], [2, 2, 3, 3]], names=["str", "num"]
    )
    with_levels = add_zeros_like(mdf, reference, num=[2, 3])
    assert_frame_equal(
        with_levels,
        mdf.reindex(mdf.index.append(missing), fill_value=0),
    )

    def add_first(obj):
        # Attach a "first" level holding the first character of the "str" level
        idx = obj.index if not isinstance(obj, Index) else obj
        initials = projectlevel(idx, "str").str[:1]
        return assignlevel(obj, first=initials)

    # Explicit labels combined with a level derived from allowed combinations
    mdf_w_first = add_first(mdf)
    allowed = add_first(Index(["foo", "bar", "baz"], name="str"))
    derived = add_zeros_like(
        mdf_w_first,
        reference,
        num=[2, 3],
        derive={"first": allowed},
    )
    expected_index = mdf_w_first.index.append(add_first(missing))
    assert_frame_equal(
        derived,
        mdf_w_first.reindex(expected_index, fill_value=0),
    )