diff --git a/src/pandas_indexing/accessors.py b/src/pandas_indexing/accessors.py index 42b7f2f..bebff5c 100644 --- a/src/pandas_indexing/accessors.py +++ b/src/pandas_indexing/accessors.py @@ -102,9 +102,9 @@ def extract( @doc(formatlevel, index_or_data="") def format( - self, axis: Axis = 0, **templates: str + self, axis: Axis = 0, optional: Sequence[str] | None = None, **templates: str ) -> Union[DataFrame, Series, Index]: - return formatlevel(self._obj, axis=axis, **templates) + return formatlevel(self._obj, axis=axis, optional=optional, **templates) @doc(uniquelevel, index_or_data="") def unique( diff --git a/src/pandas_indexing/core.py b/src/pandas_indexing/core.py index 077736e..d76868e 100644 --- a/src/pandas_indexing/core.py +++ b/src/pandas_indexing/core.py @@ -905,7 +905,13 @@ def extractlevel( return index_or_data -def _formatlevel(index: Index, drop: bool = False, **templates: str) -> Index: +def _formatlevel( + index: Index, + drop: bool = False, + optional: frozenset[str] = frozenset(), + fallback: str = "Total", + **templates: str, +) -> Index: levels = {} used_levels = set() for dim, template in templates.items(): @@ -915,7 +921,16 @@ def _formatlevel(index: Index, drop: bool = False, **templates: str) -> Index: for m in re.finditer(r"\{([a-zA-Z_]+)\}", template): level = m.group(1) start, end = m.span() - string += template[prev_end:start] + projectlevel(index, level).astype(str) + + labels = projectlevel(index, level).astype(str) + if level in optional: + if template[start - 1] == "|": + start -= 1 + labels = ("|" + labels).where(labels != fallback, "") + else: + labels = labels.where(labels != fallback, "") + + string += template[prev_end:start] + labels prev_end = end used_levels.add(level) string += template[prev_end:] @@ -955,11 +970,15 @@ def formatlevel( index_or_data: T, drop: bool = False, axis: Axis = 0, + optional: Sequence[str] | None = None, **templates: str, ) -> T: """Format index levels based on a *template* which can refer to other 
levels. + .. versionchanged:: 0.5.3 + Added optional patterns. + Parameters ----------\ {index_or_data} @@ -967,6 +986,8 @@ def formatlevel( Whether to drop the used index levels axis : {{0, 1, "index", "columns"}}, default 0 Axis of DataFrame to modify + optional : [str], optional + Levels omitted (with a directly preceding |) where their label is "Total" **templates : str Format templates for one or multiple levels @@ -979,11 +1000,15 @@ def formatlevel( ValueError If *templates* refer to non-existant levels """ + optional = frozenset() if optional is None else frozenset(optional) + if isinstance(index_or_data, Index): - return _formatlevel(index_or_data, drop, **templates) + return _formatlevel(index_or_data, drop, optional=optional, **templates) index = get_axis(index_or_data, axis) - return index_or_data.set_axis(_formatlevel(index, drop, **templates), axis=axis) + return index_or_data.set_axis( + _formatlevel(index, drop, optional=optional, **templates), axis=axis + ) def _fixindexna(index: Index): diff --git a/tests/test_core.py b/tests/test_core.py index ac36565..2deeb98 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -192,6 +192,13 @@ def test_formatlevel_options(mdf: DataFrame): ), ) + # optional + mdf_total = assignlevel(mdf, num=["one", "two", "Total"]) + assert_frame_equal( + formatlevel(mdf_total, new="{str}|{num}", drop=True, optional=["num"]), + mdf_total.set_axis(Index(idx_str + Index(["|one", "|two", ""]), name="new")), + ) + def test_formatlevel_data(mdf, mseries, midx): idx_str = midx.get_level_values(0)