Skip to content

Commit

Permalink
Speed up count_nested when by=None
Browse files Browse the repository at this point in the history
  • Loading branch information
dougbrn committed Apr 17, 2024
1 parent 3aff79e commit a04dd8b
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/nested_pandas/utils/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,7 +28,8 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame:
"""

if by is None:
- counts = df[nested].apply(lambda x: len(x)).rename(f"n_{nested}")
+ # to_flat() is faster than direct apply in this case
+ counts = df[nested].nest.to_flat().groupby(level=0).apply(lambda x: len(x)).rename(f"n_{nested}")
else:
counts = df[nested].apply(lambda x: x[by].value_counts())
counts = counts.rename(columns={colname: f"n_{nested}_{colname}" for colname in counts.columns})
Expand Down

0 comments on commit a04dd8b

Please sign in to comment.