From a04dd8b388bcd6a5f4658e9e409a7b5cfcf43425 Mon Sep 17 00:00:00 2001
From: Doug Branton
Date: Wed, 17 Apr 2024 14:46:56 -0700
Subject: [PATCH] speedup for count_nested by=None

---
 src/nested_pandas/utils/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nested_pandas/utils/utils.py b/src/nested_pandas/utils/utils.py
index 259f3fc..662e1d2 100644
--- a/src/nested_pandas/utils/utils.py
+++ b/src/nested_pandas/utils/utils.py
@@ -28,7 +28,8 @@ def count_nested(df, nested, by=None, join=True) -> NestedFrame:
     """
 
     if by is None:
-        counts = df[nested].apply(lambda x: len(x)).rename(f"n_{nested}")
+        # to_flat() is faster than direct apply in this case
+        counts = df[nested].nest.to_flat().groupby(level=0).apply(lambda x: len(x)).rename(f"n_{nested}")
     else:
         counts = df[nested].apply(lambda x: x[by].value_counts())
         counts = counts.rename(columns={colname: f"n_{nested}_{colname}" for colname in counts.columns})