Skip to content

Commit

Permalink
REF: Minimize operations in recode_for_groupby (#59618)
Browse files: browse the repository at this point in the history
  • Loading branch information
mroeschke authored Aug 26, 2024
1 parent bb4ab4f commit 15e9e7a
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions pandas/core/groupby/categorical.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,9 +46,8 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorical:
# In cases with c.ordered, this is equivalent to
# return c.remove_unused_categories(), c

unique_codes = unique1d(c.codes) # type: ignore[no-untyped-call]
take_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call]

take_codes = unique_codes[unique_codes != -1]
if sort:
take_codes = np.sort(take_codes)

Expand All @@ -67,17 +66,18 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorical:

# sort=False should order groups in as-encountered order (GH-8868)

# xref GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
all_codes = np.arange(c.categories.nunique())
# GH:46909: Re-ordering codes faster than using (set|add|reorder)_categories
# GH 38140: exclude nan from indexer for categories
unique_notnan_codes = unique1d(c.codes[c.codes != -1]) # type: ignore[no-untyped-call]
if sort:
unique_notnan_codes = np.sort(unique_notnan_codes)
if len(all_codes) > len(unique_notnan_codes):
if (num_cat := len(c.categories)) > len(unique_notnan_codes):
# GH 13179: All categories need to be present, even if missing from the data
missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True)
missing_codes = np.setdiff1d(
np.arange(num_cat), unique_notnan_codes, assume_unique=True
)
take_codes = np.concatenate((unique_notnan_codes, missing_codes))
else:
take_codes = unique_notnan_codes

return Categorical(c, c.unique().categories.take(take_codes))
return Categorical(c, c.categories.take(take_codes))

0 comments on commit 15e9e7a

Please sign in to comment.