-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ENH union_categoricals supports ignore_order GH13410 #15219
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -208,7 +208,7 @@ def _concat_asobject(to_concat): | |
return _concat_asobject(to_concat) | ||
|
||
|
||
def union_categoricals(to_union, sort_categories=False): | ||
def union_categoricals(to_union, sort_categories=False, ignore_order=False): | ||
""" | ||
Combine list-like of Categorical-like, unioning categories. All | ||
categories must have the same dtype. | ||
|
@@ -222,6 +222,11 @@ def union_categoricals(to_union, sort_categories=False): | |
sort_categories : boolean, default False | ||
If true, resulting categories will be lexsorted, otherwise | ||
they will be ordered as they appear in the data. | ||
ignore_order : boolean, default False | ||
If true, the ordered attribute of the Categoricals will be ignored. | ||
Results in an unordered categorical. | ||
|
||
.. versionadded:: 0.20.0 | ||
|
||
Returns | ||
------- | ||
|
@@ -235,7 +240,7 @@ def union_categoricals(to_union, sort_categories=False): | |
- all inputs are ordered and their categories are not identical | ||
- sort_categories=True and Categoricals are ordered | ||
ValueError | ||
Emmpty list of categoricals passed | ||
Empty list of categoricals passed | ||
""" | ||
from pandas import Index, Categorical, CategoricalIndex, Series | ||
|
||
|
@@ -264,15 +269,15 @@ def _maybe_unwrap(x): | |
ordered = first.ordered | ||
new_codes = np.concatenate([c.codes for c in to_union]) | ||
|
||
if sort_categories and ordered: | ||
if sort_categories and not ignore_order and ordered: | ||
raise TypeError("Cannot use sort_categories=True with " | ||
"ordered Categoricals") | ||
|
||
if sort_categories and not categories.is_monotonic_increasing: | ||
categories = categories.sort_values() | ||
indexer = categories.get_indexer(first.categories) | ||
new_codes = take_1d(indexer, new_codes, fill_value=-1) | ||
elif all(not c.ordered for c in to_union): | ||
elif ignore_order or all(not c.ordered for c in to_union): | ||
# different categories - union and recode | ||
cats = first.categories.append([c.categories for c in to_union[1:]]) | ||
categories = Index(cats.unique()) | ||
|
@@ -297,6 +302,9 @@ def _maybe_unwrap(x): | |
else: | ||
raise TypeError('Categorical.ordered must be the same') | ||
|
||
if ignore_order: | ||
ordered = False | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think ordered is already False (line 263)? Is this still needed? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The if statement on line 264 can be entered if the ordered categoricals have the same categories and order, because is_dtype_equal checks both categories and ordering. |
||
|
||
return Categorical(new_codes, categories=categories, ordered=ordered, | ||
fastpath=True) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a versionadded 0.20.0 tag
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you add the versionadded tag