-
Notifications
You must be signed in to change notification settings - Fork 358
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement DataFrame/Series rename_axis #1843
Changes from all commits
caafa70
bfc5677
f813c13
d45f65b
5085f77
0e5d4f0
7c56e21
9e819aa
f21b6ba
edcef50
ae98c31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9122,6 +9122,168 @@ def gen_new_column_labels_entry(column_labels_entry): | |
else: | ||
return DataFrame(internal) | ||
|
||
def rename_axis( | ||
self, | ||
mapper: Optional[Any] = None, | ||
index: Optional[Any] = None, | ||
columns: Optional[Any] = None, | ||
axis: Optional[Union[int, str]] = 0, | ||
inplace: Optional[bool] = False, | ||
) -> Optional["DataFrame"]: | ||
""" | ||
Set the name of the axis for the index or columns. | ||
|
||
Parameters | ||
---------- | ||
mapper : scalar, list-like, optional | ||
A scalar, list-like, dict-like or functions transformations to | ||
apply to the axis name attribute. | ||
index, columns : scalar, list-like, dict-like or function, optional | ||
A scalar, list-like, dict-like or functions transformations to | ||
apply to that axis' values. | ||
|
||
Use either ``mapper`` and ``axis`` to | ||
specify the axis to target with ``mapper``, or ``index`` | ||
and/or ``columns``. | ||
axis : {0 or 'index', 1 or 'columns'}, default 0 | ||
The axis to rename. | ||
inplace : bool, default False | ||
Modifies the object directly, instead of creating a new DataFrame. | ||
|
||
Returns | ||
------- | ||
DataFrame, or None if `inplace` is True. | ||
|
||
See Also | ||
-------- | ||
Series.rename : Alter Series index labels or name. | ||
DataFrame.rename : Alter DataFrame index labels or name. | ||
Index.rename : Set new names on index. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we can also have There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, I did not put them as I split |
||
|
||
Notes | ||
----- | ||
``DataFrame.rename_axis`` supports two calling conventions | ||
|
||
* ``(index=index_mapper, columns=columns_mapper, ...)`` | ||
* ``(mapper, axis={'index', 'columns'}, ...)`` | ||
|
||
The first calling convention will only modify the names of | ||
the index and/or the names of the Index object that is the columns. | ||
|
||
The second calling convention will modify the names of the | ||
corresponding index specified by axis. | ||
|
||
We *highly* recommend using keyword arguments to clarify your | ||
intent. | ||
|
||
Examples | ||
-------- | ||
>>> df = pd.DataFrame({"num_legs": [4, 4, 2], | ||
... "num_arms": [0, 0, 2]}, | ||
... index=["dog", "cat", "monkey"], | ||
... columns=["num_legs", "num_arms"]) | ||
>>> df | ||
num_legs num_arms | ||
dog 4 0 | ||
cat 4 0 | ||
monkey 2 2 | ||
itholic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
>>> df = df.rename_axis("animal").sort_index() | ||
>>> df # doctest: +NORMALIZE_WHITESPACE | ||
num_legs num_arms | ||
animal | ||
cat 4 0 | ||
dog 4 0 | ||
monkey 2 2 | ||
itholic marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
>>> df = df.rename_axis("limbs", axis="columns").sort_index() | ||
>>> df # doctest: +NORMALIZE_WHITESPACE | ||
limbs num_legs num_arms | ||
animal | ||
cat 4 0 | ||
dog 4 0 | ||
monkey 2 2 | ||
|
||
**MultiIndex** | ||
|
||
>>> index = pd.MultiIndex.from_product([['mammal'], | ||
... ['dog', 'cat', 'monkey']], | ||
... names=['type', 'name']) | ||
>>> df = ks.DataFrame({"num_legs": [4, 4, 2], | ||
... "num_arms": [0, 0, 2]}, | ||
... index=index, | ||
... columns=["num_legs", "num_arms"]) | ||
>>> df # doctest: +NORMALIZE_WHITESPACE | ||
num_legs num_arms | ||
type name | ||
mammal dog 4 0 | ||
cat 4 0 | ||
monkey 2 2 | ||
|
||
>>> df.rename_axis(index={'type': 'class'}).sort_index() # doctest: +NORMALIZE_WHITESPACE | ||
num_legs num_arms | ||
class name | ||
mammal cat 4 0 | ||
dog 4 0 | ||
monkey 2 2 | ||
|
||
>>> df.rename_axis(index=str.upper).sort_index() # doctest: +NORMALIZE_WHITESPACE | ||
num_legs num_arms | ||
TYPE NAME | ||
mammal cat 4 0 | ||
dog 4 0 | ||
monkey 2 2 | ||
""" | ||
|
||
def gen_names(v, curnames): | ||
if is_scalar(v): | ||
newnames = [v] | ||
elif is_list_like(v) and not is_dict_like(v): | ||
newnames = list(v) | ||
elif is_dict_like(v): | ||
newnames = [v[name] if name in v else name for name in curnames] | ||
elif callable(v): | ||
newnames = [v(name) for name in curnames] | ||
else: | ||
raise ValueError( | ||
"`mapper` or `index` or `columns` should be " | ||
"either dict-like or function type." | ||
) | ||
|
||
if len(newnames) != len(curnames): | ||
raise ValueError( | ||
"Length of new names must be {}, got {}".format(len(curnames), len(newnames)) | ||
) | ||
|
||
return [name if is_name_like_tuple(name) else (name,) for name in newnames] | ||
|
||
if mapper is not None and (index is not None or columns is not None): | ||
raise TypeError("Cannot specify both 'mapper' and any of 'index' or 'columns'.") | ||
|
||
if mapper is not None: | ||
axis = validate_axis(axis) | ||
if axis == 0: | ||
index = mapper | ||
elif axis == 1: | ||
columns = mapper | ||
|
||
column_label_names = ( | ||
gen_names(columns, self.columns.names) | ||
if columns is not None | ||
else self._internal.column_label_names | ||
) | ||
index_names = ( | ||
gen_names(index, self.index.names) if index is not None else self._internal.index_names | ||
) | ||
index_map = OrderedDict(zip(self._internal.index_spark_column_names, index_names)) | ||
|
||
internal = self._internal.copy(index_map=index_map, column_label_names=column_label_names) | ||
if inplace: | ||
self._update_internal_frame(internal) | ||
return None | ||
else: | ||
return DataFrame(internal) | ||
|
||
def keys(self): | ||
""" | ||
Return alias for columns. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1115,6 +1115,77 @@ def rename(self, index=None, **kwargs): | |
else: | ||
return first_series(kdf) | ||
|
||
def rename_axis( | ||
self, mapper: Optional[Any] = None, index: Optional[Any] = None, inplace: bool = False | ||
) -> Optional["Series"]: | ||
""" | ||
Set the name of the axis for the index or columns. | ||
|
||
Parameters | ||
---------- | ||
mapper, index : scalar, list-like, dict-like or function, optional | ||
A scalar, list-like, dict-like or functions transformations to | ||
apply to the index values. | ||
Comment on lines
+1126
to
+1128
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe this is also not correct? I think you can refer to here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is for the same reason I explained above. |
||
inplace : bool, default False | ||
Modifies the object directly, instead of creating a new Series. | ||
|
||
Returns | ||
------- | ||
Series, or None if `inplace` is True. | ||
|
||
See Also | ||
-------- | ||
Series.rename : Alter Series index labels or name. | ||
DataFrame.rename : Alter DataFrame index labels or name. | ||
Index.rename : Set new names on index. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe we can also have |
||
|
||
Examples | ||
-------- | ||
>>> s = ks.Series(["dog", "cat", "monkey"], name="animal") | ||
>>> s # doctest: +NORMALIZE_WHITESPACE | ||
0 dog | ||
1 cat | ||
2 monkey | ||
Name: animal, dtype: object | ||
>>> s.rename_axis("index").sort_index() # doctest: +NORMALIZE_WHITESPACE | ||
index | ||
0 dog | ||
1 cat | ||
2 monkey | ||
Name: animal, dtype: object | ||
|
||
**MultiIndex** | ||
|
||
>>> index = pd.MultiIndex.from_product([['mammal'], | ||
... ['dog', 'cat', 'monkey']], | ||
... names=['type', 'name']) | ||
>>> s = ks.Series([4, 4, 2], index=index, name='num_legs') | ||
>>> s # doctest: +NORMALIZE_WHITESPACE | ||
type name | ||
mammal dog 4 | ||
cat 4 | ||
monkey 2 | ||
Name: num_legs, dtype: int64 | ||
>>> s.rename_axis(index={'type': 'class'}).sort_index() # doctest: +NORMALIZE_WHITESPACE | ||
class name | ||
mammal cat 4 | ||
dog 4 | ||
monkey 2 | ||
Name: num_legs, dtype: int64 | ||
>>> s.rename_axis(index=str.upper).sort_index() # doctest: +NORMALIZE_WHITESPACE | ||
TYPE NAME | ||
mammal cat 4 | ||
dog 4 | ||
monkey 2 | ||
Name: num_legs, dtype: int64 | ||
""" | ||
kdf = self.to_frame().rename_axis(mapper=mapper, index=index, inplace=False) | ||
if inplace: | ||
self._update_anchor(kdf) | ||
return None | ||
else: | ||
return first_series(kdf) | ||
|
||
@property | ||
def index(self): | ||
"""The index (axis labels) Column of the Series. | ||
|
@@ -2149,7 +2220,7 @@ def unique(self): | |
|
||
def sort_values( | ||
self, ascending: bool = True, inplace: bool = False, na_position: str = "last" | ||
) -> Union["Series", None]: | ||
) -> Optional["Series"]: | ||
""" | ||
Sort by the values. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe this explanation for `mapper` is not correct? In the latest pandas docs, they say:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I should have specified that pandas does not support using a dict or a function as `mapper`. They say: "However, if mapper is dict-like or a function, it will use the deprecated behavior of modifying the axis labels."
So I am not sure this is the correct behavior; that is why I added the possibility to use a dict or a function as `mapper`, and therefore updated the docs. I am interested in your opinion on this!