From d1826bba808ab043db14a4a7697611887e8cdac2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 17 Oct 2019 13:45:30 -0500 Subject: [PATCH 1/5] REF: Store metadata in attrs dict This aids in the implementation of https://github.com/pandas-dev/pandas/pull/28394. Over there, I'm having issues with using `NDFrame.__finalize__` to copy attributes, in part because getattribute on NDFrame is so complicated. Storing the metadata in a dedicated dict simplifies this, because we only need to look in NDFrame.attrs, which is just a plain dictionary. Aside from the addition of a public NDFrame.attrs dictionary, there aren't any user-facing API changes. --- doc/source/whatsnew/v1.0.0.rst | 1 - pandas/core/base.py | 13 ++++++++++++- pandas/core/generic.py | 33 ++++++++++++++++++++++++++++++++- pandas/core/indexes/base.py | 3 ++- pandas/core/series.py | 17 +---------------- 5 files changed, 47 insertions(+), 20 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 48c1173a372a7..430fde1cfaeea 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -204,7 +204,6 @@ Deprecations - ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). -- .. 
_whatsnew_1000.prior_deprecations: diff --git a/pandas/core/base.py b/pandas/core/base.py index 5ae3926952a67..504211cebbb34 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins from collections import OrderedDict import textwrap -from typing import Dict, FrozenSet, Optional +from typing import Dict, FrozenSet, Hashable, Optional import warnings import numpy as np @@ -30,6 +30,7 @@ is_timedelta64_ns_dtype, ) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries +from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna from pandas.core import algorithms, common as com @@ -663,6 +664,16 @@ class IndexOpsMixin: ] ) # type: FrozenSet[str] + @property + def name(self) -> Optional[Hashable]: + return self.attrs.get("name", None) + + @name.setter + def name(self, value: Hashable) -> None: + if not is_hashable(value): + raise TypeError("Series.name must be a hashable type") + self.attrs["name"] = value + def transpose(self, *args, **kwargs): """ Return the transpose, which is by definition self. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e97772a418982..ac607498d88b5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -14,6 +14,7 @@ FrozenSet, Hashable, List, + Mapping, Optional, Sequence, Set, @@ -197,6 +198,7 @@ def __init__( axes: Optional[List[Index]] = None, copy: bool = False, dtype: Optional[Dtype] = None, + attrs: Mapping[Hashable, Any] = None, fastpath: bool = False, ): @@ -213,6 +215,11 @@ def __init__( object.__setattr__(self, "_is_copy", None) object.__setattr__(self, "_data", data) object.__setattr__(self, "_item_cache", {}) + if attrs is None: + attrs = {} + else: + attrs = dict(attrs) + object.__setattr__(self, "_attrs", attrs) def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): """ passed a manager and a axes dict """ @@ -233,6 +240,19 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): # ---------------------------------------------------------------------- + @property + def attrs(self): + """ + Dictionary of global attributes on this object. 
+ """ + if self._attrs is None: + self._attrs = {} + return self._attrs + + @attrs.setter + def attrs(self, value: Mapping[Hashable, Any]) -> None: + self._attrs = dict(value) + @property def is_copy(self): """ @@ -2029,7 +2049,13 @@ def to_dense(self): def __getstate__(self): meta = {k: getattr(self, k, None) for k in self._metadata} - return dict(_data=self._data, _typ=self._typ, _metadata=self._metadata, **meta) + return dict( + _data=self._data, + _typ=self._typ, + _metadata=self._metadata, + attrs=self.attrs, + **meta + ) def __setstate__(self, state): @@ -2038,6 +2064,8 @@ def __setstate__(self, state): elif isinstance(state, dict): typ = state.get("_typ") if typ is not None: + attrs = state.get("_attrs", {}) + object.__setattr__(self, "_attrs", attrs) # set in the order of internal names # to avoid definitional recursion @@ -5213,6 +5241,9 @@ def __finalize__(self, other, method=None, **kwargs): """ if isinstance(other, NDFrame): + for name in other.attrs: + self.attrs[name] = other.attrs[name] + # For subclasses using _metadata. 
for name in self._metadata: object.__setattr__(self, name, getattr(other, name, None)) return self diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 526b2c2e2c412..dc343e897cf95 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import FrozenSet, Union +from typing import Any, FrozenSet, Hashable, Mapping, Union import warnings import numpy as np @@ -266,6 +266,7 @@ def __new__( name=None, fastpath=None, tupleize_cols=True, + attrs: Mapping[Hashable, Any] = None, **kwargs ) -> "Index": diff --git a/pandas/core/series.py b/pandas/core/series.py index 1039e9af929d4..c7128ca67b7ee 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -29,7 +29,6 @@ is_dict_like, is_extension_array_dtype, is_extension_type, - is_hashable, is_integer, is_iterator, is_list_like, @@ -173,7 +172,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): Copy input data. """ - _metadata = ["name"] + _metadata = [] _accessors = {"dt", "cat", "str", "sparse"} _deprecations = ( base.IndexOpsMixin._deprecations @@ -324,7 +323,6 @@ def __init__( data = SingleBlockManager(data, index, fastpath=True) generic.NDFrame.__init__(self, data, fastpath=True) - self.name = name self._set_axis(0, index, fastpath=True) @@ -457,19 +455,6 @@ def _update_inplace(self, result, **kwargs): # we want to call the generic version and not the IndexOpsMixin return generic.NDFrame._update_inplace(self, result, **kwargs) - @property - def name(self): - """ - Return name of the Series. 
- """ - return self._name - - @name.setter - def name(self, value): - if value is not None and not is_hashable(value): - raise TypeError("Series.name must be a hashable type") - object.__setattr__(self, "_name", value) - # ndarray compatibility @property def dtype(self): From e6183cd5647e26670fd7012134d3c093c4d59658 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 18 Oct 2019 07:05:33 -0500 Subject: [PATCH 2/5] fixup Index.name --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/base.py | 13 +------------ pandas/core/generic.py | 8 ++++++-- pandas/core/indexes/base.py | 3 +-- pandas/core/series.py | 15 +++++++++++++-- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 430fde1cfaeea..48c1173a372a7 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -204,6 +204,7 @@ Deprecations - ``Index.set_value`` has been deprecated. For a given index ``idx``, array ``arr``, value in ``idx`` of ``idx_val`` and a new value of ``val``, ``idx.set_value(arr, idx_val, val)`` is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). +- .. 
_whatsnew_1000.prior_deprecations: diff --git a/pandas/core/base.py b/pandas/core/base.py index 504211cebbb34..5ae3926952a67 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -4,7 +4,7 @@ import builtins from collections import OrderedDict import textwrap -from typing import Dict, FrozenSet, Hashable, Optional +from typing import Dict, FrozenSet, Optional import warnings import numpy as np @@ -30,7 +30,6 @@ is_timedelta64_ns_dtype, ) from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries -from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import isna from pandas.core import algorithms, common as com @@ -664,16 +663,6 @@ class IndexOpsMixin: ] ) # type: FrozenSet[str] - @property - def name(self) -> Optional[Hashable]: - return self.attrs.get("name", None) - - @name.setter - def name(self, value: Hashable) -> None: - if not is_hashable(value): - raise TypeError("Series.name must be a hashable type") - self.attrs["name"] = value - def transpose(self, *args, **kwargs): """ Return the transpose, which is by definition self. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ac607498d88b5..01fc135294e8a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8,6 +8,7 @@ import re from textwrap import dedent from typing import ( + TYPE_CHECKING, Any, Callable, Dict, @@ -189,6 +190,9 @@ class NDFrame(PandasObject, SelectionMixin): _is_copy = None _data = None # type: BlockManager + if TYPE_CHECKING: + _attrs = {} # type: Dict[Hashable, Any] + # ---------------------------------------------------------------------- # Constructors @@ -198,7 +202,7 @@ def __init__( axes: Optional[List[Index]] = None, copy: bool = False, dtype: Optional[Dtype] = None, - attrs: Mapping[Hashable, Any] = None, + attrs: Optional[Mapping[Hashable, Any]] = None, fastpath: bool = False, ): @@ -241,7 +245,7 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): # ---------------------------------------------------------------------- @property - def attrs(self): + def attrs(self) -> Dict[Hashable, Any]: """ Dictionary of global attributes on this object. 
""" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dc343e897cf95..526b2c2e2c412 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1,7 +1,7 @@ from datetime import datetime import operator from textwrap import dedent -from typing import Any, FrozenSet, Hashable, Mapping, Union +from typing import FrozenSet, Union import warnings import numpy as np @@ -266,7 +266,6 @@ def __new__( name=None, fastpath=None, tupleize_cols=True, - attrs: Mapping[Hashable, Any] = None, **kwargs ) -> "Index": diff --git a/pandas/core/series.py b/pandas/core/series.py index c7128ca67b7ee..89667251a124a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,7 +5,7 @@ from io import StringIO from shutil import get_terminal_size from textwrap import dedent -from typing import Any, Callable +from typing import Any, Callable, Hashable, List, Optional import warnings import numpy as np @@ -44,6 +44,7 @@ ABCSeries, ABCSparseArray, ) +from pandas.core.dtypes.inference import is_hashable from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, @@ -172,7 +173,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): Copy input data. 
""" - _metadata = [] + _metadata = [] # type: List[str] _accessors = {"dt", "cat", "str", "sparse"} _deprecations = ( base.IndexOpsMixin._deprecations @@ -470,6 +471,16 @@ def dtypes(self): """ return self._data.dtype + @property + def name(self) -> Optional[Hashable]: + return self.attrs.get("name", None) + + @name.setter + def name(self, value: Hashable) -> None: + if not is_hashable(value): + raise TypeError("Series.name must be a hashable type") + self.attrs["name"] = value + @property def ftype(self): """ From 67a32635359c591a16e43b0f763177b0da4a042c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 18 Oct 2019 08:05:04 -0500 Subject: [PATCH 3/5] fixup name --- pandas/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 89667251a124a..e3604a2924cec 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,7 +5,7 @@ from io import StringIO from shutil import get_terminal_size from textwrap import dedent -from typing import Any, Callable, Hashable, List, Optional +from typing import Any, Callable, Hashable, List import warnings import numpy as np @@ -472,7 +472,7 @@ def dtypes(self): return self._data.dtype @property - def name(self) -> Optional[Hashable]: + def name(self) -> Hashable: return self.attrs.get("name", None) @name.setter From d766c85b8f0cd07e3509af9d7da457ec3175cfaf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Oct 2019 09:23:56 -0500 Subject: [PATCH 4/5] added note on type checking --- pandas/core/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 01fc135294e8a..55e2cceac91c3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -191,6 +191,9 @@ class NDFrame(PandasObject, SelectionMixin): _data = None # type: BlockManager if TYPE_CHECKING: + # TODO(PY36): replace with _attrs : Dict[Hashable, Any] + # We need the TYPE_CHECKING, because _attrs is not a 
class attribute + # and Py35 doesn't support the new syntax. _attrs = {} # type: Dict[Hashable, Any] # ---------------------------------------------------------------------- From 60fd202964b10ea1f0755503c02b88d5ec302de6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Oct 2019 09:27:51 -0500 Subject: [PATCH 5/5] Added reference docs --- doc/source/reference/frame.rst | 13 +++++++++++++ doc/source/reference/series.rst | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 4982edeb7f85b..4b5faed0f4d2d 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -274,6 +274,19 @@ Time series-related DataFrame.tz_convert DataFrame.tz_localize +.. _api.frame.metadata: + +Metadata +~~~~~~~~ + +:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame. + +.. autosummary:: + :toctree: api/ + + DataFrame.attrs + + .. _api.dataframe.plotting: Plotting diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 5d825c8092efc..59910ba357130 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -531,6 +531,19 @@ Sparse-dtype specific methods and attributes are provided under the Series.sparse.to_coo +.. _api.series.metadata: + +Metadata +~~~~~~~~ + +:attr:`Series.attrs` is a dictionary for storing global metadata for this Series. + +.. autosummary:: + :toctree: api/ + + Series.attrs + + Plotting -------- ``Series.plot`` is both a callable method and a namespace attribute for