diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b7e43404b86bd..0c11e0d469155 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pylint, pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.6 + rev: v0.1.13 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -31,8 +31,7 @@ repos: exclude: ^pandas/tests args: [--select, "ANN001,ANN2", --fix-only, --exit-non-zero-on-fix] - id: ruff-format - # TODO: "." not needed in ruff 0.1.8 - args: ["."] + exclude: ^scripts - repo: https://github.com/jendrikseipp/vulture rev: 'v2.10' hooks: diff --git a/doc/make.py b/doc/make.py index 2583242786fc8..19df4bae2ea55 100755 --- a/doc/make.py +++ b/doc/make.py @@ -113,7 +113,7 @@ def _run_os(*args) -> None: Examples -------- - >>> DocBuilder()._run_os('python', '--version') + >>> DocBuilder()._run_os("python", "--version") """ subprocess.check_call(args, stdout=sys.stdout, stderr=sys.stderr) @@ -129,7 +129,7 @@ def _sphinx_build(self, kind: str): Examples -------- - >>> DocBuilder(num_jobs=4)._sphinx_build('html') + >>> DocBuilder(num_jobs=4)._sphinx_build("html") """ if kind not in ("html", "latex", "linkcheck"): raise ValueError(f"kind must be html, latex or linkcheck, not {kind}") diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 7612739531695..8ad1da732a449 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -476,7 +476,7 @@ class option_context(ContextDecorator): Examples -------- >>> from pandas import option_context - >>> with option_context('display.max_rows', 10, 'display.max_columns', 5): + >>> with option_context("display.max_rows", 10, "display.max_columns", 5): ... pass """ diff --git a/pandas/_testing/_warnings.py b/pandas/_testing/_warnings.py index cff28f6a20472..d9516077788c8 100644 --- a/pandas/_testing/_warnings.py +++ b/pandas/_testing/_warnings.py @@ -76,10 +76,8 @@ class for all warnings. To raise multiple types of exceptions, >>> import warnings >>> with assert_produces_warning(): ... warnings.warn(UserWarning()) - ... >>> with assert_produces_warning(False): ... warnings.warn(RuntimeWarning()) - ... Traceback (most recent call last): ... AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py index 5ad5d02360f0b..4aea85d50c352 100644 --- a/pandas/_testing/asserters.py +++ b/pandas/_testing/asserters.py @@ -1178,8 +1178,8 @@ def assert_frame_equal( but with columns of differing dtypes. >>> from pandas.testing import assert_frame_equal - >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) - >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + >>> df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}) + >>> df2 = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]}) df1 equals itself. diff --git a/pandas/_testing/contexts.py b/pandas/_testing/contexts.py index eb6e4a917889a..3570ebaeffed5 100644 --- a/pandas/_testing/contexts.py +++ b/pandas/_testing/contexts.py @@ -70,9 +70,8 @@ def set_timezone(tz: str) -> Generator[None, None, None]: >>> tzlocal().tzname(datetime(2021, 1, 1)) # doctest: +SKIP 'IST' - >>> with set_timezone('US/Eastern'): + >>> with set_timezone("US/Eastern"): ... tzlocal().tzname(datetime(2021, 1, 1)) - ... 
'EST' """ import time diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 683af644cbdb3..39a5ffd947009 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -265,7 +265,7 @@ def __init__(self, pandas_object): # noqa: E999 For consistency with pandas methods, you should raise an ``AttributeError`` if the data passed to your accessor has an incorrect dtype. - >>> pd.Series(['a', 'b']).dt + >>> pd.Series(["a", "b"]).dt Traceback (most recent call last): ... AttributeError: Can only use .dt accessor with datetimelike values @@ -274,8 +274,6 @@ def __init__(self, pandas_object): # noqa: E999 -------- In your library code:: - import pandas as pd - @pd.api.extensions.register_dataframe_accessor("geo") class GeoAccessor: def __init__(self, pandas_obj): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 128477dac562e..b346cb9b2c175 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1215,8 +1215,9 @@ def take( >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True) array([10., 10., nan]) - >>> pd.api.extensions.take(np.array([10, 20, 30]), [0, 0, -1], allow_fill=True, - ... fill_value=-10) + >>> pd.api.extensions.take( + ... np.array([10, 20, 30]), [0, 0, -1], allow_fill=True, fill_value=-10 + ... ) array([ 10, 10, -10]) """ if not isinstance(arr, (np.ndarray, ABCExtensionArray, ABCIndex, ABCSeries)): diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7ae65ba11a752..c15d7b7928867 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1794,14 +1794,14 @@ def normalize_keyword_aggregation( def _make_unique_kwarg_list( - seq: Sequence[tuple[Any, Any]] + seq: Sequence[tuple[Any, Any]], ) -> Sequence[tuple[Any, Any]]: """ Uniquify aggfunc name of the pairs in the order list Examples: -------- - >>> kwarg_list = [('a', ''), ('a', ''), ('b', '')] + >>> kwarg_list = [("a", ""), ("a", ""), ("b", "")] >>> _make_unique_kwarg_list(kwarg_list) [('a', '_0'), ('a', '_1'), ('b', '')] """ @@ -1833,7 +1833,7 @@ def relabel_result( >>> from pandas.core.apply import relabel_result >>> result = pd.DataFrame( ... {"A": [np.nan, 2, np.nan], "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]}, - ... index=["max", "mean", "min"] + ... index=["max", "mean", "min"], ... ) >>> funcs = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]} >>> columns = ("foo", "aab", "bar", "dat") @@ -1972,7 +1972,7 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any: Examples -------- - >>> maybe_mangle_lambdas('sum') + >>> maybe_mangle_lambdas("sum") 'sum' >>> maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP [, @@ -2017,7 +2017,7 @@ def validate_func_kwargs( Examples -------- - >>> validate_func_kwargs({'one': 'min', 'two': 'max'}) + >>> validate_func_kwargs({"one": "min", "two": "max"}) (['one', 'two'], ['min', 'max']) """ tuple_given_message = "func is expected but received {} in **kwargs." diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 62f6737d86d51..dde1b8a35e2f0 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -119,8 +119,9 @@ def __add__(self, other): Examples -------- - >>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]}, - ... index=['elk', 'moose']) + >>> df = pd.DataFrame( + ... {"height": [1.5, 2.6], "weight": [500, 800]}, index=["elk", "moose"] + ... ) >>> df height weight elk 1.5 500 @@ -128,14 +129,14 @@ def __add__(self, other): Adding a scalar affects all rows and columns. 
- >>> df[['height', 'weight']] + 1.5 + >>> df[["height", "weight"]] + 1.5 height weight elk 3.0 501.5 moose 4.1 801.5 Each element of a list is added to a column of the DataFrame, in order. - >>> df[['height', 'weight']] + [0.5, 1.5] + >>> df[["height", "weight"]] + [0.5, 1.5] height weight elk 2.0 501.5 moose 3.1 801.5 @@ -143,7 +144,7 @@ def __add__(self, other): Keys of a dictionary are aligned to the DataFrame, based on column names; each value in the dictionary is added to the corresponding column. - >>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5} + >>> df[["height", "weight"]] + {"height": 0.5, "weight": 1.5} height weight elk 2.0 501.5 moose 3.1 801.5 @@ -151,8 +152,8 @@ def __add__(self, other): When `other` is a :class:`Series`, the index of `other` is aligned with the columns of the DataFrame. - >>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height']) - >>> df[['height', 'weight']] + s1 + >>> s1 = pd.Series([0.5, 1.5], index=["weight", "height"]) + >>> df[["height", "weight"]] + s1 height weight elk 3.0 500.5 moose 4.1 800.5 @@ -161,13 +162,13 @@ def __add__(self, other): the :class:`Series` will not be reoriented. If index-wise alignment is desired, :meth:`DataFrame.add` should be used with `axis='index'`. - >>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose']) - >>> df[['height', 'weight']] + s2 + >>> s2 = pd.Series([0.5, 1.5], index=["elk", "moose"]) + >>> df[["height", "weight"]] + s2 elk height moose weight elk NaN NaN NaN NaN moose NaN NaN NaN NaN - >>> df[['height', 'weight']].add(s2, axis='index') + >>> df[["height", "weight"]].add(s2, axis="index") height weight elk 2.0 500.5 moose 4.1 801.5 @@ -175,9 +176,10 @@ def __add__(self, other): When `other` is a :class:`DataFrame`, both columns names and the index are aligned. - >>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]}, - ... index=['elk', 'moose', 'deer']) - >>> df[['height', 'weight']] + other + >>> other = pd.DataFrame( + ... {"height": [0.2, 0.4, 0.6]}, index=["elk", "moose", "deer"] + ... ) + >>> df[["height", "weight"]] + other height weight deer NaN NaN elk 1.7 NaN diff --git a/pandas/core/arrays/arrow/accessors.py b/pandas/core/arrays/arrow/accessors.py index 7c5ccb2db0194..19ec253e81ef2 100644 --- a/pandas/core/arrays/arrow/accessors.py +++ b/pandas/core/arrays/arrow/accessors.py @@ -100,9 +100,7 @@ def len(self) -> Series: ... [1, 2, 3], ... [3], ... ], - ... dtype=pd.ArrowDtype(pa.list_( - ... pa.int64() - ... )) + ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), ... ) >>> s.list.len() 0 3 @@ -136,9 +134,7 @@ def __getitem__(self, key: int | slice) -> Series: ... [1, 2, 3], ... [3], ... ], - ... dtype=pd.ArrowDtype(pa.list_( - ... pa.int64() - ... )) + ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), ... ) >>> s.list[0] 0 1 @@ -195,9 +191,7 @@ def flatten(self) -> Series: ... [1, 2, 3], ... [3], ... ], - ... dtype=pd.ArrowDtype(pa.list_( - ... pa.int64() - ... )) + ... dtype=pd.ArrowDtype(pa.list_(pa.int64())), ... ) >>> s.list.flatten() 0 1 @@ -253,9 +247,9 @@ def dtypes(self) -> Series: ... {"version": 2, "project": "pandas"}, ... {"version": 1, "project": "numpy"}, ... ], - ... dtype=pd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] - ... )) + ... dtype=pd.ArrowDtype( + ... pa.struct([("version", pa.int64()), ("project", pa.string())]) + ... ), ... ) >>> s.struct.dtypes version int64[pyarrow] @@ -324,9 +318,9 @@ def field( ... {"version": 2, "project": "pandas"}, ... {"version": 1, "project": "numpy"}, ... ], - ... dtype=pd.ArrowDtype(pa.struct( - ... 
[("version", pa.int64()), ("project", pa.string())] - ... )) + ... dtype=pd.ArrowDtype( + ... pa.struct([("version", pa.int64()), ("project", pa.string())]) + ... ), ... ) Extract by field name. @@ -357,19 +351,21 @@ def field( For nested struct types, you can pass a list of values to index multiple levels: - >>> version_type = pa.struct([ - ... ("major", pa.int64()), - ... ("minor", pa.int64()), - ... ]) + >>> version_type = pa.struct( + ... [ + ... ("major", pa.int64()), + ... ("minor", pa.int64()), + ... ] + ... ) >>> s = pd.Series( ... [ ... {"version": {"major": 1, "minor": 5}, "project": "pandas"}, ... {"version": {"major": 2, "minor": 1}, "project": "pandas"}, ... {"version": {"major": 1, "minor": 26}, "project": "numpy"}, ... ], - ... dtype=pd.ArrowDtype(pa.struct( - ... [("version", version_type), ("project", pa.string())] - ... )) + ... dtype=pd.ArrowDtype( + ... pa.struct([("version", version_type), ("project", pa.string())]) + ... ), ... ) >>> s.struct.field(["version", "minor"]) 0 5 @@ -454,9 +450,9 @@ def explode(self) -> DataFrame: ... {"version": 2, "project": "pandas"}, ... {"version": 1, "project": "numpy"}, ... ], - ... dtype=pd.ArrowDtype(pa.struct( - ... [("version", pa.int64()), ("project", pa.string())] - ... )) + ... dtype=pd.ArrowDtype( + ... pa.struct([("version", pa.int64()), ("project", pa.string())]) + ... ), ... ) >>> s.struct.explode() diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e41a96cfcef7e..147b94e441f30 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -380,8 +380,9 @@ def _from_factorized(cls, values, original): Examples -------- - >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), - ... pd.Interval(1, 5), pd.Interval(1, 5)]) + >>> interv_arr = pd.arrays.IntervalArray( + ... [pd.Interval(0, 1), pd.Interval(1, 5), pd.Interval(1, 5)] + ... ) >>> codes, uniques = pd.factorize(interv_arr) >>> pd.arrays.IntervalArray._from_factorized(uniques, interv_arr) @@ -685,7 +686,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: Casting to another ``ExtensionDtype`` returns an ``ExtensionArray``: - >>> arr1 = arr.astype('Float64') + >>> arr1 = arr.astype("Float64") >>> arr1 [1.0, 2.0, 3.0] @@ -695,7 +696,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: Otherwise, we will get a Numpy ndarray: - >>> arr2 = arr.astype('float64') + >>> arr2 = arr.astype("float64") >>> arr2 array([1., 2., 3.]) >>> arr2.dtype @@ -939,15 +940,16 @@ def interpolate( Examples -------- >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3])) - >>> arr.interpolate(method="linear", - ... limit=3, - ... limit_direction="forward", - ... index=pd.Index([1, 2, 3, 4]), - ... fill_value=1, - ... copy=False, - ... axis=0, - ... limit_area="inside" - ... ) + >>> arr.interpolate( + ... method="linear", + ... limit=3, + ... limit_direction="forward", + ... index=pd.Index([1, 2, 3, 4]), + ... fill_value=1, + ... copy=False, + ... axis=0, + ... limit_area="inside", + ... ) [0.0, 1.0, 2.0, 3.0] Length: 4, dtype: float64 @@ -1467,8 +1469,10 @@ def factorize( Examples -------- - >>> idx1 = pd.PeriodIndex(["2014-01", "2014-01", "2014-02", "2014-02", - ... "2014-03", "2014-03"], freq="M") + >>> idx1 = pd.PeriodIndex( + ... ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], + ... freq="M", + ... 
) >>> arr, idx = idx1.factorize() >>> arr array([0, 0, 1, 1, 2, 2]) @@ -1627,10 +1631,9 @@ def take(self, indices, allow_fill=False, fill_value=None): # type for the array, to the physical storage type for # the data, before passing to take. - result = take(data, indices, fill_value=fill_value, - allow_fill=allow_fill) + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) return self._from_sequence(result, dtype=self.dtype) - """ + """ # noqa: E501 # Implementer note: The `fill_value` parameter should be a user-facing # value, an instance of self.dtype.type. When passed `fill_value=None`, # the default of `self.dtype.na_value` should be used. @@ -1767,7 +1770,7 @@ def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: -------- >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray): ... def _formatter(self, boxed=False): - ... return lambda x: '*' + str(x) + '*' if boxed else repr(x) + '*' + ... return lambda x: "*" + str(x) + "*" if boxed else repr(x) + "*" >>> MyExtensionArray(np.array([1, 2, 3, 4])) [1*, 2*, 3*, 4*] @@ -1902,7 +1905,7 @@ def _accumulate( Examples -------- >>> arr = pd.array([1, 2, 3]) - >>> arr._accumulate(name='cumsum') + >>> arr._accumulate(name="cumsum") [1, 3, 6] Length: 3, dtype: Int64 @@ -2007,10 +2010,9 @@ def _hash_pandas_object( Examples -------- - >>> pd.array([1, 2])._hash_pandas_object(encoding='utf-8', - ... hash_key="1000000000000000", - ... categorize=False - ... ) + >>> pd.array([1, 2])._hash_pandas_object( + ... encoding="utf-8", hash_key="1000000000000000", categorize=False + ... ) array([ 6238072747940578789, 15839785061582574730], dtype=uint64) """ from pandas.core.util.hashing import hash_array @@ -2044,8 +2046,9 @@ def _explode(self) -> tuple[Self, npt.NDArray[np.uint64]]: Examples -------- >>> import pyarrow as pa - >>> a = pd.array([[1, 2, 3], [4], [5, 6]], - ... dtype=pd.ArrowDtype(pa.list_(pa.int64()))) + >>> a = pd.array( + ... [[1, 2, 3], [4], [5, 6]], dtype=pd.ArrowDtype(pa.list_(pa.int64())) + ... ) >>> a._explode() ( [1, 2, 3, 4, 5, 6] diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index dea90dbd2f0d1..d1dba024e85c5 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -329,7 +329,7 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi [1, 2, 3, 1, 2, 3] Categories (3, int64): [1, 2, 3] - >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + >>> pd.Categorical(["a", "b", "c", "a", "b", "c"]) ['a', 'b', 'c', 'a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] @@ -349,8 +349,9 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject, ObjectStringArrayMi Ordered `Categoricals` can be sorted according to the custom order of the categories and can have a min and max value. - >>> c = pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'], ordered=True, - ... categories=['c', 'b', 'a']) + >>> c = pd.Categorical( + ... ["a", "b", "c", "a", "b", "c"], ordered=True, categories=["c", "b", "a"] + ... 
) >>> c ['a', 'b', 'c', 'a', 'b', 'c'] Categories (3, object): ['c' < 'b' < 'a'] @@ -509,7 +510,7 @@ def dtype(self) -> CategoricalDtype: Examples -------- - >>> cat = pd.Categorical(['a', 'b'], ordered=True) + >>> cat = pd.Categorical(["a", "b"], ordered=True) >>> cat ['a', 'b'] Categories (2, object): ['a' < 'b'] @@ -749,7 +750,7 @@ def from_codes( Examples -------- - >>> dtype = pd.CategoricalDtype(['a', 'b'], ordered=True) + >>> dtype = pd.CategoricalDtype(["a", "b"], ordered=True) >>> pd.Categorical.from_codes(codes=[0, 1, 0, 1], dtype=dtype) ['a', 'b', 'a', 'b'] Categories (2, object): ['a' < 'b'] @@ -804,28 +805,28 @@ def categories(self) -> Index: -------- For :class:`pandas.Series`: - >>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category') + >>> ser = pd.Series(["a", "b", "c", "a"], dtype="category") >>> ser.cat.categories Index(['a', 'b', 'c'], dtype='object') - >>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], categories=['b', 'c', 'd']) + >>> raw_cat = pd.Categorical(["a", "b", "c", "a"], categories=["b", "c", "d"]) >>> ser = pd.Series(raw_cat) >>> ser.cat.categories Index(['b', 'c', 'd'], dtype='object') For :class:`pandas.Categorical`: - >>> cat = pd.Categorical(['a', 'b'], ordered=True) + >>> cat = pd.Categorical(["a", "b"], ordered=True) >>> cat.categories Index(['a', 'b'], dtype='object') For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'c', 'b', 'a', 'c', 'b']) + >>> ci = pd.CategoricalIndex(["a", "c", "b", "a", "c", "b"]) >>> ci.categories Index(['a', 'b', 'c'], dtype='object') - >>> ci = pd.CategoricalIndex(['a', 'c'], categories=['c', 'b', 'a']) + >>> ci = pd.CategoricalIndex(["a", "c"], categories=["c", "b", "a"]) >>> ci.categories Index(['c', 'b', 'a'], dtype='object') """ @@ -840,32 +841,32 @@ def ordered(self) -> Ordered: -------- For :class:`pandas.Series`: - >>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category') + >>> ser = pd.Series(["a", "b", "c", "a"], dtype="category") >>> ser.cat.ordered False - >>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], ordered=True) + >>> raw_cat = pd.Categorical(["a", "b", "c", "a"], ordered=True) >>> ser = pd.Series(raw_cat) >>> ser.cat.ordered True For :class:`pandas.Categorical`: - >>> cat = pd.Categorical(['a', 'b'], ordered=True) + >>> cat = pd.Categorical(["a", "b"], ordered=True) >>> cat.ordered True - >>> cat = pd.Categorical(['a', 'b'], ordered=False) + >>> cat = pd.Categorical(["a", "b"], ordered=False) >>> cat.ordered False For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'b'], ordered=True) + >>> ci = pd.CategoricalIndex(["a", "b"], ordered=True) >>> ci.ordered True - >>> ci = pd.CategoricalIndex(['a', 'b'], ordered=False) + >>> ci = pd.CategoricalIndex(["a", "b"], ordered=False) >>> ci.ordered False """ @@ -891,17 +892,17 @@ def codes(self) -> np.ndarray: -------- For :class:`pandas.Categorical`: - >>> cat = pd.Categorical(['a', 'b'], ordered=True) + >>> cat = pd.Categorical(["a", "b"], ordered=True) >>> cat.codes array([0, 1], dtype=int8) For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c']) + >>> ci = pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) >>> ci.codes array([0, 1, 2, 0, 1, 2], dtype=int8) - >>> ci = pd.CategoricalIndex(['a', 'c'], categories=['c', 'b', 'a']) + >>> ci = pd.CategoricalIndex(["a", "c"], categories=["c", "b", "a"]) >>> ci.codes array([2, 0], dtype=int8) """ @@ -920,12 +921,12 @@ def _set_categories(self, categories, fastpath: bool = False) -> None: Examples -------- - >>> c = 
pd.Categorical(['a', 'b']) + >>> c = pd.Categorical(["a", "b"]) >>> c ['a', 'b'] Categories (2, object): ['a', 'b'] - >>> c._set_categories(pd.Index(['a', 'c'])) + >>> c._set_categories(pd.Index(["a", "c"])) >>> c ['a', 'c'] Categories (2, object): ['a', 'c'] @@ -989,7 +990,7 @@ def as_ordered(self) -> Self: -------- For :class:`pandas.Series`: - >>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category') + >>> ser = pd.Series(["a", "b", "c", "a"], dtype="category") >>> ser.cat.ordered False >>> ser = ser.cat.as_ordered() @@ -998,7 +999,7 @@ def as_ordered(self) -> Self: For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a']) + >>> ci = pd.CategoricalIndex(["a", "b", "c", "a"]) >>> ci.ordered False >>> ci = ci.as_ordered() @@ -1020,7 +1021,7 @@ def as_unordered(self) -> Self: -------- For :class:`pandas.Series`: - >>> raw_cat = pd.Categorical(['a', 'b', 'c', 'a'], ordered=True) + >>> raw_cat = pd.Categorical(["a", "b", "c", "a"], ordered=True) >>> ser = pd.Series(raw_cat) >>> ser.cat.ordered True @@ -1030,7 +1031,7 @@ def as_unordered(self) -> Self: For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a'], ordered=True) + >>> ci = pd.CategoricalIndex(["a", "b", "c", "a"], ordered=True) >>> ci.ordered True >>> ci = ci.as_unordered() @@ -1093,8 +1094,9 @@ def set_categories( -------- For :class:`pandas.Series`: - >>> raw_cat = pd.Categorical(['a', 'b', 'c', 'A'], - ... categories=['a', 'b', 'c'], ordered=True) + >>> raw_cat = pd.Categorical( + ... ["a", "b", "c", "A"], categories=["a", "b", "c"], ordered=True + ... ) >>> ser = pd.Series(raw_cat) >>> ser 0 a @@ -1104,7 +1106,7 @@ def set_categories( dtype: category Categories (3, object): ['a' < 'b' < 'c'] - >>> ser.cat.set_categories(['A', 'B', 'C'], rename=True) + >>> ser.cat.set_categories(["A", "B", "C"], rename=True) 0 A 1 B 2 C @@ -1114,16 +1116,17 @@ def set_categories( For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'A'], - ... categories=['a', 'b', 'c'], ordered=True) + >>> ci = pd.CategoricalIndex( + ... ["a", "b", "c", "A"], categories=["a", "b", "c"], ordered=True + ... 
) >>> ci CategoricalIndex(['a', 'b', 'c', nan], categories=['a', 'b', 'c'], ordered=True, dtype='category') - >>> ci.set_categories(['A', 'b', 'c']) + >>> ci.set_categories(["A", "b", "c"]) CategoricalIndex([nan, 'b', 'c', nan], categories=['A', 'b', 'c'], ordered=True, dtype='category') - >>> ci.set_categories(['A', 'b', 'c'], rename=True) + >>> ci.set_categories(["A", "b", "c"], rename=True) CategoricalIndex(['A', 'b', 'c', nan], categories=['A', 'b', 'c'], ordered=True, dtype='category') """ @@ -1189,7 +1192,7 @@ def rename_categories(self, new_categories) -> Self: Examples -------- - >>> c = pd.Categorical(['a', 'a', 'b']) + >>> c = pd.Categorical(["a", "a", "b"]) >>> c.rename_categories([0, 1]) [0, 0, 1] Categories (2, int64): [0, 1] @@ -1197,7 +1200,7 @@ def rename_categories(self, new_categories) -> Self: For dict-like ``new_categories``, extra keys are ignored and categories not in the dictionary are passed through - >>> c.rename_categories({'a': 'A', 'c': 'C'}) + >>> c.rename_categories({"a": "A", "c": "C"}) ['A', 'A', 'b'] Categories (2, object): ['A', 'b'] @@ -1257,8 +1260,8 @@ def reorder_categories(self, new_categories, ordered=None) -> Self: -------- For :class:`pandas.Series`: - >>> ser = pd.Series(['a', 'b', 'c', 'a'], dtype='category') - >>> ser = ser.cat.reorder_categories(['c', 'b', 'a'], ordered=True) + >>> ser = pd.Series(["a", "b", "c", "a"], dtype="category") + >>> ser = ser.cat.reorder_categories(["c", "b", "a"], ordered=True) >>> ser 0 a 1 b @@ -1277,11 +1280,11 @@ def reorder_categories(self, new_categories, ordered=None) -> Self: For :class:`pandas.CategoricalIndex`: - >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a']) + >>> ci = pd.CategoricalIndex(["a", "b", "c", "a"]) >>> ci CategoricalIndex(['a', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, dtype='category') - >>> ci.reorder_categories(['c', 'b', 'a'], ordered=True) + >>> ci.reorder_categories(["c", "b", "a"], ordered=True) CategoricalIndex(['a', 'b', 'c', 'a'], categories=['c', 'b', 'a'], ordered=True, dtype='category') """ @@ -1327,12 +1330,12 @@ def add_categories(self, new_categories) -> Self: Examples -------- - >>> c = pd.Categorical(['c', 'b', 'c']) + >>> c = pd.Categorical(["c", "b", "c"]) >>> c ['c', 'b', 'c'] Categories (2, object): ['b', 'c'] - >>> c.add_categories(['d', 'a']) + >>> c.add_categories(["d", "a"]) ['c', 'b', 'c'] Categories (4, object): ['b', 'c', 'd', 'a'] """ @@ -1395,12 +1398,12 @@ def remove_categories(self, removals) -> Self: Examples -------- - >>> c = pd.Categorical(['a', 'c', 'b', 'c', 'd']) + >>> c = pd.Categorical(["a", "c", "b", "c", "d"]) >>> c ['a', 'c', 'b', 'c', 'd'] Categories (4, object): ['a', 'b', 'c', 'd'] - >>> c.remove_categories(['d', 'a']) + >>> c.remove_categories(["d", "a"]) [NaN, 'c', 'b', 'c', NaN] Categories (2, object): ['b', 'c'] """ @@ -1442,13 +1445,13 @@ def remove_unused_categories(self) -> Self: Examples -------- - >>> c = pd.Categorical(['a', 'c', 'b', 'c', 'd']) + >>> c = pd.Categorical(["a", "c", "b", "c", "d"]) >>> c ['a', 'c', 'b', 'c', 'd'] Categories (4, object): ['a', 'b', 'c', 'd'] - >>> c[2] = 'a' - >>> c[4] = 'c' + >>> c[2] = "a" + >>> c[4] = "c" >>> c ['a', 'c', 'a', 'c', 'c'] Categories (4, object): ['a', 'b', 'c', 'd'] @@ -1522,37 +1525,37 @@ def map( Examples -------- - >>> cat = pd.Categorical(['a', 'b', 'c']) + >>> cat = pd.Categorical(["a", "b", "c"]) >>> cat ['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] >>> cat.map(lambda x: x.upper(), na_action=None) ['A', 'B', 'C'] Categories (3, object): ['A', 'B', 'C'] 
- >>> cat.map({'a': 'first', 'b': 'second', 'c': 'third'}, na_action=None) + >>> cat.map({"a": "first", "b": "second", "c": "third"}, na_action=None) ['first', 'second', 'third'] Categories (3, object): ['first', 'second', 'third'] If the mapping is one-to-one the ordering of the categories is preserved: - >>> cat = pd.Categorical(['a', 'b', 'c'], ordered=True) + >>> cat = pd.Categorical(["a", "b", "c"], ordered=True) >>> cat ['a', 'b', 'c'] Categories (3, object): ['a' < 'b' < 'c'] - >>> cat.map({'a': 3, 'b': 2, 'c': 1}, na_action=None) + >>> cat.map({"a": 3, "b": 2, "c": 1}, na_action=None) [3, 2, 1] Categories (3, int64): [3 < 2 < 1] If the mapping is not one-to-one an :class:`~pandas.Index` is returned: - >>> cat.map({'a': 'first', 'b': 'second', 'c': 'first'}, na_action=None) + >>> cat.map({"a": "first", "b": "second", "c": "first"}, na_action=None) Index(['first', 'second', 'first'], dtype='object') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: - >>> cat.map({'a': 'first', 'b': 'second'}, na_action=None) + >>> cat.map({"a": "first", "b": "second"}, na_action=None) Index(['first', 'second', nan], dtype='object') """ if na_action is lib.no_default: @@ -1664,7 +1667,7 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray: Examples -------- - >>> cat = pd.Categorical(['a', 'b'], ordered=True) + >>> cat = pd.Categorical(["a", "b"], ordered=True) The following calls ``cat.__array__`` @@ -1932,12 +1935,12 @@ def argsort( Examples -------- - >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort() + >>> pd.Categorical(["b", "b", "a", "c"]).argsort() array([2, 0, 1, 3]) - >>> cat = pd.Categorical(['b', 'b', 'a', 'c'], - ... categories=['c', 'b', 'a'], - ... ordered=True) + >>> cat = pd.Categorical( + ... ["b", "b", "a", "c"], categories=["c", "b", "a"], ordered=True + ... ) >>> cat.argsort() array([3, 0, 1, 2]) @@ -2031,10 +2034,10 @@ def sort_values( >>> c.sort_values(ascending=False) [5, 2, 2, NaN, NaN] Categories (2, int64): [2, 5] - >>> c.sort_values(na_position='first') + >>> c.sort_values(na_position="first") [NaN, NaN, 2, 2, 5] Categories (2, int64): [2, 5] - >>> c.sort_values(ascending=False, na_position='first') + >>> c.sort_values(ascending=False, na_position="first") [NaN, NaN, 5, 2, 2] Categories (2, int64): [2, 5] """ @@ -2348,7 +2351,7 @@ def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]: Examples -------- - >>> c = pd.Categorical(list('aabca')) + >>> c = pd.Categorical(list("aabca")) >>> c ['a', 'a', 'b', 'c', 'a'] Categories (3, object): ['a', 'b', 'c'] @@ -2632,15 +2635,14 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]: Examples -------- - >>> s = pd.Categorical(['lama', 'cow', 'lama', 'beetle', 'lama', - ... 'hippo']) - >>> s.isin(['cow', 'lama']) + >>> s = pd.Categorical(["llama", "cow", "llama", "beetle", "llama", "hippo"]) + >>> s.isin(["cow", "llama"]) array([ True, True, True, False, True, False]) - Passing a single string as ``s.isin('lama')`` will raise an error. Use + Passing a single string as ``s.isin('llama')`` will raise an error. 
Use a list of one element instead: - >>> s.isin(['lama']) + >>> s.isin(["llama"]) array([ True, False, True, False, True, False]) """ null_mask = np.asarray(isna(values)) @@ -3007,8 +3009,8 @@ def recode_for_categories( Examples -------- - >>> old_cat = pd.Index(['b', 'a', 'c']) - >>> new_cat = pd.Index(['a', 'b']) + >>> old_cat = pd.Index(["b", "a", "c"]) + >>> new_cat = pd.Index(["a", "b"]) >>> codes = np.array([0, 1, 1, 2]) >>> recode_for_categories(codes, old_cat, new_cat) array([ 1, 0, 0, -1], dtype=int8) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1e52cb1ee46e1..4194ffcee2e44 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -270,7 +270,7 @@ def _unbox_scalar( Examples -------- - >>> arr = pd.array(np.array(['1970-01-01'], 'datetime64[ns]')) + >>> arr = pd.array(np.array(["1970-01-01"], "datetime64[ns]")) >>> arr._unbox_scalar(arr[0]) numpy.datetime64('1970-01-01T00:00:00.000000000') """ @@ -889,8 +889,9 @@ def freqstr(self) -> str | None: The frequency can be inferred if there are more than 2 points: - >>> idx = pd.DatetimeIndex(["2018-01-01", "2018-01-03", "2018-01-05"], - ... freq="infer") + >>> idx = pd.DatetimeIndex( + ... ["2018-01-01", "2018-01-03", "2018-01-05"], freq="infer" + ... ) >>> idx.freqstr '2D' @@ -1596,7 +1597,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0): -------- For :class:`pandas.DatetimeIndex`: - >>> idx = pd.date_range('2001-01-01 00:00', periods=3) + >>> idx = pd.date_range("2001-01-01 00:00", periods=3) >>> idx DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], dtype='datetime64[ns]', freq='D') @@ -1605,7 +1606,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0): For :class:`pandas.TimedeltaIndex`: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None) @@ -1775,9 +1776,8 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: Examples -------- - >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), - ... periods=3, freq='s') - >>> rng.strftime('%%B %%d, %%Y, %%r') + >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"), periods=3, freq="s") + >>> rng.strftime("%%B %%d, %%Y, %%r") Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM', 'March 10, 2018, 09:00:02 AM'], dtype='object') diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4b804598681fa..bc8d170b73fd0 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -210,9 +210,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): # type: ignore[misc] Examples -------- >>> pd.arrays.DatetimeArray._from_sequence( - ... pd.DatetimeIndex( - ... ["2023-01-01", "2023-01-02"], freq="D" - ... ) + ... pd.DatetimeIndex(["2023-01-01", "2023-01-02"], freq="D") ... ) ['2023-01-01 00:00:00', '2023-01-02 00:00:00'] @@ -611,8 +609,9 @@ def tz(self) -> tzinfo | None: For DatetimeIndex: - >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", - ... "2/1/2020 11:00:00+00:00"]) + >>> idx = pd.DatetimeIndex( + ... ["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"] + ... ) >>> idx.tz datetime.timezone.utc """ @@ -888,8 +887,9 @@ def tz_convert(self, tz) -> Self: With the `tz` parameter, we can change the DatetimeIndex to other time zones: - >>> dti = pd.date_range(start='2014-08-01 09:00', - ... 
freq='h', periods=3, tz='Europe/Berlin') + >>> dti = pd.date_range( + ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" + ... ) >>> dti DatetimeIndex(['2014-08-01 09:00:00+02:00', @@ -897,7 +897,7 @@ def tz_convert(self, tz) -> Self: '2014-08-01 11:00:00+02:00'], dtype='datetime64[ns, Europe/Berlin]', freq='h') - >>> dti.tz_convert('US/Central') + >>> dti.tz_convert("US/Central") DatetimeIndex(['2014-08-01 02:00:00-05:00', '2014-08-01 03:00:00-05:00', '2014-08-01 04:00:00-05:00'], @@ -906,8 +906,9 @@ def tz_convert(self, tz) -> Self: With the ``tz=None``, we can remove the timezone (after converting to UTC if necessary): - >>> dti = pd.date_range(start='2014-08-01 09:00', freq='h', - ... periods=3, tz='Europe/Berlin') + >>> dti = pd.date_range( + ... start="2014-08-01 09:00", freq="h", periods=3, tz="Europe/Berlin" + ... ) >>> dti DatetimeIndex(['2014-08-01 09:00:00+02:00', @@ -1131,7 +1132,7 @@ def to_pydatetime(self) -> npt.NDArray[np.object_]: Examples -------- - >>> idx = pd.date_range('2018-02-27', periods=3) + >>> idx = pd.date_range("2018-02-27", periods=3) >>> idx.to_pydatetime() array([datetime.datetime(2018, 2, 27, 0, 0), datetime.datetime(2018, 2, 28, 0, 0), @@ -1164,8 +1165,9 @@ def normalize(self) -> Self: Examples -------- - >>> idx = pd.date_range(start='2014-08-01 10:00', freq='h', - ... periods=3, tz='Asia/Calcutta') + >>> idx = pd.date_range( + ... start="2014-08-01 10:00", freq="h", periods=3, tz="Asia/Calcutta" + ... ) >>> idx DatetimeIndex(['2014-08-01 10:00:00+05:30', '2014-08-01 11:00:00+05:30', @@ -1215,10 +1217,16 @@ def to_period(self, freq=None) -> PeriodArray: Examples -------- - >>> df = pd.DataFrame({"y": [1, 2, 3]}, - ... index=pd.to_datetime(["2000-03-31 00:00:00", - ... "2000-05-31 00:00:00", - ... "2000-08-31 00:00:00"])) + >>> df = pd.DataFrame( + ... {"y": [1, 2, 3]}, + ... index=pd.to_datetime( + ... [ + ... "2000-03-31 00:00:00", + ... "2000-05-31 00:00:00", + ... "2000-08-31 00:00:00", + ... ] + ... ), + ... ) >>> df.index.to_period("M") PeriodIndex(['2000-03', '2000-05', '2000-08'], dtype='period[M]') @@ -1283,7 +1291,7 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: Examples -------- - >>> s = pd.Series(pd.date_range(start='2018-01', freq='ME', periods=3)) + >>> s = pd.Series(pd.date_range(start="2018-01", freq="ME", periods=3)) >>> s 0 2018-01-31 1 2018-02-28 @@ -1295,7 +1303,7 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: 2 March dtype: object - >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3) + >>> idx = pd.date_range(start="2018-01", freq="ME", periods=3) >>> idx DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq='ME') @@ -1306,11 +1314,11 @@ def month_name(self, locale=None) -> npt.NDArray[np.object_]: for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month names in Brazilian Portuguese language. 
- >>> idx = pd.date_range(start='2018-01', freq='ME', periods=3) + >>> idx = pd.date_range(start="2018-01", freq="ME", periods=3) >>> idx DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'], dtype='datetime64[ns]', freq='ME') - >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP + >>> idx.month_name(locale="pt_BR.utf8") # doctest: +SKIP Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object') """ values = self._local_timestamps() @@ -1340,7 +1348,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: Examples -------- - >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3)) + >>> s = pd.Series(pd.date_range(start="2018-01-01", freq="D", periods=3)) >>> s 0 2018-01-01 1 2018-01-02 @@ -1352,7 +1360,7 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: 2 Wednesday dtype: object - >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) + >>> idx = pd.date_range(start="2018-01-01", freq="D", periods=3) >>> idx DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], dtype='datetime64[ns]', freq='D') @@ -1363,11 +1371,11 @@ def day_name(self, locale=None) -> npt.NDArray[np.object_]: for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day names in Brazilian Portuguese language. - >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3) + >>> idx = pd.date_range(start="2018-01-01", freq="D", periods=3) >>> idx DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'], dtype='datetime64[ns]', freq='D') - >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP + >>> idx.day_name(locale="pt_BR.utf8") # doctest: +SKIP Index(['Segunda', 'Terça', 'Quarta'], dtype='object') """ values = self._local_timestamps() @@ -1402,8 +1410,9 @@ def time(self) -> npt.NDArray[np.object_]: For DatetimeIndex: - >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", - ... "2/1/2020 11:00:00+00:00"]) + >>> idx = pd.DatetimeIndex( + ... ["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"] + ... ) >>> idx.time array([datetime.time(10, 0), datetime.time(11, 0)], dtype=object) """ @@ -1438,8 +1447,9 @@ def timetz(self) -> npt.NDArray[np.object_]: For DatetimeIndex: - >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", - ... "2/1/2020 11:00:00+00:00"]) + >>> idx = pd.DatetimeIndex( + ... ["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"] + ... ) >>> idx.timetz array([datetime.time(10, 0, tzinfo=datetime.timezone.utc), datetime.time(11, 0, tzinfo=datetime.timezone.utc)], dtype=object) @@ -1471,8 +1481,9 @@ def date(self) -> npt.NDArray[np.object_]: For DatetimeIndex: - >>> idx = pd.DatetimeIndex(["1/1/2020 10:00:00+00:00", - ... "2/1/2020 11:00:00+00:00"]) + >>> idx = pd.DatetimeIndex( + ... ["1/1/2020 10:00:00+00:00", "2/1/2020 11:00:00+00:00"] + ... 
) >>> idx.date array([datetime.date(2020, 1, 1), datetime.date(2020, 2, 1)], dtype=object) """ @@ -1501,7 +1512,7 @@ def isocalendar(self) -> DataFrame: Examples -------- - >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4) + >>> idx = pd.date_range(start="2019-12-29", freq="D", periods=4) >>> idx.isocalendar() year week day 2019-12-29 2019 52 7 @@ -2169,7 +2180,7 @@ def std( -------- For :class:`pandas.DatetimeIndex`: - >>> idx = pd.date_range('2001-01-01 00:00', periods=3) + >>> idx = pd.date_range("2001-01-01 00:00", periods=3) >>> idx DatetimeIndex(['2001-01-01', '2001-01-02', '2001-01-03'], dtype='datetime64[ns]', freq='D') diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index f9384e25ba9d9..dc453f3e37c50 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -115,12 +115,12 @@ class IntegerArray(NumericArray): String aliases for the dtypes are also available. They are capitalized. - >>> pd.array([1, None, 3], dtype='Int32') + >>> pd.array([1, None, 3], dtype="Int32") [1, , 3] Length: 3, dtype: Int32 - >>> pd.array([1, None, 3], dtype='UInt16') + >>> pd.array([1, None, 3], dtype="UInt16") [1, , 3] Length: 3, dtype: UInt16 diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8bbc4976675c8..ab79622ddd8be 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -172,8 +172,7 @@ class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin): # type: ignore[misc] Examples -------- - >>> pd.arrays.PeriodArray(pd.PeriodIndex(['2023-01-01', - ... '2023-01-02'], freq='D')) + >>> pd.arrays.PeriodArray(pd.PeriodIndex(["2023-01-01", "2023-01-02"], freq="D")) ['2023-01-01', '2023-01-02'] Length: 2, dtype: period[D] @@ -719,16 +718,16 @@ def asfreq(self, freq=None, how: str = "E") -> Self: Examples -------- - >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='Y') + >>> pidx = pd.period_range("2010-01-01", "2015-01-01", freq="Y") >>> pidx PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'], dtype='period[Y-DEC]') - >>> pidx.asfreq('M') + >>> pidx.asfreq("M") PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12', '2015-12'], dtype='period[M]') - >>> pidx.asfreq('M', how='S') + >>> pidx.asfreq("M", how="S") PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01', '2015-01'], dtype='period[M]') """ @@ -1035,29 +1034,26 @@ def period_array( Examples -------- - >>> period_array([pd.Period('2017', freq='Y'), - ... pd.Period('2018', freq='Y')]) + >>> period_array([pd.Period("2017", freq="Y"), pd.Period("2018", freq="Y")]) ['2017', '2018'] Length: 2, dtype: period[Y-DEC] - >>> period_array([pd.Period('2017', freq='Y'), - ... pd.Period('2018', freq='Y'), - ... 
pd.NaT]) + >>> period_array([pd.Period("2017", freq="Y"), pd.Period("2018", freq="Y"), pd.NaT]) ['2017', '2018', 'NaT'] Length: 3, dtype: period[Y-DEC] Integers that look like years are handled - >>> period_array([2000, 2001, 2002], freq='D') + >>> period_array([2000, 2001, 2002], freq="D") ['2000-01-01', '2001-01-01', '2002-01-01'] Length: 3, dtype: period[D] Datetime-like strings may also be passed - >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q') + >>> period_array(["2000-Q1", "2000-Q2", "2000-Q3", "2000-Q4"], freq="Q") ['2000Q1', '2000Q2', '2000Q3', '2000Q4'] Length: 4, dtype: period[Q-DEC] diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index a1d81aeeecb0b..6608fcce2cd62 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -156,7 +156,7 @@ def to_coo( ... (1, 1, "b", 0), ... (1, 1, "b", 1), ... (2, 1, "b", 0), - ... (2, 1, "b", 1) + ... (2, 1, "b", 1), ... ], ... names=["A", "B", "C", "D"], ... ) @@ -244,8 +244,7 @@ class SparseFrameAccessor(BaseAccessor, PandasDelegate): Examples -------- - >>> df = pd.DataFrame({"a": [1, 2, 0, 0], - ... "b": [3, 0, 0, 4]}, dtype="Sparse[int]") + >>> df = pd.DataFrame({"a": [1, 2, 0, 0], "b": [3, 0, 0, 4]}, dtype="Sparse[int]") >>> df.sparse.density 0.5 """ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index fafeedc01b02b..5369839126e48 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1251,7 +1251,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): IntIndex Indices: array([2, 3], dtype=int32) - >>> arr.astype(SparseDtype(np.dtype('int32'))) + >>> arr.astype(SparseDtype(np.dtype("int32"))) [0, 0, 1, 2] Fill: 0 IntIndex @@ -1260,7 +1260,7 @@ def astype(self, dtype: AstypeArg | None = None, copy: bool = True): Using a NumPy dtype with a different kind (e.g. float) will coerce just ``self.sp_values``. - >>> arr.astype(SparseDtype(np.dtype('float64'))) + >>> arr.astype(SparseDtype(np.dtype("float64"))) ... # doctest: +NORMALIZE_WHITESPACE [nan, nan, 1.0, 2.0] Fill: nan diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index ecad5b481f952..5a803c9064db9 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -327,7 +327,7 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] Examples -------- - >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string") + >>> pd.array(["This is", "some text", None, "data."], dtype="string") ['This is', 'some text', , 'data.'] Length: 4, dtype: string @@ -335,11 +335,11 @@ class StringArray(BaseStringArray, NumpyExtensionArray): # type: ignore[misc] Unlike arrays instantiated with ``dtype="object"``, ``StringArray`` will convert the values to strings. 
- >>> pd.array(['1', 1], dtype="object") + >>> pd.array(["1", 1], dtype="object") ['1', 1] Length: 2, dtype: object - >>> pd.array(['1', 1], dtype="string") + >>> pd.array(["1", 1], dtype="string") ['1', '1'] Length: 2, dtype: string diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index a76eef8095695..ba02c63c00ce4 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -117,7 +117,7 @@ class ArrowStringArray(ObjectStringArrayMixin, ArrowExtensionArray, BaseStringAr Examples -------- - >>> pd.array(['This is', 'some text', None, 'data.'], dtype="string[pyarrow]") + >>> pd.array(["This is", "some text", None, "data."], dtype="string[pyarrow]") ['This is', 'some text', , 'data.'] Length: 4, dtype: string diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 58455f8cb8398..51075939276f7 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -132,7 +132,7 @@ class TimedeltaArray(dtl.TimelikeOps): Examples -------- - >>> pd.arrays.TimedeltaArray._from_sequence(pd.TimedeltaIndex(['1h', '2h'])) + >>> pd.arrays.TimedeltaArray._from_sequence(pd.TimedeltaIndex(["1h", "2h"])) ['0 days 01:00:00', '0 days 02:00:00'] Length: 2, dtype: timedelta64[ns] @@ -747,7 +747,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: -------- **Series** - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d')) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d")) >>> s 0 0 days 1 1 days @@ -766,7 +766,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: **TimedeltaIndex** - >>> idx = pd.to_timedelta(np.arange(5), unit='d') + >>> idx = pd.to_timedelta(np.arange(5), unit="d") >>> idx TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) @@ -787,7 +787,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: Examples -------- - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='D') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None) @@ -945,7 +945,7 @@ def components(self) -> DataFrame: Examples -------- - >>> tdelta_idx = pd.to_timedelta(['1 day 3 min 2 us 42 ns']) + >>> tdelta_idx = pd.to_timedelta(["1 day 3 min 2 us 42 ns"]) >>> tdelta_idx TimedeltaIndex(['1 days 00:03:00.000002042'], dtype='timedelta64[ns]', freq=None) diff --git a/pandas/core/base.py b/pandas/core/base.py index 490daa656f603..a1484d9ad032b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -367,7 +367,7 @@ def ndim(self) -> Literal[1]: Examples -------- - >>> s = pd.Series(['Ant', 'Bear', 'Cow']) + >>> s = pd.Series(["Ant", "Bear", "Cow"]) >>> s 0 Ant 1 Bear @@ -409,7 +409,7 @@ def item(self): For an index: - >>> s = pd.Series([1], index=['a']) + >>> s = pd.Series([1], index=["a"]) >>> s.index.item() 'a' """ @@ -426,7 +426,7 @@ def nbytes(self) -> int: -------- For Series: - >>> s = pd.Series(['Ant', 'Bear', 'Cow']) + >>> s = pd.Series(["Ant", "Bear", "Cow"]) >>> s 0 Ant 1 Bear @@ -454,7 +454,7 @@ def size(self) -> int: -------- For Series: - >>> s = pd.Series(['Ant', 'Bear', 'Cow']) + >>> s = pd.Series(["Ant", "Bear", "Cow"]) >>> s 0 Ant 1 Bear @@ -531,7 +531,7 @@ def array(self) -> ExtensionArray: For extension types, like Categorical, the actual ExtensionArray is returned - >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser = pd.Series(pd.Categorical(["a", "b", "a"])) >>> ser.array ['a', 'b', 'a'] Categories (2, object): 
['a', 'b'] @@ -610,7 +610,7 @@ def to_numpy( Examples -------- - >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a'])) + >>> ser = pd.Series(pd.Categorical(["a", "b", "a"])) >>> ser.to_numpy() array(['a', 'b', 'a'], dtype=object) @@ -618,7 +618,7 @@ def to_numpy( Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp` objects, each with the correct ``tz``. - >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> ser = pd.Series(pd.date_range("2000", periods=2, tz="CET")) >>> ser.to_numpy(dtype=object) array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), Timestamp('2000-01-02 00:00:00+0100', tz='CET')], @@ -713,8 +713,15 @@ def argmax( -------- Consider dataset containing cereal calories - >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0, - ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}}) + >>> s = pd.Series( + ... [100.0, 110.0, 120.0, 110.0], + ... index=[ + ... "Corn Flakes", + ... "Almond Delight", + ... "Cinnamon Toast Crunch", + ... "Cocoa Puff", + ... ], + ... ) >>> s Corn Flakes 100.0 Almond Delight 110.0 diff --git a/pandas/core/construction.py b/pandas/core/construction.py index d41a9c80a10ec..7b35d451c1120 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -169,7 +169,7 @@ def array( would no longer return a :class:`arrays.NumpyExtensionArray` backed by a NumPy array. - >>> pd.array(['a', 'b'], dtype=str) + >>> pd.array(["a", "b"], dtype=str) ['a', 'b'] Length: 2, dtype: str32 @@ -178,7 +178,7 @@ def array( data. If you really need the new array to be backed by a NumPy array, specify that in the dtype. - >>> pd.array(['a', 'b'], dtype=np.dtype(">> pd.array(["a", "b"], dtype=np.dtype(" ['a', 'b'] Length: 2, dtype: str32 @@ -193,12 +193,12 @@ def array( rather than a ``NumpyExtensionArray``. This is for symmetry with the case of timezone-aware data, which NumPy does not natively support. - >>> pd.array(['2015', '2016'], dtype='datetime64[ns]') + >>> pd.array(["2015", "2016"], dtype="datetime64[ns]") ['2015-01-01 00:00:00', '2016-01-01 00:00:00'] Length: 2, dtype: datetime64[ns] - >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]') + >>> pd.array(["1h", "2h"], dtype="timedelta64[ns]") ['0 days 01:00:00', '0 days 02:00:00'] Length: 2, dtype: timedelta64[ns] @@ -230,27 +230,27 @@ def array( >>> with pd.option_context("string_storage", "pyarrow"): ... arr = pd.array(["a", None, "c"]) - ... >>> arr ['a', , 'c'] Length: 3, dtype: string - >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")]) + >>> pd.array([pd.Period("2000", freq="D"), pd.Period("2000", freq="D")]) ['2000-01-01', '2000-01-01'] Length: 2, dtype: period[D] You can use the string alias for `dtype` - >>> pd.array(['a', 'b', 'a'], dtype='category') + >>> pd.array(["a", "b", "a"], dtype="category") ['a', 'b', 'a'] Categories (2, object): ['a', 'b'] Or specify the actual dtype - >>> pd.array(['a', 'b', 'a'], - ... dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True)) + >>> pd.array( + ... ["a", "b", "a"], dtype=pd.CategoricalDtype(["a", "b", "c"], ordered=True) + ... 
) ['a', 'b', 'a'] Categories (3, object): ['a' < 'b' < 'c'] @@ -439,7 +439,7 @@ def extract_array( Examples -------- - >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category')) + >>> extract_array(pd.Series(["a", "b", "c"], dtype="category")) ['a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 6b00a5284ec5b..41407704dfc8a 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -96,8 +96,7 @@ class property**. >>> from pandas.api.extensions import ExtensionArray >>> class ExtensionDtype: ... def __from_arrow__( - ... self, - ... array: pyarrow.Array | pyarrow.ChunkedArray + ... self, array: pyarrow.Array | pyarrow.ChunkedArray ... ) -> ExtensionArray: ... ... diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index dfe12872c3916..b8b73e7dc6ddb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -893,10 +893,10 @@ def infer_dtype_from_array(arr) -> tuple[DtypeObj, ArrayLike]: Examples -------- - >>> np.asarray([1, '1']) + >>> np.asarray([1, "1"]) array(['1', '1'], dtype='>> infer_dtype_from_array([1, '1']) + >>> infer_dtype_from_array([1, "1"]) (dtype('O'), [1, '1']) """ if isinstance(arr, np.ndarray): diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a53bbe9935684..99114d996cc4c 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -367,7 +367,7 @@ def is_timedelta64_dtype(arr_or_dtype) -> bool: False >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) True - >>> is_timedelta64_dtype('0 days') + >>> is_timedelta64_dtype("0 days") False """ if isinstance(arr_or_dtype, np.dtype): @@ -544,7 +544,7 @@ def is_string_dtype(arr_or_dtype) -> bool: True >>> is_string_dtype(int) False - >>> is_string_dtype(np.array(['a', 'b'])) + >>> is_string_dtype(np.array(["a", "b"])) True >>> is_string_dtype(pd.Series([1, 2])) False @@ -646,9 +646,9 @@ def is_integer_dtype(arr_or_dtype) -> bool: False >>> is_integer_dtype(np.uint64) True - >>> is_integer_dtype('int8') + >>> is_integer_dtype("int8") True - >>> is_integer_dtype('Int8') + >>> is_integer_dtype("Int8") True >>> is_integer_dtype(pd.Int8Dtype) True @@ -656,13 +656,13 @@ def is_integer_dtype(arr_or_dtype) -> bool: False >>> is_integer_dtype(np.timedelta64) False - >>> is_integer_dtype(np.array(['a', 'b'])) + >>> is_integer_dtype(np.array(["a", "b"])) False >>> is_integer_dtype(pd.Series([1, 2])) True >>> is_integer_dtype(np.array([], dtype=np.timedelta64)) False - >>> is_integer_dtype(pd.Index([1, 2.])) # float + >>> is_integer_dtype(pd.Index([1, 2.0])) # float False """ return _is_dtype_type( @@ -703,9 +703,9 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool: False >>> is_signed_integer_dtype(np.uint64) # unsigned False - >>> is_signed_integer_dtype('int8') + >>> is_signed_integer_dtype("int8") True - >>> is_signed_integer_dtype('Int8') + >>> is_signed_integer_dtype("Int8") True >>> is_signed_integer_dtype(pd.Int8Dtype) True @@ -713,13 +713,13 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool: False >>> is_signed_integer_dtype(np.timedelta64) False - >>> is_signed_integer_dtype(np.array(['a', 'b'])) + >>> is_signed_integer_dtype(np.array(["a", "b"])) False >>> is_signed_integer_dtype(pd.Series([1, 2])) True >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64)) False - >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float + >>> is_signed_integer_dtype(pd.Index([1, 2.0])) # float False >>> is_signed_integer_dtype(np.array([1, 
2], dtype=np.uint32)) # unsigned False @@ -759,17 +759,17 @@ def is_unsigned_integer_dtype(arr_or_dtype) -> bool: False >>> is_unsigned_integer_dtype(np.uint64) True - >>> is_unsigned_integer_dtype('uint8') + >>> is_unsigned_integer_dtype("uint8") True - >>> is_unsigned_integer_dtype('UInt8') + >>> is_unsigned_integer_dtype("UInt8") True >>> is_unsigned_integer_dtype(pd.UInt8Dtype) True - >>> is_unsigned_integer_dtype(np.array(['a', 'b'])) + >>> is_unsigned_integer_dtype(np.array(["a", "b"])) False >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed False - >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float + >>> is_unsigned_integer_dtype(pd.Index([1, 2.0])) # float False >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32)) True @@ -815,9 +815,9 @@ def is_int64_dtype(arr_or_dtype) -> bool: False >>> is_int64_dtype(np.int64) # doctest: +SKIP True - >>> is_int64_dtype('int8') # doctest: +SKIP + >>> is_int64_dtype("int8") # doctest: +SKIP False - >>> is_int64_dtype('Int8') # doctest: +SKIP + >>> is_int64_dtype("Int8") # doctest: +SKIP False >>> is_int64_dtype(pd.Int64Dtype) # doctest: +SKIP True @@ -825,11 +825,11 @@ def is_int64_dtype(arr_or_dtype) -> bool: False >>> is_int64_dtype(np.uint64) # unsigned # doctest: +SKIP False - >>> is_int64_dtype(np.array(['a', 'b'])) # doctest: +SKIP + >>> is_int64_dtype(np.array(["a", "b"])) # doctest: +SKIP False >>> is_int64_dtype(np.array([1, 2], dtype=np.int64)) # doctest: +SKIP True - >>> is_int64_dtype(pd.Index([1, 2.])) # float # doctest: +SKIP + >>> is_int64_dtype(pd.Index([1, 2.0])) # float # doctest: +SKIP False >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned # doctest: +SKIP False @@ -870,7 +870,7 @@ def is_datetime64_any_dtype(arr_or_dtype) -> bool: True >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern")) True - >>> is_datetime64_any_dtype(np.array(['a', 'b'])) + >>> is_datetime64_any_dtype(np.array(["a", "b"])) False >>> is_datetime64_any_dtype(np.array([1, 2])) False @@ -923,7 +923,7 @@ def is_datetime64_ns_dtype(arr_or_dtype) -> bool: False >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern")) True - >>> is_datetime64_ns_dtype(np.array(['a', 'b'])) + >>> is_datetime64_ns_dtype(np.array(["a", "b"])) False >>> is_datetime64_ns_dtype(np.array([1, 2])) False @@ -965,11 +965,11 @@ def is_timedelta64_ns_dtype(arr_or_dtype) -> bool: Examples -------- >>> from pandas.core.dtypes.common import is_timedelta64_ns_dtype - >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]')) + >>> is_timedelta64_ns_dtype(np.dtype("m8[ns]")) True - >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]')) # Wrong frequency + >>> is_timedelta64_ns_dtype(np.dtype("m8[ps]")) # Wrong frequency False - >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]')) + >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype="m8[ns]")) True >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) False @@ -1051,7 +1051,7 @@ def needs_i8_conversion(dtype: DtypeObj | None) -> bool: False >>> needs_i8_conversion(np.dtype(np.datetime64)) True - >>> needs_i8_conversion(np.array(['a', 'b'])) + >>> needs_i8_conversion(np.array(["a", "b"])) False >>> needs_i8_conversion(pd.Series([1, 2])) False @@ -1096,11 +1096,11 @@ def is_numeric_dtype(arr_or_dtype) -> bool: False >>> is_numeric_dtype(np.timedelta64) False - >>> is_numeric_dtype(np.array(['a', 'b'])) + >>> is_numeric_dtype(np.array(["a", "b"])) False >>> is_numeric_dtype(pd.Series([1, 2])) True - >>> is_numeric_dtype(pd.Index([1, 2.])) + >>> is_numeric_dtype(pd.Index([1, 2.0])) True 
>>> is_numeric_dtype(np.array([], dtype=np.timedelta64)) False @@ -1172,11 +1172,11 @@ def is_float_dtype(arr_or_dtype) -> bool: False >>> is_float_dtype(float) True - >>> is_float_dtype(np.array(['a', 'b'])) + >>> is_float_dtype(np.array(["a", "b"])) False >>> is_float_dtype(pd.Series([1, 2])) False - >>> is_float_dtype(pd.Index([1, 2.])) + >>> is_float_dtype(pd.Index([1, 2.0])) True """ return _is_dtype_type(arr_or_dtype, classes(np.floating)) or _is_dtype( @@ -1214,7 +1214,7 @@ def is_bool_dtype(arr_or_dtype) -> bool: True >>> is_bool_dtype(np.bool_) True - >>> is_bool_dtype(np.array(['a', 'b'])) + >>> is_bool_dtype(np.array(["a", "b"])) False >>> is_bool_dtype(pd.Series([1, 2])) False @@ -1298,13 +1298,13 @@ def is_extension_array_dtype(arr_or_dtype) -> bool: Examples -------- >>> from pandas.api.types import is_extension_array_dtype - >>> arr = pd.Categorical(['a', 'b']) + >>> arr = pd.Categorical(["a", "b"]) >>> is_extension_array_dtype(arr) True >>> is_extension_array_dtype(arr.dtype) True - >>> arr = np.array(['a', 'b']) + >>> arr = np.array(["a", "b"]) >>> is_extension_array_dtype(arr.dtype) False """ @@ -1351,7 +1351,7 @@ def is_complex_dtype(arr_or_dtype) -> bool: False >>> is_complex_dtype(np.complex128) True - >>> is_complex_dtype(np.array(['a', 'b'])) + >>> is_complex_dtype(np.array(["a", "b"])) False >>> is_complex_dtype(pd.Series([1, 2])) False diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9ec662a6cd352..7d5e88b502a00 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -278,8 +278,8 @@ def union_categoricals( containing categorical data, but note that the resulting array will always be a plain `Categorical` - >>> a = pd.Series(["b", "c"], dtype='category') - >>> b = pd.Series(["a", "b"], dtype='category') + >>> a = pd.Series(["b", "c"], dtype="category") + >>> b = pd.Series(["a", "b"], dtype="category") >>> pd.api.types.union_categoricals([a, b]) ['b', 'c', 'a', 'b'] Categories (3, object): ['b', 'c', 'a'] diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 5afb77b89c8d5..68c7ab6cbdbd1 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -188,8 +188,8 @@ class CategoricalDtype(PandasExtensionDtype, ExtensionDtype): Examples -------- - >>> t = pd.CategoricalDtype(categories=['b', 'a'], ordered=True) - >>> pd.Series(['a', 'b', 'a', 'c'], dtype=t) + >>> t = pd.CategoricalDtype(categories=["b", "a"], ordered=True) + >>> pd.Series(["a", "b", "a", "c"], dtype=t) 0 a 1 b 2 a @@ -286,14 +286,14 @@ def _from_values_or_dtype( >>> pd.CategoricalDtype._from_values_or_dtype() CategoricalDtype(categories=None, ordered=None, categories_dtype=None) >>> pd.CategoricalDtype._from_values_or_dtype( - ... categories=['a', 'b'], ordered=True + ... categories=["a", "b"], ordered=True ... ) CategoricalDtype(categories=['a', 'b'], ordered=True, categories_dtype=object) - >>> dtype1 = pd.CategoricalDtype(['a', 'b'], ordered=True) - >>> dtype2 = pd.CategoricalDtype(['x', 'y'], ordered=False) + >>> dtype1 = pd.CategoricalDtype(["a", "b"], ordered=True) + >>> dtype2 = pd.CategoricalDtype(["x", "y"], ordered=False) >>> c = pd.Categorical([0, 1], dtype=dtype1) >>> pd.CategoricalDtype._from_values_or_dtype( - ... c, ['x', 'y'], ordered=True, dtype=dtype2 + ... c, ["x", "y"], ordered=True, dtype=dtype2 ... ) Traceback (most recent call last): ... 
@@ -621,7 +621,7 @@ def categories(self) -> Index: Examples -------- - >>> cat_type = pd.CategoricalDtype(categories=['a', 'b'], ordered=True) + >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=True) >>> cat_type.categories Index(['a', 'b'], dtype='object') """ @@ -634,11 +634,11 @@ def ordered(self) -> Ordered: Examples -------- - >>> cat_type = pd.CategoricalDtype(categories=['a', 'b'], ordered=True) + >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=True) >>> cat_type.ordered True - >>> cat_type = pd.CategoricalDtype(categories=['a', 'b'], ordered=False) + >>> cat_type = pd.CategoricalDtype(categories=["a", "b"], ordered=False) >>> cat_type.ordered False """ @@ -717,10 +717,10 @@ class DatetimeTZDtype(PandasExtensionDtype): Examples -------- >>> from zoneinfo import ZoneInfo - >>> pd.DatetimeTZDtype(tz=ZoneInfo('UTC')) + >>> pd.DatetimeTZDtype(tz=ZoneInfo("UTC")) datetime64[ns, UTC] - >>> pd.DatetimeTZDtype(tz=ZoneInfo('Europe/Paris')) + >>> pd.DatetimeTZDtype(tz=ZoneInfo("Europe/Paris")) datetime64[ns, Europe/Paris] """ @@ -793,7 +793,7 @@ def unit(self) -> str_type: Examples -------- >>> from zoneinfo import ZoneInfo - >>> dtype = pd.DatetimeTZDtype(tz=ZoneInfo('America/Los_Angeles')) + >>> dtype = pd.DatetimeTZDtype(tz=ZoneInfo("America/Los_Angeles")) >>> dtype.unit 'ns' """ @@ -807,7 +807,7 @@ def tz(self) -> tzinfo: Examples -------- >>> from zoneinfo import ZoneInfo - >>> dtype = pd.DatetimeTZDtype(tz=ZoneInfo('America/Los_Angeles')) + >>> dtype = pd.DatetimeTZDtype(tz=ZoneInfo("America/Los_Angeles")) >>> dtype.tz zoneinfo.ZoneInfo(key='America/Los_Angeles') """ @@ -840,7 +840,7 @@ def construct_from_string(cls, string: str_type) -> DatetimeTZDtype: Examples -------- - >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') + >>> DatetimeTZDtype.construct_from_string("datetime64[ns, UTC]") datetime64[ns, UTC] """ if not isinstance(string, str): @@ -962,7 +962,7 @@ class PeriodDtype(PeriodDtypeBase, PandasExtensionDtype): Examples -------- - >>> pd.PeriodDtype(freq='D') + >>> pd.PeriodDtype(freq="D") period[D] >>> pd.PeriodDtype(freq=pd.offsets.MonthEnd()) @@ -1026,7 +1026,7 @@ def freq(self) -> BaseOffset: Examples -------- - >>> dtype = pd.PeriodDtype(freq='D') + >>> dtype = pd.PeriodDtype(freq="D") >>> dtype.freq <Day> """ @@ -1181,7 +1181,7 @@ class IntervalDtype(PandasExtensionDtype): Examples -------- - >>> pd.IntervalDtype(subtype='int64', closed='both') + >>> pd.IntervalDtype(subtype="int64", closed="both") interval[int64, both] """ @@ -1281,7 +1281,7 @@ def subtype(self): Examples -------- - >>> dtype = pd.IntervalDtype(subtype='int64', closed='both') + >>> dtype = pd.IntervalDtype(subtype="int64", closed="both") >>> dtype.subtype dtype('int64') """ @@ -1999,7 +1999,7 @@ def _subtype_with_str(self): >>> SparseDtype(object, 1)._subtype_with_str dtype('O') - >>> dtype = SparseDtype(str, '') + >>> dtype = SparseDtype(str, "") >>> dtype.subtype dtype('O') diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index e87b7f02b9b05..c0d9b418b9e79 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -428,7 +428,7 @@ def is_dataclass(item: object) -> bool: >>> is_dataclass(Point) False - >>> is_dataclass(Point(0,2)) + >>> is_dataclass(Point(0, 2)) True """ diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 52ec4a0b012e3..17c1ad5e4d8d9 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -129,7 +129,7 @@ def isna(obj: object) ->
bool | npt.NDArray[np.bool_] | NDFrame: -------- Scalar arguments (including strings) result in a scalar boolean. - >>> pd.isna('dog') + >>> pd.isna("dog") False >>> pd.isna(pd.NA) @@ -150,8 +150,7 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: For indexes, an ndarray of booleans is returned. - >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, - ... "2017-07-08"]) + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], dtype='datetime64[ns]', freq=None) @@ -160,7 +159,7 @@ def isna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: For Series and DataFrame, the same type is returned, containing booleans. - >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) >>> df 0 1 2 0 ant bee cat @@ -411,7 +410,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: -------- Scalar arguments (including strings) result in a scalar boolean. - >>> pd.notna('dog') + >>> pd.notna("dog") True >>> pd.notna(pd.NA) @@ -432,8 +431,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: For indexes, an ndarray of booleans is returned. - >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, - ... "2017-07-08"]) + >>> index = pd.DatetimeIndex(["2017-07-05", "2017-07-06", None, "2017-07-08"]) >>> index DatetimeIndex(['2017-07-05', '2017-07-06', 'NaT', '2017-07-08'], dtype='datetime64[ns]', freq=None) @@ -442,7 +440,7 @@ def notna(obj: object) -> bool | npt.NDArray[np.bool_] | NDFrame: For Series and DataFrame, the same type is returned, containing booleans. - >>> df = pd.DataFrame([['ant', 'bee', 'cat'], ['dog', None, 'fly']]) + >>> df = pd.DataFrame([["ant", "bee", "cat"], ["dog", None, "fly"]]) >>> df 0 1 2 0 ant bee cat @@ -498,13 +496,9 @@ def array_equivalent( Examples -------- - >>> array_equivalent( - ... np.array([1, 2, np.nan]), - ... np.array([1, 2, np.nan])) + >>> array_equivalent(np.array([1, 2, np.nan]), np.array([1, 2, np.nan])) True - >>> array_equivalent( - ... np.array([1, np.nan, 2]), - ... 
np.array([1, 2, np.nan])) + >>> array_equivalent(np.array([1, np.nan, 2]), np.array([1, 2, np.nan])) False """ left, right = np.asarray(left), np.asarray(right) @@ -676,15 +670,15 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True): Examples -------- - >>> na_value_for_dtype(np.dtype('int64')) + >>> na_value_for_dtype(np.dtype("int64")) 0 - >>> na_value_for_dtype(np.dtype('int64'), compat=False) + >>> na_value_for_dtype(np.dtype("int64"), compat=False) nan - >>> na_value_for_dtype(np.dtype('float64')) + >>> na_value_for_dtype(np.dtype("float64")) nan - >>> na_value_for_dtype(np.dtype('bool')) + >>> na_value_for_dtype(np.dtype("bool")) False - >>> na_value_for_dtype(np.dtype('datetime64[ns]')) + >>> na_value_for_dtype(np.dtype("datetime64[ns]")) numpy.datetime64('NaT') """ diff --git a/pandas/core/flags.py b/pandas/core/flags.py index 394695e69a3d3..8dcf49745bf2d 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -41,7 +41,7 @@ class Flags: >>> df.flags <Flags(allows_duplicate_labels=True)> - >>> df.flags['allows_duplicate_labels'] = True + >>> df.flags["allows_duplicate_labels"] = True >>> df.flags <Flags(allows_duplicate_labels=True)> """ @@ -71,7 +71,7 @@ def allows_duplicate_labels(self) -> bool: Examples -------- - >>> df = pd.DataFrame({"A": [1, 2]}, index=['a', 'a']) + >>> df = pd.DataFrame({"A": [1, 2]}, index=["a", "a"]) >>> df.flags.allows_duplicate_labels True >>> df.flags.allows_duplicate_labels = False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 910d7b2ab2178..e48e5d9023f33 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -554,7 +554,7 @@ class DataFrame(NDFrame, OpsMixin): -------- Constructing DataFrame from a dictionary. - >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> d = {"col1": [1, 2], "col2": [3, 4]} >>> df = pd.DataFrame(data=d) >>> df col1 col2 @@ -578,7 +578,7 @@ class DataFrame(NDFrame, OpsMixin): Constructing DataFrame from a dictionary including Series: - >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])} + >>> d = {"col1": [0, 1, 2, 3], "col2": pd.Series([2, 3], index=[2, 3])} >>> pd.DataFrame(data=d, index=[0, 1, 2, 3]) col1 col2 0 0 NaN @@ -588,8 +588,9 @@ class DataFrame(NDFrame, OpsMixin): Constructing DataFrame from numpy ndarray: - >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), - ... columns=['a', 'b', 'c']) + >>> df2 = pd.DataFrame( + ... np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), columns=["a", "b", "c"] + ... ) >>> df2 a b c 0 1 2 3 @@ -598,10 +599,11 @@ class DataFrame(NDFrame, OpsMixin): Constructing DataFrame from a numpy ndarray that has labeled columns: - >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], - ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) - >>> df3 = pd.DataFrame(data, columns=['c', 'a']) - ... + >>> data = np.array( + ... [(1, 2, 3), (4, 5, 6), (7, 8, 9)], + ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")], + ... ) + >>> df3 = pd.DataFrame(data, columns=["c", "a"]) >>> df3 c a 0 3 1 @@ -926,12 +928,13 @@ def __dataframe__( Examples -------- - >>> df_not_necessarily_pandas = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df_not_necessarily_pandas = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> interchange_object = df_not_necessarily_pandas.__dataframe__() >>> interchange_object.column_names() Index(['A', 'B'], dtype='object') - >>> df_pandas = (pd.api.interchange.from_dataframe - ... (interchange_object.select_columns_by_name(['A']))) + >>> df_pandas = pd.api.interchange.from_dataframe( + ... interchange_object.select_columns_by_name(["A"]) + ...
) >>> df_pandas A 0 1 @@ -999,7 +1002,7 @@ def axes(self) -> list[Index]: Examples -------- - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df.axes [RangeIndex(start=0, stop=2, step=1), Index(['col1', 'col2'], dtype='object')] @@ -1017,12 +1020,11 @@ def shape(self) -> tuple[int, int]: Examples -------- - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df.shape (2, 2) - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4], - ... 'col3': [5, 6]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4], "col3": [5, 6]}) >>> df.shape (2, 3) """ @@ -1047,9 +1049,12 @@ def _is_homogeneous_type(self) -> bool: Items with the same type but different sizes are considered different types. - >>> DataFrame({ - ... "A": np.array([1, 2], dtype=np.int32), - ... "B": np.array([1, 2], dtype=np.int64)})._is_homogeneous_type + >>> DataFrame( + ... { + ... "A": np.array([1, 2], dtype=np.int32), + ... "B": np.array([1, 2], dtype=np.int64), + ... } + ... )._is_homogeneous_type False """ # The "<" part of "<=" here is for empty DataFrame cases @@ -1315,7 +1320,7 @@ def to_string( Examples -------- - >>> d = {'col1': [1, 2, 3], 'col2': [4, 5, 6]} + >>> d = {"col1": [1, 2, 3], "col2": [4, 5, 6]} >>> df = pd.DataFrame(d) >>> print(df.to_string()) col1 col2 @@ -1385,7 +1390,7 @@ def style(self) -> Styler: Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3]}) + >>> df = pd.DataFrame({"A": [1, 2, 3]}) >>> df.style # doctest: +SKIP Please see @@ -1482,15 +1487,15 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: Examples -------- - >>> df = pd.DataFrame([[1, 1.5]], columns=['int', 'float']) + >>> df = pd.DataFrame([[1, 1.5]], columns=["int", "float"]) >>> row = next(df.iterrows())[1] >>> row int 1.0 float 1.5 Name: 0, dtype: float64 - >>> print(row['int'].dtype) + >>> print(row["int"].dtype) float64 - >>> print(df['int'].dtype) + >>> print(df["int"].dtype) int64 """ columns = self.columns @@ -1536,15 +1541,15 @@ def itertuples( Examples -------- - >>> df = pd.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]}, - ... index=['dog', 'hawk']) + >>> df = pd.DataFrame( + ... {"num_legs": [4, 2], "num_wings": [0, 2]}, index=["dog", "hawk"] + ... ) >>> df num_legs num_wings dog 4 0 hawk 2 2 >>> for row in df.itertuples(): ... print(row) - ... Pandas(Index='dog', num_legs=4, num_wings=0) Pandas(Index='hawk', num_legs=2, num_wings=2) @@ -1553,16 +1558,14 @@ def itertuples( >>> for row in df.itertuples(index=False): ... print(row) - ... Pandas(num_legs=4, num_wings=0) Pandas(num_legs=2, num_wings=2) With the `name` parameter set we set a custom name for the yielded namedtuples: - >>> for row in df.itertuples(name='Animal'): + >>> for row in df.itertuples(name="Animal"): ... print(row) - ... 
Animal(Index='dog', num_legs=4, num_wings=0) Animal(Index='hawk', num_legs=2, num_wings=2) """ @@ -1797,7 +1800,7 @@ def from_dict( -------- By default the keys of the dict become the DataFrame columns: - >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']} + >>> data = {"col_1": [3, 2, 1, 0], "col_2": ["a", "b", "c", "d"]} >>> pd.DataFrame.from_dict(data) col_1 col_2 0 3 a @@ -1808,8 +1811,8 @@ def from_dict( Specify ``orient='index'`` to create the DataFrame using dictionary keys as rows: - >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']} - >>> pd.DataFrame.from_dict(data, orient='index') + >>> data = {"row_1": [3, 2, 1, 0], "row_2": ["a", "b", "c", "d"]} + >>> pd.DataFrame.from_dict(data, orient="index") 0 1 2 3 row_1 3 2 1 0 row_2 a b c d @@ -1817,8 +1820,7 @@ def from_dict( When using the 'index' orientation, the column names can be specified manually: - >>> pd.DataFrame.from_dict(data, orient='index', - ... columns=['A', 'B', 'C', 'D']) + >>> pd.DataFrame.from_dict(data, orient="index", columns=["A", "B", "C", "D"]) A B C D row_1 3 2 1 0 row_2 a b c d @@ -1826,12 +1828,14 @@ def from_dict( Specify ``orient='tight'`` to create the DataFrame using a 'tight' format: - >>> data = {'index': [('a', 'b'), ('a', 'c')], - ... 'columns': [('x', 1), ('y', 2)], - ... 'data': [[1, 3], [2, 4]], - ... 'index_names': ['n1', 'n2'], - ... 'column_names': ['z1', 'z2']} - >>> pd.DataFrame.from_dict(data, orient='tight') + >>> data = { + ... "index": [("a", "b"), ("a", "c")], + ... "columns": [("x", 1), ("y", 2)], + ... "data": [[1, 3], [2, 4]], + ... "index_names": ["n1", "n2"], + ... "column_names": ["z1", "z2"], + ... } + >>> pd.DataFrame.from_dict(data, orient="tight") z1 x y z2 1 2 n1 n2 @@ -1929,7 +1933,7 @@ def to_numpy( For a mix of numeric and non-numeric types, the output array will have object dtype. - >>> df['C'] = pd.date_range('2000', periods=2) + >>> df["C"] = pd.date_range("2000", periods=2) >>> df.to_numpy() array([[1, 3.0, Timestamp('2000-01-01 00:00:00')], [2, 4.5, Timestamp('2000-01-02 00:00:00')]], dtype=object) @@ -2048,9 +2052,9 @@ def to_dict( Examples -------- - >>> df = pd.DataFrame({'col1': [1, 2], - ... 'col2': [0.5, 0.75]}, - ... index=['row1', 'row2']) + >>> df = pd.DataFrame( + ... {"col1": [1, 2], "col2": [0.5, 0.75]}, index=["row1", "row2"] + ... ) >>> df col1 col2 row1 1 0.50 @@ -2060,7 +2064,7 @@ def to_dict( You can specify the return orientation. 
- >>> df.to_dict('series') + >>> df.to_dict("series") {'col1': row1 1 row2 2 Name: col1, dtype: int64, @@ -2068,17 +2072,17 @@ def to_dict( row2 0.75 Name: col2, dtype: float64} - >>> df.to_dict('split') + >>> df.to_dict("split") {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], 'data': [[1, 0.5], [2, 0.75]]} - >>> df.to_dict('records') + >>> df.to_dict("records") [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}] - >>> df.to_dict('index') + >>> df.to_dict("index") {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} - >>> df.to_dict('tight') + >>> df.to_dict("tight") {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], 'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]} @@ -2092,7 +2096,7 @@ def to_dict( If you want a `defaultdict`, you need to initialize it: >>> dd = defaultdict(list) - >>> df.to_dict('records', into=dd) + >>> df.to_dict("records", into=dd) [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}), defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})] """ @@ -2153,8 +2157,10 @@ def from_records( -------- Data can be provided as a structured ndarray: - >>> data = np.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')], - ... dtype=[('col_1', 'i4'), ('col_2', 'U1')]) + >>> data = np.array( + ... [(3, "a"), (2, "b"), (1, "c"), (0, "d")], + ... dtype=[("col_1", "i4"), ("col_2", "U1")], + ... ) >>> pd.DataFrame.from_records(data) col_1 col_2 0 3 a @@ -2164,10 +2170,12 @@ def from_records( Data can be provided as a list of dicts: - >>> data = [{'col_1': 3, 'col_2': 'a'}, - ... {'col_1': 2, 'col_2': 'b'}, - ... {'col_1': 1, 'col_2': 'c'}, - ... {'col_1': 0, 'col_2': 'd'}] + >>> data = [ + ... {"col_1": 3, "col_2": "a"}, + ... {"col_1": 2, "col_2": "b"}, + ... {"col_1": 1, "col_2": "c"}, + ... {"col_1": 0, "col_2": "d"}, + ... ] >>> pd.DataFrame.from_records(data) col_1 col_2 0 3 a @@ -2177,8 +2185,8 @@ def from_records( Data can be provided as a list of tuples with corresponding columns: - >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')] - >>> pd.DataFrame.from_records(data, columns=['col_1', 'col_2']) + >>> data = [(3, "a"), (2, "b"), (1, "c"), (0, "d")] + >>> pd.DataFrame.from_records(data, columns=["col_1", "col_2"]) col_1 col_2 0 3 a 1 2 b @@ -2367,8 +2375,7 @@ def to_records( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [0.5, 0.75]}, - ... index=['a', 'b']) + >>> df = pd.DataFrame({"A": [1, 2], "B": [0.5, 0.75]}, index=["a", "b"]) >>> df A B a 1 0.50 @@ -2639,10 +2646,10 @@ def to_stata( Examples -------- - >>> df = pd.DataFrame({{'animal': ['falcon', 'parrot', 'falcon', - ... 'parrot'], - ... 'speed': [350, 18, 361, 15]}}) - >>> df.to_stata('animals.dta') # doctest: +SKIP + >>> df = pd.DataFrame( + ... [["falcon", 350], ["parrot", 18]], columns=["animal", "speed"] + ... ) + >>> df.to_stata("animals.dta") # doctest: +SKIP """ if version not in (114, 117, 118, 119, None): raise ValueError("Only formats 114, 117, 118 and 119 are supported.") @@ -2869,10 +2876,9 @@ def to_parquet( Examples -------- - >>> df = pd.DataFrame(data={{'col1': [1, 2], 'col2': [3, 4]}}) - >>> df.to_parquet('df.parquet.gzip', - ...
compression='gzip') # doctest: +SKIP - >>> pd.read_parquet('df.parquet.gzip') # doctest: +SKIP + >>> df = pd.DataFrame(data={{"col1": [1, 2], "col2": [3, 4]}}) + >>> df.to_parquet("df.parquet.gzip", compression="gzip") # doctest: +SKIP + >>> pd.read_parquet("df.parquet.gzip") # doctest: +SKIP col1 col2 0 1 3 1 2 4 @@ -2967,9 +2973,9 @@ def to_orc( Examples -------- - >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [4, 3]}) - >>> df.to_orc('df.orc') # doctest: +SKIP - >>> pd.read_orc('df.orc') # doctest: +SKIP + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) + >>> df.to_orc("df.orc") # doctest: +SKIP + >>> pd.read_orc("df.orc") # doctest: +SKIP col1 col2 0 1 4 1 2 3 @@ -3110,7 +3116,7 @@ def to_html( Examples -------- - >>> df = pd.DataFrame(data={'col1': [1, 2], 'col2': [4, 3]}) + >>> df = pd.DataFrame(data={"col1": [1, 2], "col2": [4, 3]}) >>> html_string = '''<table border="1" class="dataframe"> ... <thead> ... <tr style="text-align: right;"> @@ -3315,9 +3321,10 @@ def to_xml( Examples -------- - >>> df = pd.DataFrame({{'shape': ['square', 'circle', 'triangle'], - ... 'degrees': [360, 360, 180], - ... 'sides': [4, np.nan, 3]}}) + >>> df = pd.DataFrame( + ... [["square", 360, 4], ["circle", 360, np.nan], ["triangle", 180, 3]], + ... columns=["shape", "degrees", "sides"], + ... ) >>> df.to_xml() # doctest: +SKIP - >>> df.to_xml(attr_cols=[ - ... 'index', 'shape', 'degrees', 'sides' - ... ]) # doctest: +SKIP + >>> df.to_xml( + ... attr_cols=["index", "shape", "degrees", "sides"] + ... ) # doctest: +SKIP - >>> df.to_xml(namespaces={{"doc": "https://example.com"}}, - ... prefix="doc") # doctest: +SKIP + >>> df.to_xml( + ... namespaces={{"doc": "https://example.com"}}, prefix="doc" + ... ) # doctest: +SKIP @@ -3485,9 +3493,8 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: Examples -------- - >>> dtypes = ['int64', 'float64', 'complex128', 'object', 'bool'] - >>> data = dict([(t, np.ones(shape=5000, dtype=int).astype(t)) - ... for t in dtypes]) + >>> dtypes = ["int64", "float64", "complex128", "object", "bool"] + >>> data = dict([(t, np.ones(shape=5000, dtype=int).astype(t)) for t in dtypes]) >>> df = pd.DataFrame(data) >>> df.head() int64 float64 complex128 object bool @@ -3528,7 +3535,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: Use a Categorical for efficient storage of an object-dtype column with many repeated values. - >>> df['object'].astype('category').memory_usage(deep=True) + >>> df["object"].astype("category").memory_usage(deep=True) 5136 """ result = self._constructor_sliced( @@ -3593,7 +3600,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: -------- **Square DataFrame with homogeneous dtype** - >>> d1 = {'col1': [1, 2], 'col2': [3, 4]} + >>> d1 = {"col1": [1, 2], "col2": [3, 4]} >>> df1 = pd.DataFrame(data=d1) >>> df1 col1 col2 @@ -3620,10 +3627,12 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: **Non-square DataFrame with mixed dtypes** - >>> d2 = {'name': ['Alice', 'Bob'], - ... 'score': [9.5, 8], - ... 'employed': [False, True], - ... 'kids': [0, 0]} + >>> d2 = { + ... "name": ["Alice", "Bob"], + ... "score": [9.5, 8], + ... "employed": [False, True], + ... "kids": [0, 0], + ...
} >>> df2 = pd.DataFrame(data=d2) >>> df2 name score employed kids @@ -3743,7 +3752,7 @@ def T(self) -> DataFrame: Examples -------- - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df col1 col2 0 1 3 @@ -4477,9 +4486,9 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No Examples -------- - >>> df = pd.DataFrame({'A': range(1, 6), - ... 'B': range(10, 0, -2), - ... 'C C': range(10, 5, -1)}) + >>> df = pd.DataFrame( + ... {"A": range(1, 6), "B": range(10, 0, -2), "C C": range(10, 5, -1)} + ... ) >>> df A B C C 0 1 10 10 @@ -4487,7 +4496,7 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No 2 3 6 8 3 4 4 7 4 5 2 6 - >>> df.query('A > B') + >>> df.query("A > B") A B C C 4 5 2 6 @@ -4499,13 +4508,13 @@ def query(self, expr: str, *, inplace: bool = False, **kwargs) -> DataFrame | No For columns with spaces in their name, you can use backtick quoting. - >>> df.query('B == `C C`') + >>> df.query("B == `C C`") A B C C 0 1 10 10 The previous expression is equivalent to - >>> df[df.B == df['C C']] + >>> df[df.B == df["C C"]] A B C C 0 1 10 10 """ @@ -4581,7 +4590,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: Examples -------- - >>> df = pd.DataFrame({'A': range(1, 6), 'B': range(10, 0, -2)}) + >>> df = pd.DataFrame({"A": range(1, 6), "B": range(10, 0, -2)}) >>> df A B 0 1 10 @@ -4589,7 +4598,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: 2 3 6 3 4 4 4 5 2 - >>> df.eval('A + B') + >>> df.eval("A + B") 0 11 1 10 2 9 @@ -4600,7 +4609,7 @@ def eval(self, expr: str, *, inplace: bool = False, **kwargs) -> Any | None: Assignment is allowed though by default the original DataFrame is not modified. - >>> df.eval('C = A + B') + >>> df.eval("C = A + B") A B C 0 1 10 11 1 2 8 10 @@ -4687,9 +4696,9 @@ def select_dtypes(self, include=None, exclude=None) -> Self: Examples -------- - >>> df = pd.DataFrame({'a': [1, 2] * 3, - ... 'b': [True, False] * 3, - ... 'c': [1.0, 2.0] * 3}) + >>> df = pd.DataFrame( + ... {"a": [1, 2] * 3, "b": [True, False] * 3, "c": [1.0, 2.0] * 3} + ... ) >>> df a b c 0 1 True 1.0 @@ -4699,7 +4708,7 @@ def select_dtypes(self, include=None, exclude=None) -> Self: 4 1 True 1.0 5 2 False 2.0 - >>> df.select_dtypes(include='bool') + >>> df.select_dtypes(include="bool") b 0 True 1 False @@ -4708,7 +4717,7 @@ def select_dtypes(self, include=None, exclude=None) -> Self: 4 True 5 False - >>> df.select_dtypes(include=['float64']) + >>> df.select_dtypes(include=["float64"]) c 0 1.0 1 2.0 @@ -4717,7 +4726,7 @@ def select_dtypes(self, include=None, exclude=None) -> Self: 4 1.0 5 2.0 - >>> df.select_dtypes(exclude=['int64']) + >>> df.select_dtypes(exclude=["int64"]) b c 0 True 1.0 1 False 2.0 @@ -4816,7 +4825,7 @@ def insert( Examples -------- - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df col1 col2 0 1 3 @@ -4896,8 +4905,7 @@ def assign(self, **kwargs) -> DataFrame: Examples -------- - >>> df = pd.DataFrame({'temp_c': [17.0, 25.0]}, - ... 
index=['Portland', 'Berkeley']) + >>> df = pd.DataFrame({"temp_c": [17.0, 25.0]}, index=["Portland", "Berkeley"]) >>> df temp_c Portland 17.0 @@ -4913,7 +4921,7 @@ def assign(self, **kwargs) -> DataFrame: Alternatively, the same behavior can be achieved by directly referencing an existing Series or sequence: - >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32) + >>> df.assign(temp_f=df["temp_c"] * 9 / 5 + 32) temp_c temp_f Portland 17.0 62.6 Berkeley 25.0 77.0 @@ -4921,8 +4929,10 @@ def assign(self, **kwargs) -> DataFrame: You can create multiple columns within the same assign where one of the columns depends on another one defined within the same assign: - >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32, - ... temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9) + >>> df.assign( + ... temp_f=lambda x: x["temp_c"] * 9 / 5 + 32, + ... temp_k=lambda x: (x["temp_f"] + 459.67) * 5 / 9, + ... ) temp_c temp_f temp_k Portland 17.0 62.6 290.15 Berkeley 25.0 77.0 298.15 @@ -5189,8 +5199,7 @@ def drop( Examples -------- - >>> df = pd.DataFrame(np.arange(12).reshape(3, 4), - ... columns=['A', 'B', 'C', 'D']) + >>> df = pd.DataFrame(np.arange(12).reshape(3, 4), columns=["A", "B", "C", "D"]) >>> df A B C D 0 0 1 2 3 @@ -5199,13 +5208,13 @@ def drop( Drop columns - >>> df.drop(['B', 'C'], axis=1) + >>> df.drop(["B", "C"], axis=1) A D 0 0 3 1 4 7 2 8 11 - >>> df.drop(columns=['B', 'C']) + >>> df.drop(columns=["B", "C"]) A D 0 0 3 1 4 7 @@ -5219,14 +5228,25 @@ def drop( Drop columns and/or rows of MultiIndex DataFrame - >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], - ... ['speed', 'weight', 'length']], - ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], - ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) - >>> df = pd.DataFrame(index=midx, columns=['big', 'small'], - ... data=[[45, 30], [200, 100], [1.5, 1], [30, 20], - ... [250, 150], [1.5, 0.8], [320, 250], - ... [1, 0.8], [0.3, 0.2]]) + >>> midx = pd.MultiIndex( + ... levels=[["llama", "cow", "falcon"], ["speed", "weight", "length"]], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], + ... ) + >>> df = pd.DataFrame( + ... index=midx, + ... columns=["big", "small"], + ... data=[ + ... [45, 30], + ... [200, 100], + ... [1.5, 1], + ... [30, 20], + ... [250, 150], + ... [1.5, 0.8], + ... [320, 250], + ... [1, 0.8], + ... [0.3, 0.2], + ... ], + ... ) >>> df big small llama speed 45.0 30.0 @@ -5243,7 +5263,7 @@ def drop( DataFrame, i.e., drop the combination ``'falcon'`` and ``'weight'``, which deletes only the corresponding row - >>> df.drop(index=('falcon', 'weight')) + >>> df.drop(index=("falcon", "weight")) big small llama speed 45.0 30.0 weight 200.0 100.0 @@ -5254,7 +5274,7 @@ def drop( falcon speed 320.0 250.0 length 0.3 0.2 - >>> df.drop(index='cow', columns='small') + >>> df.drop(index="cow", columns="small") big llama speed 45.0 weight 200.0 @@ -5263,7 +5283,7 @@ def drop( weight 1.0 length 0.3 - >>> df.drop(index='length', level=1) + >>> df.drop(index="length", level=1) big small llama speed 45.0 30.0 weight 200.0 100.0 @@ -5446,13 +5466,13 @@ def rename( Using axis-style parameters: - >>> df.rename(str.lower, axis='columns') + >>> df.rename(str.lower, axis="columns") a b 0 1 4 1 2 5 2 3 6 - >>> df.rename({1: 2, 2: 4}, axis='index') + >>> df.rename({1: 2, 2: 4}, axis="index") A B 0 1 4 2 2 5 @@ -5484,11 +5504,15 @@ def pop(self, item: Hashable) -> Series: Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... ('parrot', 'bird', 24.0), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', np.nan)], - ... 
columns=('name', 'class', 'max_speed')) + >>> df = pd.DataFrame( + ... [ + ... ("falcon", "bird", 389.0), + ... ("parrot", "bird", 24.0), + ... ("lion", "mammal", 80.5), + ... ("monkey", "mammal", np.nan), + ... ], + ... columns=("name", "class", "max_speed"), + ... ) >>> df name class max_speed 0 falcon bird 389.0 @@ -5496,7 +5520,7 @@ def pop(self, item: Hashable) -> Series: 2 lion mammal 80.5 3 monkey mammal NaN - >>> df.pop('class') + >>> df.pop("class") 0 bird 1 bird 2 mammal @@ -5729,9 +5753,13 @@ def set_index( Examples -------- - >>> df = pd.DataFrame({'month': [1, 4, 7, 10], - ... 'year': [2012, 2014, 2013, 2014], - ... 'sale': [55, 40, 84, 31]}) + >>> df = pd.DataFrame( + ... { + ... "month": [1, 4, 7, 10], + ... "year": [2012, 2014, 2013, 2014], + ... "sale": [55, 40, 84, 31], + ... } + ... ) >>> df month year sale 0 1 2012 55 @@ -5741,7 +5769,7 @@ def set_index( Set the index to become the 'month' column: - >>> df.set_index('month') + >>> df.set_index("month") year sale month 1 2012 55 @@ -5751,7 +5779,7 @@ def set_index( Create a MultiIndex using columns 'year' and 'month': - >>> df.set_index(['year', 'month']) + >>> df.set_index(["year", "month"]) sale year month 2012 1 55 @@ -5761,7 +5789,7 @@ def set_index( Create a MultiIndex using an Index and a column: - >>> df.set_index([pd.Index([1, 2, 3, 4]), 'year']) + >>> df.set_index([pd.Index([1, 2, 3, 4]), "year"]) month sale year 1 2012 1 55 @@ -5987,12 +6015,11 @@ def reset_index( Examples -------- - >>> df = pd.DataFrame([('bird', 389.0), - ... ('bird', 24.0), - ... ('mammal', 80.5), - ... ('mammal', np.nan)], - ... index=['falcon', 'parrot', 'lion', 'monkey'], - ... columns=('class', 'max_speed')) + >>> df = pd.DataFrame( + ... [("bird", 389.0), ("bird", 24.0), ("mammal", 80.5), ("mammal", np.nan)], + ... index=["falcon", "parrot", "lion", "monkey"], + ... columns=("class", "max_speed"), + ... ) >>> df class max_speed falcon bird 389.0 @@ -6022,19 +6049,21 @@ class max_speed You can also use `reset_index` with `MultiIndex`. - >>> index = pd.MultiIndex.from_tuples([('bird', 'falcon'), - ... ('bird', 'parrot'), - ... ('mammal', 'lion'), - ... ('mammal', 'monkey')], - ... names=['class', 'name']) - >>> columns = pd.MultiIndex.from_tuples([('speed', 'max'), - ... ('species', 'type')]) - >>> df = pd.DataFrame([(389.0, 'fly'), - ... (24.0, 'fly'), - ... (80.5, 'run'), - ... (np.nan, 'jump')], - ... index=index, - ... columns=columns) + >>> index = pd.MultiIndex.from_tuples( + ... [ + ... ("bird", "falcon"), + ... ("bird", "parrot"), + ... ("mammal", "lion"), + ... ("mammal", "monkey"), + ... ], + ... names=["class", "name"], + ... ) + >>> columns = pd.MultiIndex.from_tuples([("speed", "max"), ("species", "type")]) + >>> df = pd.DataFrame( + ... [(389.0, "fly"), (24.0, "fly"), (80.5, "run"), (np.nan, "jump")], + ... index=index, + ... columns=columns, + ... ) >>> df speed species max type @@ -6046,7 +6075,7 @@ class name Using the `names` parameter, choose a name for the index column: - >>> df.reset_index(names=['classes', 'names']) + >>> df.reset_index(names=["classes", "names"]) classes names speed species max type 0 bird falcon 389.0 fly @@ -6056,7 +6085,7 @@ class name If the index has multiple levels, we can reset a subset of them: - >>> df.reset_index(level='class') + >>> df.reset_index(level="class") class speed species max type name @@ -6068,7 +6097,7 @@ class speed species If we are not dropping the index, by default, it is placed in the top level. 
We can place it in another level: - >>> df.reset_index(level='class', col_level=1) + >>> df.reset_index(level="class", col_level=1) speed species class max type name @@ -6080,7 +6109,7 @@ class max type When the index is inserted under another level, we can specify under which one with the parameter `col_fill`: - >>> df.reset_index(level='class', col_level=1, col_fill='species') + >>> df.reset_index(level="class", col_level=1, col_fill="species") species speed species class max type name @@ -6091,7 +6120,7 @@ class max type If we specify a nonexistent level for `col_fill`, it is created: - >>> df.reset_index(level='class', col_level=1, col_fill='genus') + >>> df.reset_index(level="class", col_level=1, col_fill="genus") genus speed species class max type name @@ -6287,10 +6316,13 @@ def dropna( Examples -------- - >>> df = pd.DataFrame({"name": ['Alfred', 'Batman', 'Catwoman'], - ... "toy": [np.nan, 'Batmobile', 'Bullwhip'], - ... "born": [pd.NaT, pd.Timestamp("1940-04-25"), - ... pd.NaT]}) + >>> df = pd.DataFrame( + ... { + ... "name": ["Alfred", "Batman", "Catwoman"], + ... "toy": [np.nan, "Batmobile", "Bullwhip"], + ... "born": [pd.NaT, pd.Timestamp("1940-04-25"), pd.NaT], + ... } + ... ) >>> df name toy born 0 Alfred NaN NaT @@ -6305,7 +6337,7 @@ def dropna( Drop the columns where at least one element is missing. - >>> df.dropna(axis='columns') + >>> df.dropna(axis="columns") name 0 Alfred 1 Batman @@ -6313,7 +6345,7 @@ def dropna( Drop the rows where all elements are missing. - >>> df.dropna(how='all') + >>> df.dropna(how="all") name toy born 0 Alfred NaN NaT 1 Batman Batmobile 1940-04-25 @@ -6328,7 +6360,7 @@ def dropna( Define in which columns to look for missing values. - >>> df.dropna(subset=['name', 'toy']) + >>> df.dropna(subset=["name", "toy"]) name toy born 1 Batman Batmobile 1940-04-25 2 Catwoman Bullwhip NaT @@ -6463,11 +6495,13 @@ def drop_duplicates( -------- Consider dataset containing ramen rating. - >>> df = pd.DataFrame({ - ... 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'], - ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], - ... 'rating': [4, 4, 3.5, 15, 5] - ... }) + >>> df = pd.DataFrame( + ... { + ... "brand": ["Yum Yum", "Yum Yum", "Indomie", "Indomie", "Indomie"], + ... "style": ["cup", "cup", "cup", "pack", "pack"], + ... "rating": [4, 4, 3.5, 15, 5], + ... } + ... ) >>> df brand style rating 0 Yum Yum cup 4.0 @@ -6487,14 +6521,14 @@ def drop_duplicates( To remove duplicates on specific column(s), use ``subset``. - >>> df.drop_duplicates(subset=['brand']) + >>> df.drop_duplicates(subset=["brand"]) brand style rating 0 Yum Yum cup 4.0 2 Indomie cup 3.5 To remove duplicates and keep last occurrences, use ``keep``. - >>> df.drop_duplicates(subset=['brand', 'style'], keep='last') + >>> df.drop_duplicates(subset=["brand", "style"], keep="last") brand style rating 1 Yum Yum cup 4.0 2 Indomie cup 3.5 @@ -6554,11 +6588,13 @@ def duplicated( -------- Consider dataset containing ramen rating. - >>> df = pd.DataFrame({ - ... 'brand': ['Yum Yum', 'Yum Yum', 'Indomie', 'Indomie', 'Indomie'], - ... 'style': ['cup', 'cup', 'cup', 'pack', 'pack'], - ... 'rating': [4, 4, 3.5, 15, 5] - ... }) + >>> df = pd.DataFrame( + ... { + ... "brand": ["Yum Yum", "Yum Yum", "Indomie", "Indomie", "Indomie"], + ... "style": ["cup", "cup", "cup", "pack", "pack"], + ... "rating": [4, 4, 3.5, 15, 5], + ... } + ... 
) >>> df brand style rating 0 Yum Yum cup 4.0 1 Yum Yum cup 4.0 2 Indomie cup 3.5 3 Indomie pack 15.0 4 Indomie pack 5.0 @@ -6581,7 +6617,7 @@ def duplicated( By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True. - >>> df.duplicated(keep='last') + >>> df.duplicated(keep="last") 0 True 1 False 2 False @@ -6601,7 +6637,7 @@ def duplicated( To find duplicates on specific column(s), use ``subset``. - >>> df.duplicated(subset=['brand']) + >>> df.duplicated(subset=["brand"]) 0 False 1 True 2 False @@ -6747,12 +6783,14 @@ def sort_values( Examples -------- - >>> df = pd.DataFrame({ - ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], - ... 'col2': [2, 1, 9, 8, 7, 4], - ... 'col3': [0, 1, 9, 4, 2, 3], - ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] - ... }) + >>> df = pd.DataFrame( + ... { + ... "col1": ["A", "A", "B", np.nan, "D", "C"], + ... "col2": [2, 1, 9, 8, 7, 4], + ... "col3": [0, 1, 9, 4, 2, 3], + ... "col4": ["a", "B", "c", "D", "e", "F"], + ... } + ... ) >>> df col1 col2 col3 col4 0 A 2 0 a @@ -6764,7 +6802,7 @@ def sort_values( Sort by col1 - >>> df.sort_values(by=['col1']) + >>> df.sort_values(by=["col1"]) col1 col2 col3 col4 0 A 2 0 a 1 A 1 1 B @@ -6775,7 +6813,7 @@ def sort_values( Sort by multiple columns - >>> df.sort_values(by=['col1', 'col2']) + >>> df.sort_values(by=["col1", "col2"]) col1 col2 col3 col4 1 A 1 1 B 0 A 2 0 a @@ -6786,7 +6824,7 @@ def sort_values( Sort Descending - >>> df.sort_values(by='col1', ascending=False) + >>> df.sort_values(by="col1", ascending=False) col1 col2 col3 col4 4 D 7 2 e 5 C 4 3 F @@ -6797,7 +6835,7 @@ def sort_values( Putting NAs first - >>> df.sort_values(by='col1', ascending=False, na_position='first') + >>> df.sort_values(by="col1", ascending=False, na_position="first") col1 col2 col3 col4 3 NaN 8 4 D 4 D 7 2 e @@ -6808,7 +6846,7 @@ def sort_values( Sorting with a key function - >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) + >>> df.sort_values(by="col4", key=lambda col: col.str.lower()) col1 col2 col3 col4 0 A 2 0 a 1 A 1 1 B @@ -6820,10 +6858,12 @@ def sort_values( Natural sort with the key argument, using the `natsort <https://github.com/SethMMorton/natsort>` package. - >>> df = pd.DataFrame({ - ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], - ... "value": [10, 20, 30, 40, 50] - ... }) + >>> df = pd.DataFrame( + ... { + ... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"], + ... "value": [10, 20, 30, 40, 50], + ... } + ... ) >>> df time value 0 0hr 10 @@ -6833,8 +6873,7 @@ def sort_values( 4 96hr 50 >>> from natsort import index_natsorted >>> df.sort_values( - ... by="time", - ... key=lambda x: np.argsort(index_natsorted(df["time"])) + ... by="time", key=lambda x: np.argsort(index_natsorted(df["time"])) ... ) time value 0 0hr 10 @@ -7035,8 +7074,9 @@ def sort_index( Examples -------- - >>> df = pd.DataFrame([1, 2, 3, 4, 5], index=[100, 29, 234, 1, 150], - ... columns=['A']) + >>> df = pd.DataFrame( + ... [1, 2, 3, 4, 5], index=[100, 29, 234, 1, 150], columns=["A"] + ... ) >>> df.sort_index() A 1 4 @@ -7059,7 +7099,7 @@ def sort_index( A key function can be specified which is applied to the index before sorting. For a ``MultiIndex`` this is applied to each level separately. - >>> df = pd.DataFrame({"a": [1, 2, 3, 4]}, index=['A', 'b', 'C', 'd']) + >>> df = pd.DataFrame({"a": [1, 2, 3, 4]}, index=["A", "b", "C", "d"]) >>> df.sort_index(key=lambda x: x.str.lower()) a A 1 @@ -7123,9 +7163,10 @@ def value_counts( Examples -------- - >>> df = pd.DataFrame({'num_legs': [2, 4, 4, 6], - ... 'num_wings': [2, 0, 0, 0]}, - ...
index=['falcon', 'dog', 'cat', 'ant']) + >>> df = pd.DataFrame( + ... {"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]}, + ... index=["falcon", "dog", "cat", "ant"], + ... ) >>> df num_legs num_wings falcon 2 2 @@ -7163,8 +7204,12 @@ def value_counts( With `dropna` set to `False` we can also count rows with NA values. - >>> df = pd.DataFrame({'first_name': ['John', 'Anne', 'John', 'Beth'], - ... 'middle_name': ['Smith', pd.NA, pd.NA, 'Louise']}) + >>> df = pd.DataFrame( + ... { + ... "first_name": ["John", "Anne", "John", "Beth"], + ... "middle_name": ["Smith", pd.NA, pd.NA, "Louise"], + ... } + ... ) >>> df first_name middle_name 0 John Smith @@ -7262,16 +7307,34 @@ def nlargest( Examples -------- - >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, - ... 434000, 434000, 337000, 11300, - ... 11300, 11300], - ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, - ... 17036, 182, 38, 311], - ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", - ... "IS", "NR", "TV", "AI"]}, - ... index=["Italy", "France", "Malta", - ... "Maldives", "Brunei", "Iceland", - ... "Nauru", "Tuvalu", "Anguilla"]) + >>> df = pd.DataFrame( + ... { + ... "population": [ + ... 59000000, + ... 65000000, + ... 434000, + ... 434000, + ... 434000, + ... 337000, + ... 11300, + ... 11300, + ... 11300, + ... ], + ... "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311], + ... "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"], + ... }, + ... index=[ + ... "Italy", + ... "France", + ... "Malta", + ... "Maldives", + ... "Brunei", + ... "Iceland", + ... "Nauru", + ... "Tuvalu", + ... "Anguilla", + ... ], + ... ) >>> df population GDP alpha-2 Italy 59000000 1937894 IT @@ -7287,7 +7350,7 @@ def nlargest( In the following example, we will use ``nlargest`` to select the three rows having the largest values in column "population". - >>> df.nlargest(3, 'population') + >>> df.nlargest(3, "population") population GDP alpha-2 France 65000000 2583560 FR Italy 59000000 1937894 IT @@ -7295,7 +7358,7 @@ def nlargest( When using ``keep='last'``, ties are resolved in reverse order: - >>> df.nlargest(3, 'population', keep='last') + >>> df.nlargest(3, "population", keep="last") population GDP alpha-2 France 65000000 2583560 FR Italy 59000000 1937894 IT @@ -7305,7 +7368,7 @@ def nlargest( if there are duplicate values for the smallest element, all the ties are kept: - >>> df.nlargest(3, 'population', keep='all') + >>> df.nlargest(3, "population", keep="all") population GDP alpha-2 France 65000000 2583560 FR Italy 59000000 1937894 IT @@ -7315,7 +7378,7 @@ def nlargest( However, ``nlargest`` does not keep ``n`` distinct largest elements: - >>> df.nlargest(5, 'population', keep='all') + >>> df.nlargest(5, "population", keep="all") population GDP alpha-2 France 65000000 2583560 FR Italy 59000000 1937894 IT @@ -7326,7 +7389,7 @@ def nlargest( To order by the largest values in column "population" and then "GDP", we can specify multiple columns like in the next example. - >>> df.nlargest(3, ['population', 'GDP']) + >>> df.nlargest(3, ["population", "GDP"]) population GDP alpha-2 France 65000000 2583560 FR Italy 59000000 1937894 IT @@ -7375,16 +7438,34 @@ def nsmallest( Examples -------- - >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000, - ... 434000, 434000, 337000, 337000, - ... 11300, 11300], - ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128, - ... 17036, 182, 38, 311], - ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN", - ... "IS", "NR", "TV", "AI"]}, - ... 
index=["Italy", "France", "Malta", - ... "Maldives", "Brunei", "Iceland", - ... "Nauru", "Tuvalu", "Anguilla"]) + >>> df = pd.DataFrame( + ... { + ... "population": [ + ... 59000000, + ... 65000000, + ... 434000, + ... 434000, + ... 434000, + ... 337000, + ... 337000, + ... 11300, + ... 11300, + ... ], + ... "GDP": [1937894, 2583560, 12011, 4520, 12128, 17036, 182, 38, 311], + ... "alpha-2": ["IT", "FR", "MT", "MV", "BN", "IS", "NR", "TV", "AI"], + ... }, + ... index=[ + ... "Italy", + ... "France", + ... "Malta", + ... "Maldives", + ... "Brunei", + ... "Iceland", + ... "Nauru", + ... "Tuvalu", + ... "Anguilla", + ... ], + ... ) >>> df population GDP alpha-2 Italy 59000000 1937894 IT @@ -7400,7 +7481,7 @@ def nsmallest( In the following example, we will use ``nsmallest`` to select the three rows having the smallest values in column "population". - >>> df.nsmallest(3, 'population') + >>> df.nsmallest(3, "population") population GDP alpha-2 Tuvalu 11300 38 TV Anguilla 11300 311 AI @@ -7408,7 +7489,7 @@ def nsmallest( When using ``keep='last'``, ties are resolved in reverse order: - >>> df.nsmallest(3, 'population', keep='last') + >>> df.nsmallest(3, "population", keep="last") population GDP alpha-2 Anguilla 11300 311 AI Tuvalu 11300 38 TV @@ -7418,7 +7499,7 @@ def nsmallest( if there are duplicate values for the largest element, all the ties are kept. - >>> df.nsmallest(3, 'population', keep='all') + >>> df.nsmallest(3, "population", keep="all") population GDP alpha-2 Tuvalu 11300 38 TV Anguilla 11300 311 AI @@ -7428,7 +7509,7 @@ def nsmallest( However, ``nsmallest`` does not keep ``n`` distinct smallest elements: - >>> df.nsmallest(4, 'population', keep='all') + >>> df.nsmallest(4, "population", keep="all") population GDP alpha-2 Tuvalu 11300 38 TV Anguilla 11300 311 AI @@ -7438,7 +7519,7 @@ def nsmallest( To order by the smallest values in column "population" and then "GDP", we can specify multiple columns like in the next example. - >>> df.nsmallest(3, ['population', 'GDP']) + >>> df.nsmallest(3, ["population", "GDP"]) population GDP alpha-2 Tuvalu 11300 38 TV Anguilla 11300 311 AI @@ -8323,8 +8404,8 @@ def combine( -------- Combine using a simple function that chooses the smaller column. - >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({"A": [0, 0], "B": [4, 4]}) + >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]}) >>> take_smaller = lambda s1, s2: s1 if s1.sum() < s2.sum() else s2 >>> df1.combine(df2, take_smaller) A B @@ -8333,8 +8414,8 @@ def combine( Example using a true element-wise combine function. - >>> df1 = pd.DataFrame({'A': [5, 0], 'B': [2, 4]}) - >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({"A": [5, 0], "B": [2, 4]}) + >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]}) >>> df1.combine(df2, np.minimum) A B 0 1 2 @@ -8343,8 +8424,8 @@ def combine( Using `fill_value` fills Nones prior to passing the column to the merge function. 
- >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) - >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({"A": [0, 0], "B": [None, 4]}) + >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) A B 0 0 -5.0 @@ -8353,8 +8434,8 @@ def combine( However, if the same element in both dataframes is None, that None is preserved - >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [None, 4]}) - >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [None, 3]}) + >>> df1 = pd.DataFrame({"A": [0, 0], "B": [None, 4]}) + >>> df2 = pd.DataFrame({"A": [1, 1], "B": [None, 3]}) >>> df1.combine(df2, take_smaller, fill_value=-5) A B 0 0 -5.0 @@ -8363,8 +8444,14 @@ def combine( Example that demonstrates the use of `overwrite` and behavior when the axis differ between the dataframes. - >>> df1 = pd.DataFrame({'A': [0, 0], 'B': [4, 4]}) - >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [-10, 1], }, index=[1, 2]) + >>> df1 = pd.DataFrame({"A": [0, 0], "B": [4, 4]}) + >>> df2 = pd.DataFrame( + ... { + ... "B": [3, 3], + ... "C": [-10, 1], + ... }, + ... index=[1, 2], + ... ) >>> df1.combine(df2, take_smaller) A B C 0 NaN NaN NaN @@ -8379,7 +8466,13 @@ def combine( Demonstrating the preference of the passed in dataframe. - >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1], }, index=[1, 2]) + >>> df2 = pd.DataFrame( + ... { + ... "B": [3, 3], + ... "C": [1, 1], + ... }, + ... index=[1, 2], + ... ) >>> df2.combine(df1, take_smaller) A B C 0 0.0 NaN NaN @@ -8489,8 +8582,8 @@ def combine_first(self, other: DataFrame) -> DataFrame: Examples -------- - >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [None, 4]}) - >>> df2 = pd.DataFrame({'A': [1, 1], 'B': [3, 3]}) + >>> df1 = pd.DataFrame({"A": [None, 0], "B": [None, 4]}) + >>> df2 = pd.DataFrame({"A": [1, 1], "B": [3, 3]}) >>> df1.combine_first(df2) A B 0 1.0 3.0 @@ -8499,8 +8592,8 @@ def combine_first(self, other: DataFrame) -> DataFrame: Null values still persist if the location of that null value does not exist in `other` - >>> df1 = pd.DataFrame({'A': [None, 0], 'B': [4, None]}) - >>> df2 = pd.DataFrame({'B': [3, 3], 'C': [1, 1]}, index=[1, 2]) + >>> df1 = pd.DataFrame({"A": [None, 0], "B": [4, None]}) + >>> df2 = pd.DataFrame({"B": [3, 3], "C": [1, 1]}, index=[1, 2]) >>> df1.combine_first(df2) A B C 0 NaN 4.0 NaN @@ -8599,10 +8692,8 @@ def update( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3], - ... 'B': [400, 500, 600]}) - >>> new_df = pd.DataFrame({'B': [4, 5, 6], - ... 'C': [7, 8, 9]}) + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [400, 500, 600]}) + >>> new_df = pd.DataFrame({"B": [4, 5, 6], "C": [7, 8, 9]}) >>> df.update(new_df) >>> df A B @@ -8613,9 +8704,8 @@ def update( The DataFrame's length does not increase as a result of the update, only values at matching index/column labels are updated. - >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], - ... 'B': ['x', 'y', 'z']}) - >>> new_df = pd.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']}) + >>> df = pd.DataFrame({"A": ["a", "b", "c"], "B": ["x", "y", "z"]}) + >>> new_df = pd.DataFrame({"B": ["d", "e", "f", "g", "h", "i"]}) >>> df.update(new_df) >>> df A B @@ -8623,9 +8713,8 @@ def update( 1 b e 2 c f - >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], - ... 
'B': ['x', 'y', 'z']}) - >>> new_df = pd.DataFrame({'B': ['d', 'f']}, index=[0, 2]) + >>> df = pd.DataFrame({"A": ["a", "b", "c"], "B": ["x", "y", "z"]}) + >>> new_df = pd.DataFrame({"B": ["d", "f"]}, index=[0, 2]) >>> df.update(new_df) >>> df A B @@ -8635,9 +8724,8 @@ def update( For Series, its name attribute must be set. - >>> df = pd.DataFrame({'A': ['a', 'b', 'c'], - ... 'B': ['x', 'y', 'z']}) - >>> new_column = pd.Series(['d', 'e', 'f'], name='B') + >>> df = pd.DataFrame({"A": ["a", "b", "c"], "B": ["x", "y", "z"]}) + >>> new_column = pd.Series(["d", "e", "f"], name="B") >>> df.update(new_column) >>> df A B @@ -8648,9 +8736,8 @@ def update( If `other` contains NaNs the corresponding values are not updated in the original dataframe. - >>> df = pd.DataFrame({'A': [1, 2, 3], - ... 'B': [400., 500., 600.]}) - >>> new_df = pd.DataFrame({'B': [4, np.nan, 6]}) + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [400.0, 500.0, 600.0]}) + >>> new_df = pd.DataFrame({"B": [4, np.nan, 6]}) >>> df.update(new_df) >>> df A B @@ -9235,9 +9322,9 @@ def stack( -------- **Single level columns** - >>> df_single_level_cols = pd.DataFrame([[0, 1], [2, 3]], - ... index=['cat', 'dog'], - ... columns=['weight', 'height']) + >>> df_single_level_cols = pd.DataFrame( + ... [[0, 1], [2, 3]], index=["cat", "dog"], columns=["weight", "height"] + ... ) Stacking a dataframe with a single level column axis returns a Series: @@ -9254,11 +9341,12 @@ def stack( **Multi level columns: simple case** - >>> multicol1 = pd.MultiIndex.from_tuples([('weight', 'kg'), - ... ('weight', 'pounds')]) - >>> df_multi_level_cols1 = pd.DataFrame([[1, 2], [2, 4]], - ... index=['cat', 'dog'], - ... columns=multicol1) + >>> multicol1 = pd.MultiIndex.from_tuples( + ... [("weight", "kg"), ("weight", "pounds")] + ... ) + >>> df_multi_level_cols1 = pd.DataFrame( + ... [[1, 2], [2, 4]], index=["cat", "dog"], columns=multicol1 + ... ) Stacking a dataframe with a multi-level column axis: @@ -9276,11 +9364,10 @@ def stack( **Missing values** - >>> multicol2 = pd.MultiIndex.from_tuples([('weight', 'kg'), - ... ('height', 'm')]) - >>> df_multi_level_cols2 = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], - ... index=['cat', 'dog'], - ... columns=multicol2) + >>> multicol2 = pd.MultiIndex.from_tuples([("weight", "kg"), ("height", "m")]) + >>> df_multi_level_cols2 = pd.DataFrame( + ... [[1.0, 2.0], [3.0, 4.0]], index=["cat", "dog"], columns=multicol2 + ... ) It is common to have missing values when stacking a dataframe with multi-level columns, as the stacked dataframe typically @@ -9434,9 +9521,13 @@ def explode( Examples -------- - >>> df = pd.DataFrame({'A': [[0, 1, 2], 'foo', [], [3, 4]], - ... 'B': 1, - ... 'C': [['a', 'b', 'c'], np.nan, [], ['d', 'e']]}) + >>> df = pd.DataFrame( + ... { + ... "A": [[0, 1, 2], "foo", [], [3, 4]], + ... "B": 1, + ... "C": [["a", "b", "c"], np.nan, [], ["d", "e"]], + ... } + ... ) >>> df A B C 0 [0, 1, 2] 1 [a, b, c] @@ -9446,7 +9537,7 @@ def explode( Single-column explode. - >>> df.explode('A') + >>> df.explode("A") A B C 0 0 1 [a, b, c] 0 1 1 [a, b, c] @@ -9458,7 +9549,7 @@ def explode( Multi-column explode. - >>> df.explode(list('AC')) + >>> df.explode(list("AC")) A B C 0 0 1 a 0 1 1 b @@ -9544,8 +9635,9 @@ def unstack( Examples -------- - >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), - ... ('two', 'a'), ('two', 'b')]) + >>> index = pd.MultiIndex.from_tuples( + ... [("one", "a"), ("one", "b"), ("two", "a"), ("two", "b")] + ... 
) >>> s = pd.Series(np.arange(1.0, 5.0), index=index) >>> s one a 1.0 @@ -9939,7 +10031,7 @@ def apply( Examples -------- - >>> df = pd.DataFrame([[4, 9]] * 3, columns=['A', 'B']) + >>> df = pd.DataFrame([[4, 9]] * 3, columns=["A", "B"]) >>> df A B 0 4 9 @@ -9979,7 +10071,7 @@ def apply( Passing ``result_type='expand'`` will expand list-like results to columns of a Dataframe - >>> df.apply(lambda x: [1, 2], axis=1, result_type='expand') + >>> df.apply(lambda x: [1, 2], axis=1, result_type="expand") 0 1 0 1 2 1 1 2 @@ -9989,7 +10081,7 @@ def apply( ``result_type='expand'``. The resulting column names will be the Series index. - >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1) + >>> df.apply(lambda x: pd.Series([1, 2], index=["foo", "bar"]), axis=1) foo bar 0 1 2 1 1 2 @@ -10000,7 +10092,7 @@ def apply( and broadcast it along the axis. The resulting column names will be the originals. - >>> df.apply(lambda x: [1, 2], axis=1, result_type='broadcast') + >>> df.apply(lambda x: [1, 2], axis=1, result_type="broadcast") A B 0 1 2 1 1 2 @@ -10073,7 +10165,7 @@ def map( >>> df_copy = df.copy() >>> df_copy.iloc[0, 0] = pd.NA - >>> df_copy.map(lambda x: len(str(x)), na_action='ignore') + >>> df_copy.map(lambda x: len(str(x)), na_action="ignore") 0 1 0 NaN 4 1 5.0 5 @@ -10096,7 +10188,7 @@ def map( But it's better to avoid map in that case. - >>> df ** 2 + >>> df**2 0 1 0 1.000000 4.494400 1 11.262736 20.857489 @@ -10299,8 +10391,12 @@ def join( Examples -------- - >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'], - ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + >>> df = pd.DataFrame( + ... { + ... "key": ["K0", "K1", "K2", "K3", "K4", "K5"], + ... "A": ["A0", "A1", "A2", "A3", "A4", "A5"], + ... } + ... ) >>> df key A @@ -10311,8 +10407,7 @@ def join( 4 K4 A4 5 K5 A5 - >>> other = pd.DataFrame({'key': ['K0', 'K1', 'K2'], - ... 'B': ['B0', 'B1', 'B2']}) + >>> other = pd.DataFrame({"key": ["K0", "K1", "K2"], "B": ["B0", "B1", "B2"]}) >>> other key B @@ -10322,7 +10417,7 @@ def join( Join DataFrames using their indexes. - >>> df.join(other, lsuffix='_caller', rsuffix='_other') + >>> df.join(other, lsuffix="_caller", rsuffix="_other") key_caller A key_other B 0 K0 A0 K0 B0 1 K1 A1 K1 B1 @@ -10335,7 +10430,7 @@ def join( the index in both `df` and `other`. The joined DataFrame will have key as its index. - >>> df.set_index('key').join(other.set_index('key')) + >>> df.set_index("key").join(other.set_index("key")) A B key K0 A0 B0 @@ -10350,7 +10445,7 @@ def join( any column in `df`. This method preserves the original DataFrame's index in the result. - >>> df.join(other.set_index('key'), on='key') + >>> df.join(other.set_index("key"), on="key") key A B 0 K0 A0 B0 1 K1 A1 B1 @@ -10361,8 +10456,12 @@ def join( Using non-unique key values shows how they are matched. - >>> df = pd.DataFrame({'key': ['K0', 'K1', 'K1', 'K3', 'K0', 'K1'], - ... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']}) + >>> df = pd.DataFrame( + ... { + ... "key": ["K0", "K1", "K1", "K3", "K0", "K1"], + ... "A": ["A0", "A1", "A2", "A3", "A4", "A5"], + ... } + ... ) >>> df key A @@ -10373,7 +10472,7 @@ def join( 4 K0 A4 5 K1 A5 - >>> df.join(other.set_index('key'), on='key', validate='m:1') + >>> df.join(other.set_index("key"), on="key", validate="m:1") key A B 0 K0 A0 B0 1 K1 A1 B1 @@ -10529,8 +10628,10 @@ def round( Examples -------- - >>> df = pd.DataFrame([(.21, .32), (.01, .67), (.66, .03), (.21, .18)], - ... columns=['dogs', 'cats']) + >>> df = pd.DataFrame( + ... 
[(0.21, 0.32), (0.01, 0.67), (0.66, 0.03), (0.21, 0.18)], + ... columns=["dogs", "cats"], + ... ) >>> df dogs cats 0 0.21 0.32 @@ -10552,7 +10653,7 @@ def round( specified with the column names as key and the number of decimal places as value - >>> df.round({'dogs': 1, 'cats': 0}) + >>> df.round({"dogs": 1, "cats": 0}) dogs cats 0 0.2 0.0 1 0.0 1.0 @@ -10563,7 +10664,7 @@ def round( specified with the column names as index and the number of decimal places as value - >>> decimals = pd.Series([0, 1], index=['cats', 'dogs']) + >>> decimals = pd.Series([0, 1], index=["cats", "dogs"]) >>> df.round(decimals) dogs cats 0 0.2 0.0 @@ -10675,15 +10776,18 @@ def corr( >>> def histogram_intersection(a, b): ... v = np.minimum(a, b).sum().round(decimals=1) ... return v - >>> df = pd.DataFrame([(.2, .3), (.0, .6), (.6, .0), (.2, .1)], - ... columns=['dogs', 'cats']) + >>> df = pd.DataFrame( + ... [(0.2, 0.3), (0.0, 0.6), (0.6, 0.0), (0.2, 0.1)], + ... columns=["dogs", "cats"], + ... ) >>> df.corr(method=histogram_intersection) dogs cats dogs 1.0 0.3 cats 0.3 1.0 - >>> df = pd.DataFrame([(1, 1), (2, np.nan), (np.nan, 3), (4, 4)], - ... columns=['dogs', 'cats']) + >>> df = pd.DataFrame( + ... [(1, 1), (2, np.nan), (np.nan, 3), (4, 4)], columns=["dogs", "cats"] + ... ) >>> df.corr(min_periods=3) dogs cats dogs 1.0 NaN @@ -10809,16 +10913,18 @@ def cov( Examples -------- - >>> df = pd.DataFrame([(1, 2), (0, 3), (2, 0), (1, 1)], - ... columns=['dogs', 'cats']) + >>> df = pd.DataFrame( + ... [(1, 2), (0, 3), (2, 0), (1, 1)], columns=["dogs", "cats"] + ... ) >>> df.cov() dogs cats dogs 0.666667 -1.000000 cats -1.000000 1.666667 >>> np.random.seed(42) - >>> df = pd.DataFrame(np.random.randn(1000, 5), - ... columns=['a', 'b', 'c', 'd', 'e']) + >>> df = pd.DataFrame( + ... np.random.randn(1000, 5), columns=["a", "b", "c", "d", "e"] + ... ) >>> df.cov() a b c d e a 0.998438 -0.020161 0.059277 -0.008943 0.014144 @@ -10834,10 +10940,9 @@ def cov( each column pair in order to have a valid result: >>> np.random.seed(42) - >>> df = pd.DataFrame(np.random.randn(20, 3), - ... columns=['a', 'b', 'c']) - >>> df.loc[df.index[:5], 'a'] = np.nan - >>> df.loc[df.index[5:10], 'b'] = np.nan + >>> df = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"]) + >>> df.loc[df.index[:5], "a"] = np.nan + >>> df.loc[df.index[5:10], "b"] = np.nan >>> df.cov(min_periods=12) a b c a 0.316741 NaN -0.150812 @@ -10917,10 +11022,12 @@ def corrwith( -------- >>> index = ["a", "b", "c", "d", "e"] >>> columns = ["one", "two", "three", "four"] - >>> df1 = pd.DataFrame(np.arange(20).reshape(5, 4), - ... index=index, columns=columns) - >>> df2 = pd.DataFrame(np.arange(16).reshape(4, 4), - ... index=index[:4], columns=columns) + >>> df1 = pd.DataFrame( + ... np.arange(20).reshape(5, 4), index=index, columns=columns + ... ) + >>> df2 = pd.DataFrame( + ... np.arange(16).reshape(4, 4), index=index[:4], columns=columns + ... ) >>> df1.corrwith(df2) one 1.0 two 1.0 @@ -11035,10 +11142,13 @@ def count(self, axis: Axis = 0, numeric_only: bool = False): -------- Constructing DataFrame from a dictionary: - >>> df = pd.DataFrame({"Person": - ... ["John", "Myla", "Lewis", "John", "Myla"], - ... "Age": [24., np.nan, 21., 33, 26], - ... "Single": [False, True, True, True, False]}) + >>> df = pd.DataFrame( + ... { + ... "Person": ["John", "Myla", "Lewis", "John", "Myla"], + ... "Age": [24.0, np.nan, 21.0, 33, 26], + ... "Single": [False, True, True, True, False], + ... } + ... 
) >>> df Person Age Single 0 John 24.0 False @@ -11057,7 +11167,7 @@ def count(self, axis: Axis = 0, numeric_only: bool = False): Counts for each **row**: - >>> df.count(axis='columns') + >>> df.count(axis="columns") 0 3 1 2 2 3 @@ -11467,7 +11577,7 @@ def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: Examples -------- - >>> df = pd.DataFrame({'A': [4, 5, 6], 'B': [4, 1, 1]}) + >>> df = pd.DataFrame({"A": [4, 5, 6], "B": [4, 1, 1]}) >>> df.nunique() A 3 B 2 @@ -11600,12 +11710,16 @@ def mode( Examples -------- - >>> df = pd.DataFrame([('bird', 2, 2), - ... ('mammal', 4, np.nan), - ... ('arthropod', 8, 0), - ... ('bird', 2, np.nan)], - ... index=('falcon', 'horse', 'spider', 'ostrich'), - ... columns=('species', 'legs', 'wings')) + >>> df = pd.DataFrame( + ... [ + ... ("bird", 2, 2), + ... ("mammal", 4, np.nan), + ... ("arthropod", 8, 0), + ... ("bird", 2, np.nan), + ... ], + ... index=("falcon", "horse", "spider", "ostrich"), + ... columns=("species", "legs", "wings"), + ... ) >>> df species legs wings falcon bird 2 2.0 @@ -11639,7 +11753,7 @@ def mode( To compute the mode over columns and not rows, use the axis parameter: - >>> df.mode(axis='columns', numeric_only=True) + >>> df.mode(axis="columns", numeric_only=True) 0 1 falcon 2.0 NaN horse 4.0 NaN @@ -11746,24 +11860,25 @@ def quantile( Examples -------- - >>> df = pd.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), - ... columns=['a', 'b']) - >>> df.quantile(.1) + >>> df = pd.DataFrame( + ... np.array([[1, 1], [2, 10], [3, 100], [4, 100]]), columns=["a", "b"] + ... ) + >>> df.quantile(0.1) a 1.3 b 3.7 Name: 0.1, dtype: float64 - >>> df.quantile([.1, .5]) + >>> df.quantile([0.1, 0.5]) a b 0.1 1.3 3.7 0.5 2.5 55.0 Specifying `method='table'` will compute the quantile over all columns. - >>> df.quantile(.1, method="table", interpolation="nearest") + >>> df.quantile(0.1, method="table", interpolation="nearest") a 1 b 1 Name: 0.1, dtype: int64 - >>> df.quantile([.1, .5], method="table", interpolation="nearest") + >>> df.quantile([0.1, 0.5], method="table", interpolation="nearest") a b 0.1 1 1 0.5 3 100 @@ -11771,11 +11886,13 @@ def quantile( Specifying `numeric_only=False` will also compute the quantile of datetime and timedelta data. - >>> df = pd.DataFrame({'A': [1, 2], - ... 'B': [pd.Timestamp('2010'), - ... pd.Timestamp('2011')], - ... 'C': [pd.Timedelta('1 days'), - ... pd.Timedelta('2 days')]}) + >>> df = pd.DataFrame( + ... { + ... "A": [1, 2], + ... "B": [pd.Timestamp("2010"), pd.Timestamp("2011")], + ... "C": [pd.Timedelta("1 days"), pd.Timedelta("2 days")], + ... } + ... ) >>> df.quantile(0.5, numeric_only=False) A 1.5 B 2010-07-02 12:00:00 @@ -11907,8 +12024,8 @@ def to_timestamp( Examples -------- - >>> idx = pd.PeriodIndex(['2023', '2024'], freq='Y') - >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> idx = pd.PeriodIndex(["2023", "2024"], freq="Y") + >>> d = {"col1": [1, 2], "col2": [3, 4]} >>> df1 = pd.DataFrame(data=d, index=idx) >>> df1 col1 col2 @@ -11928,7 +12045,7 @@ def to_timestamp( Using `freq` which is the offset that the Timestamps will have >>> df2 = pd.DataFrame(data=d, index=idx) - >>> df2 = df2.to_timestamp(freq='M') + >>> df2 = df2.to_timestamp(freq="M") >>> df2 col1 col2 2023-01-31 1 3 @@ -12045,8 +12162,9 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: Examples -------- - >>> df = pd.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]}, - ... index=['falcon', 'dog']) + >>> df = pd.DataFrame( + ... 
{"num_legs": [2, 4], "num_wings": [2, 0]}, index=["falcon", "dog"] + ... ) >>> df num_legs num_wings falcon 2 2 @@ -12070,7 +12188,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: When ``values`` is a dict, we can pass values to check for each column separately: - >>> df.isin({'num_wings': [0, 3]}) + >>> df.isin({"num_wings": [0, 3]}) num_legs num_wings falcon False False dog False True @@ -12079,8 +12197,9 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame: match. Note that 'falcon' does not match based on the number of legs in other. - >>> other = pd.DataFrame({'num_legs': [8, 3], 'num_wings': [0, 2]}, - ... index=['spider', 'falcon']) + >>> other = pd.DataFrame( + ... {"num_legs": [8, 3], "num_wings": [0, 2]}, index=["spider", "falcon"] + ... ) >>> df.isin(other) num_legs num_wings falcon False True @@ -12271,9 +12390,9 @@ def values(self) -> np.ndarray: A DataFrame where all columns are the same type (e.g., int64) results in an array of the same type. - >>> df = pd.DataFrame({'age': [3, 29], - ... 'height': [94, 170], - ... 'weight': [31, 115]}) + >>> df = pd.DataFrame( + ... {"age": [3, 29], "height": [94, 170], "weight": [31, 115]} + ... ) >>> df age height weight 0 3 94 31 @@ -12291,10 +12410,14 @@ def values(self) -> np.ndarray: results in an ndarray of the broadest type that accommodates these mixed types (e.g., object). - >>> df2 = pd.DataFrame([('parrot', 24.0, 'second'), - ... ('lion', 80.5, 1), - ... ('monkey', np.nan, None)], - ... columns=('name', 'max_speed', 'rank')) + >>> df2 = pd.DataFrame( + ... [ + ... ("parrot", 24.0, "second"), + ... ("lion", 80.5, 1), + ... ("monkey", np.nan, None), + ... ], + ... columns=("name", "max_speed", "rank"), + ... ) >>> df2.dtypes name object max_speed float64 diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 93c2afab51d2c..3c71784ad81c4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -352,7 +352,7 @@ def attrs(self) -> dict[Hashable, Any]: For DataFrame: - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> df.attrs = {"A": [10, 20, 30]} >>> df.attrs {'A': [10, 20, 30]} @@ -670,11 +670,11 @@ def ndim(self) -> int: Examples -------- - >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s = pd.Series({"a": 1, "b": 2, "c": 3}) >>> s.ndim 1 - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df.ndim 2 """ @@ -695,11 +695,11 @@ def size(self) -> int: Examples -------- - >>> s = pd.Series({'a': 1, 'b': 2, 'c': 3}) + >>> s = pd.Series({"a": 1, "b": 2, "c": 3}) >>> s.size 3 - >>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + >>> df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) >>> df.size 4 """ @@ -867,15 +867,15 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: Examples -------- - >>> df = pd.DataFrame([ - ... [1, 2, 3, 4], - ... [5, 6, 7, 8], - ... [9, 10, 11, 12] - ... ]).set_index([0, 1]).rename_axis(['a', 'b']) + >>> df = ( + ... pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]]) + ... .set_index([0, 1]) + ... .rename_axis(["a", "b"]) + ... ) - >>> df.columns = pd.MultiIndex.from_tuples([ - ... ('c', 'e'), ('d', 'f') - ... ], names=['level_1', 'level_2']) + >>> df.columns = pd.MultiIndex.from_tuples( + ... [("c", "e"), ("d", "f")], names=["level_1", "level_2"] + ... 
) >>> df level_1 c d @@ -885,7 +885,7 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: 5 6 7 8 9 10 11 12 - >>> df.droplevel('a') + >>> df.droplevel("a") level_1 c d level_2 e f b @@ -893,7 +893,7 @@ def droplevel(self, level: IndexLabel, axis: Axis = 0) -> Self: 6 7 8 10 11 12 - >>> df.droplevel('level_2', axis=1) + >>> df.droplevel("level_2", axis=1) level_1 c d a b 1 2 3 4 @@ -973,7 +973,7 @@ def squeeze(self, axis: Axis | None = None): Squeezing is even more effective when used with DataFrames. - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']) + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) >>> df a b 0 1 2 @@ -982,7 +982,7 @@ def squeeze(self, axis: Axis | None = None): Slicing a single column will produce a DataFrame with the columns having only one value: - >>> df_a = df[['a']] + >>> df_a = df[["a"]] >>> df_a a 0 1 @@ -990,7 +990,7 @@ def squeeze(self, axis: Axis | None = None): So the columns can be squeezed down, resulting in a Series: - >>> df_a.squeeze('columns') + >>> df_a.squeeze("columns") 0 1 1 3 Name: a, dtype: int64 @@ -998,14 +998,14 @@ def squeeze(self, axis: Axis | None = None): Slicing a single row from a single column will produce a single scalar DataFrame: - >>> df_0a = df.loc[df.index < 1, ['a']] + >>> df_0a = df.loc[df.index < 1, ["a"]] >>> df_0a a 0 1 Squeezing the rows produces a single scalar Series: - >>> df_0a.squeeze('rows') + >>> df_0a.squeeze("rows") a 1 Name: 0, dtype: int64 @@ -1219,9 +1219,9 @@ def rename_axis( -------- **DataFrame** - >>> df = pd.DataFrame({"num_legs": [4, 4, 2], - ... "num_arms": [0, 0, 2]}, - ... ["dog", "cat", "monkey"]) + >>> df = pd.DataFrame( + ... {"num_legs": [4, 4, 2], "num_arms": [0, 0, 2]}, ["dog", "cat", "monkey"] + ... ) >>> df num_legs num_arms dog 4 0 @@ -1244,9 +1244,9 @@ def rename_axis( **MultiIndex** - >>> df.index = pd.MultiIndex.from_product([['mammal'], - ... ['dog', 'cat', 'monkey']], - ... names=['type', 'name']) + >>> df.index = pd.MultiIndex.from_product( + ... [["mammal"], ["dog", "cat", "monkey"]], names=["type", "name"] + ... ) >>> df limbs num_legs num_arms type name @@ -1254,7 +1254,7 @@ def rename_axis( cat 4 0 monkey 2 2 - >>> df.rename_axis(index={'type': 'class'}) + >>> df.rename_axis(index={"type": "class"}) limbs num_legs num_arms class name mammal dog 4 0 @@ -1343,8 +1343,7 @@ def _set_axis_name( Examples -------- - >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, - ... ["dog", "cat", "monkey"]) + >>> df = pd.DataFrame({"num_legs": [4, 4, 2]}, ["dog", "cat", "monkey"]) >>> df num_legs dog 4 @@ -1357,7 +1356,8 @@ def _set_axis_name( cat 4 monkey 2 >>> df.index = pd.MultiIndex.from_product( - ... [["mammal"], ['dog', 'cat', 'monkey']]) + ... [["mammal"], ["dog", "cat", "monkey"]] + ... ) >>> df._set_axis_name(["type", "name"]) num_legs type name @@ -1560,9 +1560,9 @@ def bool(self) -> bool_t: >>> pd.Series([False]).bool() # doctest: +SKIP False - >>> pd.DataFrame({'col': [True]}).bool() # doctest: +SKIP + >>> pd.DataFrame({"col": [True]}).bool() # doctest: +SKIP True - >>> pd.DataFrame({'col': [False]}).bool() # doctest: +SKIP + >>> pd.DataFrame({"col": [False]}).bool() # doctest: +SKIP False This is an alternative method and will only work @@ -1635,7 +1635,7 @@ def abs(self) -> Self: Absolute numeric values in a Series with a Timedelta element. 
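The `droplevel` and `squeeze` examples reformatted above exercise two easily confused shape operations: `droplevel` discards an index level while keeping every row, whereas `squeeze` collapses a length-one axis into a lower-dimensional object. A minimal sketch separating the two (hypothetical frame, assuming pandas 2.x for the `Index` repr):

>>> import pandas as pd
>>> mi = pd.MultiIndex.from_tuples([("a", 1), ("a", 2)], names=["outer", "inner"])
>>> df = pd.DataFrame({"x": [10, 20]}, index=mi)
>>> df.droplevel("outer").index  # one level gone, both rows kept
Index([1, 2], dtype='int64', name='inner')
>>> type(df.squeeze("columns")).__name__  # one column collapses to a Series
'Series'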
- >>> s = pd.Series([pd.Timedelta('1 days')]) + >>> s = pd.Series([pd.Timedelta("1 days")]) >>> s.abs() 0 1 days dtype: timedelta64[ns] @@ -1643,11 +1643,9 @@ def abs(self) -> Self: Select rows with data closest to certain value using argsort (from `StackOverflow `__). - >>> df = pd.DataFrame({ - ... 'a': [4, 5, 6, 7], - ... 'b': [10, 20, 30, 40], - ... 'c': [100, 50, -30, -50] - ... }) + >>> df = pd.DataFrame( + ... {"a": [4, 5, 6, 7], "b": [10, 20, 30, 40], "c": [100, 50, -30, -50]} + ... ) >>> df a b c 0 4 10 100 @@ -1968,7 +1966,7 @@ def __iter__(self) -> Iterator: Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) >>> for x in df: ... print(x) A @@ -1990,8 +1988,9 @@ def keys(self) -> Index: Examples -------- - >>> d = pd.DataFrame(data={'A': [1, 2, 3], 'B': [0, 4, 8]}, - ... index=['a', 'b', 'c']) + >>> d = pd.DataFrame( + ... data={"A": [1, 2, 3], "B": [0, 4, 8]}, index=["a", "b", "c"] + ... ) >>> d A B a 1 0 @@ -2052,7 +2051,7 @@ def empty(self) -> bool_t: -------- An example of an actual empty DataFrame. Notice the index is empty: - >>> df_empty = pd.DataFrame({'A' : []}) + >>> df_empty = pd.DataFrame({"A": []}) >>> df_empty Empty DataFrame Columns: [A] @@ -2063,7 +2062,7 @@ def empty(self) -> bool_t: If we only have NaNs in our DataFrame, it is not considered empty! We will need to drop the NaNs to make the DataFrame empty: - >>> df = pd.DataFrame({'A' : [np.nan]}) + >>> df = pd.DataFrame({"A": [np.nan]}) >>> df A 0 NaN @@ -2072,7 +2071,7 @@ def empty(self) -> bool_t: >>> df.dropna().empty True - >>> ser_empty = pd.Series({'A' : []}) + >>> ser_empty = pd.Series({"A": []}) >>> ser_empty A [] dtype: object @@ -2313,35 +2312,35 @@ def to_excel( Create, write to and save a workbook: - >>> df1 = pd.DataFrame([['a', 'b'], ['c', 'd']], - ... index=['row 1', 'row 2'], - ... columns=['col 1', 'col 2']) + >>> df1 = pd.DataFrame( + ... [["a", "b"], ["c", "d"]], + ... index=["row 1", "row 2"], + ... columns=["col 1", "col 2"], + ... ) >>> df1.to_excel("output.xlsx") # doctest: +SKIP To specify the sheet name: - >>> df1.to_excel("output.xlsx", - ... sheet_name='Sheet_name_1') # doctest: +SKIP + >>> df1.to_excel("output.xlsx", sheet_name="Sheet_name_1") # doctest: +SKIP If you wish to write to more than one sheet in the workbook, it is necessary to specify an ExcelWriter object: >>> df2 = df1.copy() - >>> with pd.ExcelWriter('output.xlsx') as writer: # doctest: +SKIP - ... df1.to_excel(writer, sheet_name='Sheet_name_1') - ... df2.to_excel(writer, sheet_name='Sheet_name_2') + >>> with pd.ExcelWriter("output.xlsx") as writer: # doctest: +SKIP + ... df1.to_excel(writer, sheet_name="Sheet_name_1") + ... df2.to_excel(writer, sheet_name="Sheet_name_2") ExcelWriter can also be used to append to an existing Excel file: - >>> with pd.ExcelWriter('output.xlsx', - ... mode='a') as writer: # doctest: +SKIP - ... df1.to_excel(writer, sheet_name='Sheet_name_3') + >>> with pd.ExcelWriter("output.xlsx", mode="a") as writer: # doctest: +SKIP + ... 
df1.to_excel(writer, sheet_name="Sheet_name_3") To set the library that is used to write the Excel file, you can pass the `engine` keyword (the default engine is automatically chosen depending on the file extension): - >>> df1.to_excel('output1.xlsx', engine='xlsxwriter') # doctest: +SKIP + >>> df1.to_excel("output1.xlsx", engine="xlsxwriter") # doctest: +SKIP """ if engine_kwargs is None: engine_kwargs = {} @@ -2768,23 +2767,24 @@ def to_hdf( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, - ... index=['a', 'b', 'c']) # doctest: +SKIP - >>> df.to_hdf('data.h5', key='df', mode='w') # doctest: +SKIP + >>> df = pd.DataFrame( + ... {"A": [1, 2, 3], "B": [4, 5, 6]}, index=["a", "b", "c"] + ... ) # doctest: +SKIP + >>> df.to_hdf("data.h5", key="df", mode="w") # doctest: +SKIP We can add another object to the same file: >>> s = pd.Series([1, 2, 3, 4]) # doctest: +SKIP - >>> s.to_hdf('data.h5', key='s') # doctest: +SKIP + >>> s.to_hdf("data.h5", key="s") # doctest: +SKIP Reading from HDF file: - >>> pd.read_hdf('data.h5', 'df') # doctest: +SKIP + >>> pd.read_hdf("data.h5", "df") # doctest: +SKIP A B a 1 4 b 2 5 c 3 6 - >>> pd.read_hdf('data.h5', 's') # doctest: +SKIP + >>> pd.read_hdf("data.h5", "s") # doctest: +SKIP 0 1 1 2 2 3 @@ -3079,7 +3079,9 @@ def to_pickle( Examples -------- - >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df = pd.DataFrame( + ... {{"foo": range(5), "bar": range(5, 10)}} + ... ) # doctest: +SKIP >>> original_df # doctest: +SKIP foo bar 0 0 5 @@ -3097,7 +3099,7 @@ def to_pickle( 2 2 7 3 3 8 4 4 9 - """ # noqa: E501 + """ from pandas.io.pickle import to_pickle to_pickle( @@ -3152,9 +3154,9 @@ def to_clipboard( -------- Copy the contents of a DataFrame to the clipboard. - >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) - >>> df.to_clipboard(sep=',') # doctest: +SKIP + >>> df.to_clipboard(sep=",") # doctest: +SKIP ... # Wrote the following to the system clipboard: ... # ,A,B,C ... # 0,1,2,3 @@ -3163,7 +3165,7 @@ def to_clipboard( We can omit the index by passing the keyword `index` and setting it to false. - >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP + >>> df.to_clipboard(sep=",", index=False) # doctest: +SKIP ... # Wrote the following to the system clipboard: ... # A,B,C ... # 1,2,3 @@ -3174,6 +3176,7 @@ def to_clipboard( .. code-block:: python import pyperclip + html = df.style.to_html() pyperclip.copy(html) """ @@ -3203,12 +3206,15 @@ def to_xarray(self): Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0, 2), - ... ('parrot', 'bird', 24.0, 2), - ... ('lion', 'mammal', 80.5, 4), - ... ('monkey', 'mammal', np.nan, 4)], - ... columns=['name', 'class', 'max_speed', - ... 'num_legs']) + >>> df = pd.DataFrame( + ... [ + ... ("falcon", "bird", 389.0, 2), + ... ("parrot", "bird", 24.0, 2), + ... ("lion", "mammal", 80.5, 4), + ... ("monkey", "mammal", np.nan, 4), + ... ], + ... columns=["name", "class", "max_speed", "num_legs"], + ... ) >>> df name class max_speed num_legs 0 falcon bird 389.0 2 @@ -3227,19 +3233,23 @@ class (index) object 'bird' 'bird' 'mammal' 'mammal' max_speed (index) float64 389.0 24.0 80.5 nan num_legs (index) int64 2 2 4 4 - >>> df['max_speed'].to_xarray() + >>> df["max_speed"].to_xarray() array([389. , 24. , 80.5, nan]) Coordinates: * index (index) int64 0 1 2 3 - >>> dates = pd.to_datetime(['2018-01-01', '2018-01-01', - ... 
'2018-01-02', '2018-01-02']) - >>> df_multiindex = pd.DataFrame({'date': dates, - ... 'animal': ['falcon', 'parrot', - ... 'falcon', 'parrot'], - ... 'speed': [350, 18, 361, 15]}) - >>> df_multiindex = df_multiindex.set_index(['date', 'animal']) + >>> dates = pd.to_datetime( + ... ["2018-01-01", "2018-01-01", "2018-01-02", "2018-01-02"] + ... ) + >>> df_multiindex = pd.DataFrame( + ... { + ... "date": dates, + ... "animal": ["falcon", "parrot", "falcon", "parrot"], + ... "speed": [350, 18, 361, 15], + ... } + ... ) + >>> df_multiindex = df_multiindex.set_index(["date", "animal"]) >>> df_multiindex speed @@ -3862,31 +3872,34 @@ def to_csv( -------- Create 'out.csv' containing 'df' without indices - >>> df = pd.DataFrame({{'name': ['Raphael', 'Donatello'], - ... 'mask': ['red', 'purple'], - ... 'weapon': ['sai', 'bo staff']}}) - >>> df.to_csv('out.csv', index=False) # doctest: +SKIP + >>> df = pd.DataFrame( + ... [["Raphael", "red", "sai"], ["Donatello", "purple", "bo staff"]], + ... columns=["name", "mask", "weapon"], + ... ) + >>> df.to_csv("out.csv", index=False) # doctest: +SKIP Create 'out.zip' containing 'out.csv' >>> df.to_csv(index=False) 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' - >>> compression_opts = dict(method='zip', - ... archive_name='out.csv') # doctest: +SKIP - >>> df.to_csv('out.zip', index=False, - ... compression=compression_opts) # doctest: +SKIP + >>> compression_opts = dict( + ... method="zip", archive_name="out.csv" + ... ) # doctest: +SKIP + >>> df.to_csv( + ... "out.zip", index=False, compression=compression_opts + ... ) # doctest: +SKIP To write a csv file to a new folder or nested folder you will first need to create it using either Pathlib or os: >>> from pathlib import Path # doctest: +SKIP - >>> filepath = Path('folder/subfolder/out.csv') # doctest: +SKIP + >>> filepath = Path("folder/subfolder/out.csv") # doctest: +SKIP >>> filepath.parent.mkdir(parents=True, exist_ok=True) # doctest: +SKIP >>> df.to_csv(filepath) # doctest: +SKIP >>> import os # doctest: +SKIP - >>> os.makedirs('folder/subfolder', exist_ok=True) # doctest: +SKIP - >>> df.to_csv('folder/subfolder/out.csv') # doctest: +SKIP + >>> os.makedirs("folder/subfolder", exist_ok=True) # doctest: +SKIP + >>> df.to_csv("folder/subfolder/out.csv") # doctest: +SKIP """ df = self if isinstance(self, ABCDataFrame) else self.to_frame() @@ -3955,12 +3968,16 @@ def take(self, indices, axis: Axis = 0, **kwargs) -> Self: Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... ('parrot', 'bird', 24.0), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', np.nan)], - ... columns=['name', 'class', 'max_speed'], - ... index=[0, 2, 3, 1]) + >>> df = pd.DataFrame( + ... [ + ... ("falcon", "bird", 389.0), + ... ("parrot", "bird", 24.0), + ... ("lion", "mammal", 80.5), + ... ("monkey", "mammal", np.nan), + ... ], + ... columns=["name", "class", "max_speed"], + ... index=[0, 2, 3, 1], + ... ) >>> df name class max_speed 0 falcon bird 389.0 @@ -4086,13 +4103,15 @@ def xs( Examples -------- - >>> d = {'num_legs': [4, 4, 2, 2], - ... 'num_wings': [0, 0, 2, 2], - ... 'class': ['mammal', 'mammal', 'mammal', 'bird'], - ... 'animal': ['cat', 'dog', 'bat', 'penguin'], - ... 'locomotion': ['walks', 'walks', 'flies', 'walks']} + >>> d = { + ... "num_legs": [4, 4, 2, 2], + ... "num_wings": [0, 0, 2, 2], + ... "class": ["mammal", "mammal", "mammal", "bird"], + ... "animal": ["cat", "dog", "bat", "penguin"], + ... "locomotion": ["walks", "walks", "flies", "walks"], + ... 
} >>> df = pd.DataFrame(data=d) - >>> df = df.set_index(['class', 'animal', 'locomotion']) + >>> df = df.set_index(["class", "animal", "locomotion"]) >>> df num_legs num_wings class animal locomotion @@ -4103,7 +4122,7 @@ class animal locomotion Get values at specified index - >>> df.xs('mammal') + >>> df.xs("mammal") num_legs num_wings animal locomotion cat walks 4 0 @@ -4112,29 +4131,28 @@ class animal locomotion Get values at several indexes - >>> df.xs(('mammal', 'dog', 'walks')) + >>> df.xs(("mammal", "dog", "walks")) num_legs 4 num_wings 0 Name: (mammal, dog, walks), dtype: int64 Get values at specified index and level - >>> df.xs('cat', level=1) + >>> df.xs("cat", level=1) num_legs num_wings class locomotion mammal walks 4 0 Get values at several indexes and levels - >>> df.xs(('bird', 'walks'), - ... level=[0, 'locomotion']) + >>> df.xs(("bird", "walks"), level=[0, "locomotion"]) num_legs num_wings animal penguin 2 2 Get values at specified column and axis - >>> df.xs('num_wings', axis=1) + >>> df.xs("num_wings", axis=1) class animal locomotion mammal cat walks 0 dog walks 0 @@ -4333,8 +4351,8 @@ def get(self, key, default=None): 2014-02-14 22.0 medium 2014-02-15 35.0 medium - >>> ser = df['windspeed'] - >>> ser.get('2014-02-13') + >>> ser = df["windspeed"] + >>> ser.get("2014-02-13") 'high' If the key isn't found, the default value will be used. @@ -4342,7 +4360,7 @@ def get(self, key, default=None): >>> df.get(["temp_celsius", "temp_kelvin"], default="default_value") 'default_value' - >>> ser.get('2014-02-10', '[unknown]') + >>> ser.get("2014-02-10", "[unknown]") '[unknown]' """ try: @@ -4434,14 +4452,16 @@ def reindex_like( Examples -------- - >>> df1 = pd.DataFrame([[24.3, 75.7, 'high'], - ... [31, 87.8, 'high'], - ... [22, 71.6, 'medium'], - ... [35, 95, 'medium']], - ... columns=['temp_celsius', 'temp_fahrenheit', - ... 'windspeed'], - ... index=pd.date_range(start='2014-02-12', - ... end='2014-02-15', freq='D')) + >>> df1 = pd.DataFrame( + ... [ + ... [24.3, 75.7, "high"], + ... [31, 87.8, "high"], + ... [22, 71.6, "medium"], + ... [35, 95, "medium"], + ... ], + ... columns=["temp_celsius", "temp_fahrenheit", "windspeed"], + ... index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"), + ... ) >>> df1 temp_celsius temp_fahrenheit windspeed @@ -4450,12 +4470,11 @@ def reindex_like( 2014-02-14 22.0 71.6 medium 2014-02-15 35.0 95.0 medium - >>> df2 = pd.DataFrame([[28, 'low'], - ... [30, 'low'], - ... [35.1, 'medium']], - ... columns=['temp_celsius', 'windspeed'], - ... index=pd.DatetimeIndex(['2014-02-12', '2014-02-13', - ... '2014-02-15'])) + >>> df2 = pd.DataFrame( + ... [[28, "low"], [30, "low"], [35.1, "medium"]], + ... columns=["temp_celsius", "windspeed"], + ... index=pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"]), + ... 
) >>> df2 temp_celsius windspeed @@ -4698,14 +4717,14 @@ def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self: 3 4 dtype: int64 - >>> s.add_prefix('item_') + >>> s.add_prefix("item_") item_0 1 item_1 2 item_2 3 item_3 4 dtype: int64 - >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) >>> df A B 0 1 3 @@ -4713,7 +4732,7 @@ def add_prefix(self, prefix: str, axis: Axis | None = None) -> Self: 2 3 5 3 4 6 - >>> df.add_prefix('col_') + >>> df.add_prefix("col_") col_A col_B 0 1 3 1 2 4 @@ -4772,14 +4791,14 @@ def add_suffix(self, suffix: str, axis: Axis | None = None) -> Self: 3 4 dtype: int64 - >>> s.add_suffix('_item') + >>> s.add_suffix("_item") 0_item 1 1_item 2 2_item 3 3_item 4 dtype: int64 - >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) + >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) >>> df A B 0 1 3 @@ -4787,7 +4806,7 @@ def add_suffix(self, suffix: str, axis: Axis | None = None) -> Self: 2 3 5 3 4 6 - >>> df.add_suffix('_col') + >>> df.add_suffix("_col") A_col B_col 0 1 3 1 2 4 @@ -4904,12 +4923,14 @@ def sort_values( Examples -------- - >>> df = pd.DataFrame({ - ... 'col1': ['A', 'A', 'B', np.nan, 'D', 'C'], - ... 'col2': [2, 1, 9, 8, 7, 4], - ... 'col3': [0, 1, 9, 4, 2, 3], - ... 'col4': ['a', 'B', 'c', 'D', 'e', 'F'] - ... }) + >>> df = pd.DataFrame( + ... { + ... "col1": ["A", "A", "B", np.nan, "D", "C"], + ... "col2": [2, 1, 9, 8, 7, 4], + ... "col3": [0, 1, 9, 4, 2, 3], + ... "col4": ["a", "B", "c", "D", "e", "F"], + ... } + ... ) >>> df col1 col2 col3 col4 0 A 2 0 a @@ -4921,7 +4942,7 @@ def sort_values( Sort by col1 - >>> df.sort_values(by=['col1']) + >>> df.sort_values(by=["col1"]) col1 col2 col3 col4 0 A 2 0 a 1 A 1 1 B @@ -4932,7 +4953,7 @@ def sort_values( Sort by multiple columns - >>> df.sort_values(by=['col1', 'col2']) + >>> df.sort_values(by=["col1", "col2"]) col1 col2 col3 col4 1 A 1 1 B 0 A 2 0 a @@ -4943,7 +4964,7 @@ def sort_values( Sort Descending - >>> df.sort_values(by='col1', ascending=False) + >>> df.sort_values(by="col1", ascending=False) col1 col2 col3 col4 4 D 7 2 e 5 C 4 3 F @@ -4954,7 +4975,7 @@ def sort_values( Putting NAs first - >>> df.sort_values(by='col1', ascending=False, na_position='first') + >>> df.sort_values(by="col1", ascending=False, na_position="first") col1 col2 col3 col4 3 NaN 8 4 D 4 D 7 2 e @@ -4965,7 +4986,7 @@ def sort_values( Sorting with a key function - >>> df.sort_values(by='col4', key=lambda col: col.str.lower()) + >>> df.sort_values(by="col4", key=lambda col: col.str.lower()) col1 col2 col3 col4 0 A 2 0 a 1 A 1 1 B @@ -4977,10 +4998,12 @@ def sort_values( Natural sort with the key argument, using the `natsort ` package. - >>> df = pd.DataFrame({ - ... "time": ['0hr', '128hr', '72hr', '48hr', '96hr'], - ... "value": [10, 20, 30, 40, 50] - ... }) + >>> df = pd.DataFrame( + ... { + ... "time": ["0hr", "128hr", "72hr", "48hr", "96hr"], + ... "value": [10, 20, 30, 40, 50], + ... } + ... ) >>> df time value 0 0hr 10 @@ -4990,8 +5013,7 @@ def sort_values( 4 96hr 50 >>> from natsort import index_natsorted >>> df.sort_values( - ... by="time", - ... key=lambda x: np.argsort(index_natsorted(df["time"])) + ... by="time", key=lambda x: np.argsort(index_natsorted(df["time"])) ... ) time value 0 0hr 10 @@ -5197,10 +5219,13 @@ def reindex( Create a dataframe with some fictional data. - >>> index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror'] - >>> df = pd.DataFrame({{'http_status': [200, 200, 404, 404, 301], - ... 
'response_time': [0.04, 0.02, 0.07, 0.08, 1.0]}}, - ... index=index) + >>> index = ["Firefox", "Chrome", "Safari", "IE10", "Konqueror"] + >>> columns = ["http_status", "response_time"] + >>> df = pd.DataFrame( + ... [[200, 0.04], [200, 0.02], [404, 0.07], [404, 0.08], [301, 1.0]], + ... columns=columns, + ... index=index, + ... ) >>> df http_status response_time Firefox 200 0.04 @@ -5213,8 +5238,7 @@ def reindex( values in the new index that do not have corresponding records in the dataframe are assigned ``NaN``. - >>> new_index = ['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', - ... 'Chrome'] + >>> new_index = ["Safari", "Iceweasel", "Comodo Dragon", "IE10", "Chrome"] >>> df.reindex(new_index) http_status response_time Safari 404.0 0.07 @@ -5236,7 +5260,7 @@ def reindex( IE10 404 0.08 Chrome 200 0.02 - >>> df.reindex(new_index, fill_value='missing') + >>> df.reindex(new_index, fill_value="missing") http_status response_time Safari 404 0.07 Iceweasel missing missing @@ -5246,7 +5270,7 @@ def reindex( We can also reindex the columns. - >>> df.reindex(columns=['http_status', 'user_agent']) + >>> df.reindex(columns=["http_status", "user_agent"]) http_status user_agent Firefox 200 NaN Chrome 200 NaN @@ -5256,7 +5280,7 @@ def reindex( Or we can use "axis-style" keyword arguments - >>> df.reindex(['http_status', 'user_agent'], axis="columns") + >>> df.reindex(["http_status", "user_agent"], axis="columns") http_status user_agent Firefox 200 NaN Chrome 200 NaN @@ -5269,9 +5293,10 @@ def reindex( monotonically increasing index (for example, a sequence of dates). - >>> date_index = pd.date_range('1/1/2010', periods=6, freq='D') - >>> df2 = pd.DataFrame({{"prices": [100, 101, np.nan, 100, 89, 88]}}, - ... index=date_index) + >>> date_index = pd.date_range("1/1/2010", periods=6, freq="D") + >>> df2 = pd.DataFrame( + ... {{"prices": [100, 101, np.nan, 100, 89, 88]}}, index=date_index + ... ) >>> df2 prices 2010-01-01 100.0 @@ -5284,7 +5309,7 @@ def reindex( Suppose we decide to expand the dataframe to cover a wider date range. - >>> date_index2 = pd.date_range('12/29/2009', periods=10, freq='D') + >>> date_index2 = pd.date_range("12/29/2009", periods=10, freq="D") >>> df2.reindex(date_index2) prices 2009-12-29 NaN @@ -5306,7 +5331,7 @@ def reindex( For example, to back-propagate the last valid value to fill the ``NaN`` values, pass ``bfill`` as an argument to the ``method`` keyword. - >>> df2.reindex(date_index2, method='bfill') + >>> df2.reindex(date_index2, method="bfill") prices 2009-12-29 100.0 2009-12-30 100.0 @@ -5515,28 +5540,30 @@ def filter( Examples -------- - >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])), - ... index=['mouse', 'rabbit'], - ... columns=['one', 'two', 'three']) + >>> df = pd.DataFrame( + ... np.array(([1, 2, 3], [4, 5, 6])), + ... index=["mouse", "rabbit"], + ... columns=["one", "two", "three"], + ... ) >>> df one two three mouse 1 2 3 rabbit 4 5 6 >>> # select columns by name - >>> df.filter(items=['one', 'three']) + >>> df.filter(items=["one", "three"]) one three mouse 1 3 rabbit 4 6 >>> # select columns by regular expression - >>> df.filter(regex='e$', axis=1) + >>> df.filter(regex="e$", axis=1) one three mouse 1 3 rabbit 4 6 >>> # select rows containing 'bbi' - >>> df.filter(like='bbi', axis=0) + >>> df.filter(like="bbi", axis=0) one two three rabbit 4 5 6 """ @@ -5608,8 +5635,21 @@ def head(self, n: int = 5) -> Self: Examples -------- - >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', - ... 
'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df = pd.DataFrame( + ... { + ... "animal": [ + ... "alligator", + ... "bee", + ... "falcon", + ... "lion", + ... "monkey", + ... "parrot", + ... "shark", + ... "whale", + ... "zebra", + ... ] + ... } + ... ) >>> df animal 0 alligator @@ -5685,8 +5725,21 @@ def tail(self, n: int = 5) -> Self: Examples -------- - >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion', - ... 'monkey', 'parrot', 'shark', 'whale', 'zebra']}) + >>> df = pd.DataFrame( + ... { + ... "animal": [ + ... "alligator", + ... "bee", + ... "falcon", + ... "lion", + ... "monkey", + ... "parrot", + ... "shark", + ... "whale", + ... "zebra", + ... ] + ... } + ... ) >>> df animal 0 alligator @@ -5811,10 +5864,14 @@ def sample( Examples -------- - >>> df = pd.DataFrame({'num_legs': [2, 4, 8, 0], - ... 'num_wings': [2, 0, 0, 0], - ... 'num_specimen_seen': [10, 2, 1, 8]}, - ... index=['falcon', 'dog', 'spider', 'fish']) + >>> df = pd.DataFrame( + ... { + ... "num_legs": [2, 4, 8, 0], + ... "num_wings": [2, 0, 0, 0], + ... "num_specimen_seen": [10, 2, 1, 8], + ... }, + ... index=["falcon", "dog", "spider", "fish"], + ... ) >>> df num_legs num_wings num_specimen_seen falcon 2 2 10 @@ -5826,7 +5883,7 @@ def sample( Note that we use `random_state` to ensure the reproducibility of the examples. - >>> df['num_legs'].sample(n=3, random_state=1) + >>> df["num_legs"].sample(n=3, random_state=1) fish 0 spider 8 falcon 2 @@ -5856,7 +5913,7 @@ def sample( Using a DataFrame column as weights. Rows with larger value in the `num_specimen_seen` column are more likely to be sampled. - >>> df.sample(n=2, weights='num_specimen_seen', random_state=1) + >>> df.sample(n=2, weights="num_specimen_seen", random_state=1) num_legs num_wings num_specimen_seen falcon 2 2 10 fish 0 0 8 @@ -5949,7 +6006,7 @@ def pipe( Constructing a income DataFrame from a dictionary. >>> data = [[8000, 1000], [9500, np.nan], [5000, 2000]] - >>> df = pd.DataFrame(data, columns=['Salary', 'Others']) + >>> df = pd.DataFrame(data, columns=["Salary", "Others"]) >>> df Salary Others 0 8000 1000.0 @@ -5971,7 +6028,8 @@ def pipe( >>> subtract_national_insurance( ... subtract_state_tax(subtract_federal_tax(df), rate=0.12), ... rate=0.05, - ... rate_increase=0.02) # doctest: +SKIP + ... rate_increase=0.02, + ... ) # doctest: +SKIP You can write @@ -5997,9 +6055,7 @@ def pipe( ... df.pipe(subtract_federal_tax) ... .pipe(subtract_state_tax, rate=0.12) ... .pipe( - ... (subtract_national_insurance, 'df'), - ... rate=0.05, - ... rate_increase=0.02 + ... (subtract_national_insurance, "df"), rate=0.05, rate_increase=0.02 ... ) ... ) Salary Others @@ -6209,10 +6265,14 @@ def dtypes(self): Examples -------- - >>> df = pd.DataFrame({'float': [1.0], - ... 'int': [1], - ... 'datetime': [pd.Timestamp('20180310')], - ... 'string': ['foo']}) + >>> df = pd.DataFrame( + ... { + ... "float": [1.0], + ... "int": [1], + ... "datetime": [pd.Timestamp("20180310")], + ... "string": ["foo"], + ... } + ... 
) >>> df.dtypes float float64 int int64 @@ -6283,7 +6343,7 @@ def astype( -------- Create a DataFrame: - >>> d = {'col1': [1, 2], 'col2': [3, 4]} + >>> d = {"col1": [1, 2], "col2": [3, 4]} >>> df = pd.DataFrame(data=d) >>> df.dtypes col1 int64 @@ -6292,33 +6352,33 @@ def astype( Cast all columns to int32: - >>> df.astype('int32').dtypes + >>> df.astype("int32").dtypes col1 int32 col2 int32 dtype: object Cast col1 to int32 using a dictionary: - >>> df.astype({'col1': 'int32'}).dtypes + >>> df.astype({"col1": "int32"}).dtypes col1 int32 col2 int64 dtype: object Create a series: - >>> ser = pd.Series([1, 2], dtype='int32') + >>> ser = pd.Series([1, 2], dtype="int32") >>> ser 0 1 1 2 dtype: int32 - >>> ser.astype('int64') + >>> ser.astype("int64") 0 1 1 2 dtype: int64 Convert to categorical type: - >>> ser.astype('category') + >>> ser.astype("category") 0 1 1 2 dtype: category @@ -6327,8 +6387,7 @@ def astype( Convert to ordered categorical type with custom ordering: >>> from pandas.api.types import CategoricalDtype - >>> cat_dtype = CategoricalDtype( - ... categories=[2, 1], ordered=True) + >>> cat_dtype = CategoricalDtype(categories=[2, 1], ordered=True) >>> ser.astype(cat_dtype) 0 1 1 2 @@ -6337,7 +6396,7 @@ def astype( Create a series of dates: - >>> ser_date = pd.Series(pd.date_range('20200101', periods=3)) + >>> ser_date = pd.Series(pd.date_range("20200101", periods=3)) >>> ser_date 0 2020-01-01 1 2020-01-02 @@ -6952,11 +7011,15 @@ def fillna( Examples -------- - >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], - ... [3, 4, np.nan, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [np.nan, 3, np.nan, 4]], - ... columns=list("ABCD")) + >>> df = pd.DataFrame( + ... [ + ... [np.nan, 2, np.nan, 0], + ... [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [np.nan, 3, np.nan, 4], + ... ], + ... columns=list("ABCD"), + ... ) >>> df A B C D 0 NaN 2.0 NaN 0.0 @@ -7265,11 +7328,15 @@ def ffill( Examples -------- - >>> df = pd.DataFrame([[np.nan, 2, np.nan, 0], - ... [3, 4, np.nan, 1], - ... [np.nan, np.nan, np.nan, np.nan], - ... [np.nan, 3, np.nan, 4]], - ... columns=list("ABCD")) + >>> df = pd.DataFrame( + ... [ + ... [np.nan, 2, np.nan, 0], + ... [3, 4, np.nan, 1], + ... [np.nan, np.nan, np.nan, np.nan], + ... [np.nan, 3, np.nan, 4], + ... ], + ... columns=list("ABCD"), + ... ) >>> df A B C D 0 NaN 2.0 NaN 0.0 @@ -7460,7 +7527,7 @@ def bfill( With DataFrame: - >>> df = pd.DataFrame({{'A': [1, None, None, 4], 'B': [None, 5, None, 7]}}) + >>> df = pd.DataFrame({{"A": [1, None, None, 4], "B": [None, 5, None, 7]}}) >>> df A B 0 1.0 NaN @@ -8009,7 +8076,7 @@ def interpolate( an ``order`` (int). >>> s = pd.Series([0, 2, np.nan, 8]) - >>> s.interpolate(method='polynomial', order=2) + >>> s.interpolate(method="polynomial", order=2) 0 0.000000 1 2.000000 2 4.666667 @@ -8024,18 +8091,22 @@ def interpolate( Note how the first entry in column 'b' remains ``NaN``, because there is no entry before it to use for interpolation. - >>> df = pd.DataFrame([(0.0, np.nan, -1.0, 1.0), - ... (np.nan, 2.0, np.nan, np.nan), - ... (2.0, 3.0, np.nan, 9.0), - ... (np.nan, 4.0, -4.0, 16.0)], - ... columns=list('abcd')) + >>> df = pd.DataFrame( + ... [ + ... (0.0, np.nan, -1.0, 1.0), + ... (np.nan, 2.0, np.nan, np.nan), + ... (2.0, 3.0, np.nan, 9.0), + ... (np.nan, 4.0, -4.0, 16.0), + ... ], + ... columns=list("abcd"), + ... 
) >>> df a b c d 0 0.0 NaN -1.0 1.0 1 NaN 2.0 NaN NaN 2 2.0 3.0 NaN 9.0 3 NaN 4.0 -4.0 16.0 - >>> df.interpolate(method='linear', limit_direction='forward', axis=0) + >>> df.interpolate(method="linear", limit_direction="forward", axis=0) a b c d 0 0.0 NaN -1.0 1.0 1 1.0 2.0 -2.0 5.0 @@ -8044,7 +8115,7 @@ def interpolate( Using polynomial interpolation. - >>> df['d'].interpolate(method='polynomial', order=2) + >>> df["d"].interpolate(method="polynomial", order=2) 0 1.0 1 4.0 2 9.0 @@ -8247,24 +8318,32 @@ def asof(self, where, subset=None): Take all columns into consideration - >>> df = pd.DataFrame({'a': [10., 20., 30., 40., 50.], - ... 'b': [None, None, None, None, 500]}, - ... index=pd.DatetimeIndex(['2018-02-27 09:01:00', - ... '2018-02-27 09:02:00', - ... '2018-02-27 09:03:00', - ... '2018-02-27 09:04:00', - ... '2018-02-27 09:05:00'])) - >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', - ... '2018-02-27 09:04:30'])) + >>> df = pd.DataFrame( + ... { + ... "a": [10.0, 20.0, 30.0, 40.0, 50.0], + ... "b": [None, None, None, None, 500], + ... }, + ... index=pd.DatetimeIndex( + ... [ + ... "2018-02-27 09:01:00", + ... "2018-02-27 09:02:00", + ... "2018-02-27 09:03:00", + ... "2018-02-27 09:04:00", + ... "2018-02-27 09:05:00", + ... ] + ... ), + ... ) + >>> df.asof(pd.DatetimeIndex(["2018-02-27 09:03:30", "2018-02-27 09:04:30"])) a b 2018-02-27 09:03:30 NaN NaN 2018-02-27 09:04:30 NaN NaN Take a single column into consideration - >>> df.asof(pd.DatetimeIndex(['2018-02-27 09:03:30', - ... '2018-02-27 09:04:30']), - ... subset=['a']) + >>> df.asof( + ... pd.DatetimeIndex(["2018-02-27 09:03:30", "2018-02-27 09:04:30"]), + ... subset=["a"], + ... ) a b 2018-02-27 09:03:30 30.0 NaN 2018-02-27 09:04:30 40.0 NaN @@ -8375,11 +8454,18 @@ def isna(self) -> Self: -------- Show which entries in a DataFrame are NA. - >>> df = pd.DataFrame(dict(age=[5, 6, np.nan], - ... born=[pd.NaT, pd.Timestamp('1939-05-27'), - ... pd.Timestamp('1940-04-25')], - ... name=['Alfred', 'Batman', ''], - ... toy=[None, 'Batmobile', 'Joker'])) + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) >>> df age born name toy 0 5.0 NaT Alfred None @@ -8442,11 +8528,18 @@ def notna(self) -> Self: -------- Show which entries in a DataFrame are not NA. - >>> df = pd.DataFrame(dict(age=[5, 6, np.nan], - ... born=[pd.NaT, pd.Timestamp('1939-05-27'), - ... pd.Timestamp('1940-04-25')], - ... name=['Alfred', 'Batman', ''], - ... toy=[None, 'Batmobile', 'Joker'])) + >>> df = pd.DataFrame( + ... dict( + ... age=[5, 6, np.nan], + ... born=[ + ... pd.NaT, + ... pd.Timestamp("1939-05-27"), + ... pd.Timestamp("1940-04-25"), + ... ], + ... name=["Alfred", "Batman", ""], + ... toy=[None, "Batmobile", "Joker"], + ... ) + ... ) >>> df age born name toy 0 5.0 NaT Alfred None @@ -8619,7 +8712,7 @@ def clip( Examples -------- - >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]} + >>> data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]} >>> df = pd.DataFrame(data) >>> df col_0 col_1 @@ -8832,9 +8925,9 @@ def asfreq( -------- Start by creating a series with 4 one minute timestamps. 
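`asfreq` only lays the existing observations onto the new, finer grid; slots that did not exist before come out as `NaN` unless `fill_value` (a constant) or `method` (a fill strategy) says otherwise. A minimal two-point sketch of that contract, before the fuller example below (assuming pandas 2.2+, where the minute alias is "min"):

>>> import pandas as pd
>>> s = pd.Series([1.0, 2.0], index=pd.date_range("2000-01-01", periods=2, freq="2min"))
>>> s.asfreq("min")  # the new 00:01 slot has no observation
2000-01-01 00:00:00    1.0
2000-01-01 00:01:00    NaN
2000-01-01 00:02:00    2.0
Freq: min, dtype: float64
>>> s.asfreq("min", fill_value=9.0)  # same grid, constant fill instead of NaN
2000-01-01 00:00:00    1.0
2000-01-01 00:01:00    9.0
2000-01-01 00:02:00    2.0
Freq: min, dtype: float64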
- >>> index = pd.date_range('1/1/2000', periods=4, freq='min') + >>> index = pd.date_range("1/1/2000", periods=4, freq="min") >>> series = pd.Series([0.0, None, 2.0, 3.0], index=index) - >>> df = pd.DataFrame({{'s': series}}) + >>> df = pd.DataFrame({{"s": series}}) >>> df s 2000-01-01 00:00:00 0.0 @@ -8844,7 +8937,7 @@ def asfreq( Upsample the series into 30 second bins. - >>> df.asfreq(freq='30s') + >>> df.asfreq(freq="30s") s 2000-01-01 00:00:00 0.0 2000-01-01 00:00:30 NaN @@ -8856,7 +8949,7 @@ def asfreq( Upsample again, providing a ``fill value``. - >>> df.asfreq(freq='30s', fill_value=9.0) + >>> df.asfreq(freq="30s", fill_value=9.0) s 2000-01-01 00:00:00 0.0 2000-01-01 00:00:30 9.0 @@ -8868,7 +8961,7 @@ def asfreq( Upsample again, providing a ``method``. - >>> df.asfreq(freq='30s', method='bfill') + >>> df.asfreq(freq="30s", method="bfill") s 2000-01-01 00:00:00 0.0 2000-01-01 00:00:30 NaN @@ -8920,8 +9013,8 @@ def at_time(self, time, asof: bool_t = False, axis: Axis | None = None) -> Self: Examples -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='12h') - >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> i = pd.date_range("2018-04-09", periods=4, freq="12h") + >>> ts = pd.DataFrame({"A": [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 @@ -8929,7 +9022,7 @@ def at_time(self, time, asof: bool_t = False, axis: Axis | None = None) -> Self: 2018-04-10 00:00:00 3 2018-04-10 12:00:00 4 - >>> ts.at_time('12:00') + >>> ts.at_time("12:00") A 2018-04-09 12:00:00 2 2018-04-10 12:00:00 4 @@ -8992,8 +9085,8 @@ def between_time( Examples -------- - >>> i = pd.date_range('2018-04-09', periods=4, freq='1D20min') - >>> ts = pd.DataFrame({'A': [1, 2, 3, 4]}, index=i) + >>> i = pd.date_range("2018-04-09", periods=4, freq="1D20min") + >>> ts = pd.DataFrame({"A": [1, 2, 3, 4]}, index=i) >>> ts A 2018-04-09 00:00:00 1 @@ -9001,7 +9094,7 @@ def between_time( 2018-04-11 00:40:00 3 2018-04-12 01:00:00 4 - >>> ts.between_time('0:15', '0:45') + >>> ts.between_time("0:15", "0:45") A 2018-04-10 00:20:00 2 2018-04-11 00:40:00 3 @@ -9009,7 +9102,7 @@ def between_time( You get the times that are *not* between two times by setting ``start_time`` later than ``end_time``: - >>> ts.between_time('0:45', '0:15') + >>> ts.between_time("0:45", "0:15") A 2018-04-09 00:00:00 1 2018-04-12 01:00:00 4 @@ -9146,7 +9239,7 @@ def resample( -------- Start by creating a series with 9 one minute timestamps. - >>> index = pd.date_range('1/1/2000', periods=9, freq='min') + >>> index = pd.date_range("1/1/2000", periods=9, freq="min") >>> series = pd.Series(range(9), index=index) >>> series 2000-01-01 00:00:00 0 @@ -9163,7 +9256,7 @@ def resample( Downsample the series into 3 minute bins and sum the values of the timestamps falling into a bin. - >>> series.resample('3min').sum() + >>> series.resample("3min").sum() 2000-01-01 00:00:00 3 2000-01-01 00:03:00 12 2000-01-01 00:06:00 21 @@ -9177,7 +9270,7 @@ def resample( value in the resampled bucket with the label ``2000-01-01 00:03:00`` does not include 3 (if it did, the summed value would be 6, not 3). - >>> series.resample('3min', label='right').sum() + >>> series.resample("3min", label="right").sum() 2000-01-01 00:03:00 3 2000-01-01 00:06:00 12 2000-01-01 00:09:00 21 @@ -9186,7 +9279,7 @@ def resample( To include this value close the right side of the bin interval, as shown below. 
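Closing the right side turns each bin into the half-open interval (t, t + 3min], so 00:00's value 0 now sits alone in the first bin instead of joining 1 and 2. A hand check of both groupings on the same nine-point series (a minimal sketch, assuming a recent pandas; `int()` keeps the scalar reprs stable across numpy versions):

>>> import pandas as pd
>>> series = pd.Series(range(9), index=pd.date_range("1/1/2000", periods=9, freq="min"))
>>> int(series.resample("3min").sum().iloc[0])  # [00:00, 00:03) -> 0 + 1 + 2
3
>>> int(series.resample("3min", closed="right").sum().iloc[0])  # (23:57, 00:00] -> just 0
0
>>> int(series.resample("3min", closed="right").sum().iloc[1])  # (00:00, 00:03] -> 1 + 2 + 3
6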
- >>> series.resample('3min', label='right', closed='right').sum() + >>> series.resample("3min", label="right", closed="right").sum() 2000-01-01 00:00:00 0 2000-01-01 00:03:00 6 2000-01-01 00:06:00 15 @@ -9195,7 +9288,7 @@ def resample( Upsample the series into 30 second bins. - >>> series.resample('30s').asfreq()[0:5] # Select first 5 rows + >>> series.resample("30s").asfreq()[0:5] # Select first 5 rows 2000-01-01 00:00:00 0.0 2000-01-01 00:00:30 NaN 2000-01-01 00:01:00 1.0 @@ -9206,7 +9299,7 @@ def resample( Upsample the series into 30 second bins and fill the ``NaN`` values using the ``ffill`` method. - >>> series.resample('30s').ffill()[0:5] + >>> series.resample("30s").ffill()[0:5] 2000-01-01 00:00:00 0 2000-01-01 00:00:30 0 2000-01-01 00:01:00 1 @@ -9217,7 +9310,7 @@ def resample( Upsample the series into 30 second bins and fill the ``NaN`` values using the ``bfill`` method. - >>> series.resample('30s').bfill()[0:5] + >>> series.resample("30s").bfill()[0:5] 2000-01-01 00:00:00 0 2000-01-01 00:00:30 1 2000-01-01 00:01:00 1 @@ -9229,8 +9322,7 @@ def resample( >>> def custom_resampler(arraylike): ... return np.sum(arraylike) + 5 - ... - >>> series.resample('3min').apply(custom_resampler) + >>> series.resample("3min").apply(custom_resampler) 2000-01-01 00:00:00 8 2000-01-01 00:03:00 17 2000-01-01 00:06:00 26 @@ -9239,12 +9331,9 @@ def resample( For DataFrame objects, the keyword `on` can be used to specify the column instead of the index for resampling. - >>> d = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], - ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} - >>> df = pd.DataFrame(d) - >>> df['week_starting'] = pd.date_range('01/01/2018', - ... periods=8, - ... freq='W') + >>> df = pd.DataFrame([10, 11, 9, 13, 14, 18, 17, 19], columns=["price"]) + >>> df["volume"] = [50, 60, 40, 100, 50, 100, 40, 50] + >>> df["week_starting"] = pd.date_range("01/01/2018", periods=8, freq="W") >>> df price volume week_starting 0 10 50 2018-01-07 @@ -9255,7 +9344,7 @@ def resample( 5 18 100 2018-02-11 6 17 40 2018-02-18 7 19 50 2018-02-25 - >>> df.resample('ME', on='week_starting').mean() + >>> df.resample("ME", on="week_starting").mean() price volume week_starting 2018-01-31 10.75 62.5 @@ -9264,14 +9353,20 @@ def resample( For a DataFrame with MultiIndex, the keyword `level` can be used to specify on which level the resampling needs to take place. - >>> days = pd.date_range('1/1/2000', periods=4, freq='D') - >>> d2 = {{'price': [10, 11, 9, 13, 14, 18, 17, 19], - ... 'volume': [50, 60, 40, 100, 50, 100, 40, 50]}} + >>> days = pd.date_range("1/1/2000", periods=4, freq="D") >>> df2 = pd.DataFrame( - ... d2, - ... index=pd.MultiIndex.from_product( - ... [days, ['morning', 'afternoon']] - ... ) + ... [ + ... [10, 50], + ... [11, 60], + ... [9, 40], + ... [13, 100], + ... [14, 50], + ... [18, 100], + ... [17, 40], + ... [19, 50], + ... ], + ... columns=["price", "volume"], + ... index=pd.MultiIndex.from_product([days, ["morning", "afternoon"]]), ... 
) >>> df2 price volume @@ -9283,7 +9378,7 @@ def resample( afternoon 18 100 2000-01-04 morning 17 40 afternoon 19 50 - >>> df2.resample('D', level=0).sum() + >>> df2.resample("D", level=0).sum() price volume 2000-01-01 21 110 2000-01-02 22 140 @@ -9292,8 +9387,8 @@ def resample( If you want to adjust the start of the bins based on a fixed timestamp: - >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' - >>> rng = pd.date_range(start, end, freq='7min') + >>> start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00" + >>> rng = pd.date_range(start, end, freq="7min") >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) >>> ts 2000-10-01 23:30:00 0 @@ -9307,7 +9402,7 @@ def resample( 2000-10-02 00:26:00 24 Freq: 7min, dtype: int64 - >>> ts.resample('17min').sum() + >>> ts.resample("17min").sum() 2000-10-01 23:14:00 0 2000-10-01 23:31:00 9 2000-10-01 23:48:00 21 @@ -9315,7 +9410,7 @@ def resample( 2000-10-02 00:22:00 24 Freq: 17min, dtype: int64 - >>> ts.resample('17min', origin='epoch').sum() + >>> ts.resample("17min", origin="epoch").sum() 2000-10-01 23:18:00 0 2000-10-01 23:35:00 18 2000-10-01 23:52:00 27 @@ -9323,7 +9418,7 @@ def resample( 2000-10-02 00:26:00 24 Freq: 17min, dtype: int64 - >>> ts.resample('17min', origin='2000-01-01').sum() + >>> ts.resample("17min", origin="2000-01-01").sum() 2000-10-01 23:24:00 3 2000-10-01 23:41:00 15 2000-10-01 23:58:00 45 @@ -9333,14 +9428,14 @@ def resample( If you want to adjust the start of the bins with an `offset` Timedelta, the two following lines are equivalent: - >>> ts.resample('17min', origin='start').sum() + >>> ts.resample("17min", origin="start").sum() 2000-10-01 23:30:00 9 2000-10-01 23:47:00 21 2000-10-02 00:04:00 54 2000-10-02 00:21:00 24 Freq: 17min, dtype: int64 - >>> ts.resample('17min', offset='23h30min').sum() + >>> ts.resample("17min", offset="23h30min").sum() 2000-10-01 23:30:00 9 2000-10-01 23:47:00 21 2000-10-02 00:04:00 54 @@ -9349,7 +9444,7 @@ def resample( If you want to take the largest Timestamp as the end of the bins: - >>> ts.resample('17min', origin='end').sum() + >>> ts.resample("17min", origin="end").sum() 2000-10-01 23:35:00 0 2000-10-01 23:52:00 18 2000-10-02 00:09:00 27 @@ -9360,7 +9455,7 @@ def resample( midnight of the largest Timestamp as the end of the bins and drop the bins not containing data: - >>> ts.resample('17min', origin='end_day').sum() + >>> ts.resample("17min", origin="end_day").sum() 2000-10-01 23:38:00 3 2000-10-01 23:55:00 15 2000-10-02 00:12:00 45 @@ -9468,9 +9563,12 @@ def rank( Examples -------- - >>> df = pd.DataFrame(data={'Animal': ['cat', 'penguin', 'dog', - ... 'spider', 'snake'], - ... 'Number_legs': [4, 2, 4, 8, np.nan]}) + >>> df = pd.DataFrame( + ... data={ + ... "Animal": ["cat", "penguin", "dog", "spider", "snake"], + ... "Number_legs": [4, 2, 4, 8, np.nan], + ... } + ... ) >>> df Animal Number_legs 0 cat 4.0 @@ -9504,10 +9602,10 @@ def rank( * pct_rank: when setting ``pct = True``, the ranking is expressed as percentile rank. 
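The tie-handling rules listed above are easiest to verify on a three-element series with a single tie: the tied values occupy the rank block {2, 3}, `method` only decides how that block is reported, and `pct=True` divides the result by the count of non-NA values. A minimal sketch (hypothetical values, assuming a recent pandas):

>>> import pandas as pd
>>> s = pd.Series([4, 2, 4])
>>> s.rank()  # the tied 4s share ranks 2 and 3 -> average 2.5
0    2.5
1    1.0
2    2.5
dtype: float64
>>> s.rank(method="max")  # each tied value reports the block's highest rank
0    3.0
1    1.0
2    3.0
dtype: float64
>>> s.rank(pct=True)  # average ranks divided by the 3 non-NA values
0    0.833333
1    0.333333
2    0.833333
dtype: float64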
- >>> df['default_rank'] = df['Number_legs'].rank() - >>> df['max_rank'] = df['Number_legs'].rank(method='max') - >>> df['NA_bottom'] = df['Number_legs'].rank(na_option='bottom') - >>> df['pct_rank'] = df['Number_legs'].rank(pct=True) + >>> df["default_rank"] = df["Number_legs"].rank() + >>> df["max_rank"] = df["Number_legs"].rank(method="max") + >>> df["NA_bottom"] = df["Number_legs"].rank(na_option="bottom") + >>> df["pct_rank"] = df["Number_legs"].rank(pct=True) >>> df Animal Number_legs default_rank max_rank NA_bottom pct_rank 0 cat 4.0 2.5 3.0 2.5 0.625 @@ -10386,7 +10484,7 @@ def where( 4 10 dtype: int64 - >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=["A", "B"]) >>> df A B 0 0 1 @@ -10602,10 +10700,11 @@ def shift( Examples -------- - >>> df = pd.DataFrame({{"Col1": [10, 20, 15, 30, 45], - ... "Col2": [13, 23, 18, 33, 48], - ... "Col3": [17, 27, 22, 37, 52]}}, - ... index=pd.date_range("2020-01-01", "2020-01-05")) + >>> df = pd.DataFrame( + ... [[10, 13, 17], [20, 23, 27], [15, 18, 22], [30, 33, 37], [45, 48, 52]], + ... columns=["Col1", "Col2", "Col3"], + ... index=pd.date_range("2020-01-01", "2020-01-05"), + ... ) >>> df Col1 Col2 Col3 2020-01-01 10 13 17 @@ -10654,7 +10753,7 @@ def shift( 2020-01-07 30 33 37 2020-01-08 45 48 52 - >>> df['Col1'].shift(periods=[0, 1, 2]) + >>> df["Col1"].shift(periods=[0, 1, 2]) Col1_0 Col1_1 Col1_2 2020-01-01 10 NaN NaN 2020-01-02 20 10.0 NaN @@ -10787,10 +10886,14 @@ def truncate( Examples -------- - >>> df = pd.DataFrame({'A': ['a', 'b', 'c', 'd', 'e'], - ... 'B': ['f', 'g', 'h', 'i', 'j'], - ... 'C': ['k', 'l', 'm', 'n', 'o']}, - ... index=[1, 2, 3, 4, 5]) + >>> df = pd.DataFrame( + ... { + ... "A": ["a", "b", "c", "d", "e"], + ... "B": ["f", "g", "h", "i", "j"], + ... "C": ["k", "l", "m", "n", "o"], + ... }, + ... index=[1, 2, 3, 4, 5], + ... ) >>> df A B C 1 a f k @@ -10817,7 +10920,7 @@ def truncate( For Series, only rows can be truncated. - >>> df['A'].truncate(before=2, after=4) + >>> df["A"].truncate(before=2, after=4) 2 b 3 c 4 d @@ -10826,8 +10929,8 @@ def truncate( The index values in ``truncate`` can be datetimes or string dates. - >>> dates = pd.date_range('2016-01-01', '2016-02-01', freq='s') - >>> df = pd.DataFrame(index=dates, data={'A': 1}) + >>> dates = pd.date_range("2016-01-01", "2016-02-01", freq="s") + >>> df = pd.DataFrame(index=dates, data={"A": 1}) >>> df.tail() A 2016-01-31 23:59:56 1 @@ -10836,8 +10939,9 @@ def truncate( 2016-01-31 23:59:59 1 2016-02-01 00:00:00 1 - >>> df.truncate(before=pd.Timestamp('2016-01-05'), - ... after=pd.Timestamp('2016-01-10')).tail() + >>> df.truncate( + ... before=pd.Timestamp("2016-01-05"), after=pd.Timestamp("2016-01-10") + ... ).tail() A 2016-01-09 23:59:56 1 2016-01-09 23:59:57 1 @@ -10849,7 +10953,7 @@ def truncate( specify `before` and `after` as strings. They will be coerced to Timestamps before truncation. - >>> df.truncate('2016-01-05', '2016-01-10').tail() + >>> df.truncate("2016-01-05", "2016-01-10").tail() A 2016-01-09 23:59:56 1 2016-01-09 23:59:57 1 @@ -10861,7 +10965,7 @@ def truncate( component (midnight). This differs from partial string slicing, which returns any partially matching dates. - >>> df.loc['2016-01-05':'2016-01-10', :].tail() + >>> df.loc["2016-01-05":"2016-01-10", :].tail() A 2016-01-10 23:59:55 1 2016-01-10 23:59:56 1 @@ -10953,16 +11057,15 @@ def tz_convert( >>> s = pd.Series( ... [1], - ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00']), + ... 
index=pd.DatetimeIndex(["2018-09-15 01:30:00+02:00"]), ... ) - >>> s.tz_convert('Asia/Shanghai') + >>> s.tz_convert("Asia/Shanghai") 2018-09-15 07:30:00+08:00 1 dtype: int64 Pass None to convert to UTC and get a tz-naive index: - >>> s = pd.Series([1], - ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) + >>> s = pd.Series([1], index=pd.DatetimeIndex(["2018-09-15 01:30:00+02:00"])) >>> s.tz_convert(None) 2018-09-14 23:30:00 1 dtype: int64 @@ -11083,16 +11186,15 @@ def tz_localize( >>> s = pd.Series( ... [1], - ... index=pd.DatetimeIndex(['2018-09-15 01:30:00']), + ... index=pd.DatetimeIndex(["2018-09-15 01:30:00"]), ... ) - >>> s.tz_localize('CET') + >>> s.tz_localize("CET") 2018-09-15 01:30:00+02:00 1 dtype: int64 Pass None to convert to tz-naive index and preserve local time: - >>> s = pd.Series([1], - ... index=pd.DatetimeIndex(['2018-09-15 01:30:00+02:00'])) + >>> s = pd.Series([1], index=pd.DatetimeIndex(["2018-09-15 01:30:00+02:00"])) >>> s.tz_localize(None) 2018-09-15 01:30:00 1 dtype: int64 @@ -11100,15 +11202,21 @@ def tz_localize( Be careful with DST changes. When there is sequential data, pandas can infer the DST time: - >>> s = pd.Series(range(7), - ... index=pd.DatetimeIndex(['2018-10-28 01:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 02:00:00', - ... '2018-10-28 02:30:00', - ... '2018-10-28 03:00:00', - ... '2018-10-28 03:30:00'])) - >>> s.tz_localize('CET', ambiguous='infer') + >>> s = pd.Series( + ... range(7), + ... index=pd.DatetimeIndex( + ... [ + ... "2018-10-28 01:30:00", + ... "2018-10-28 02:00:00", + ... "2018-10-28 02:30:00", + ... "2018-10-28 02:00:00", + ... "2018-10-28 02:30:00", + ... "2018-10-28 03:00:00", + ... "2018-10-28 03:30:00", + ... ] + ... ), + ... ) + >>> s.tz_localize("CET", ambiguous="infer") 2018-10-28 01:30:00+02:00 0 2018-10-28 02:00:00+02:00 1 2018-10-28 02:30:00+02:00 2 @@ -11121,11 +11229,17 @@ def tz_localize( In some cases, inferring the DST is impossible. In such cases, you can pass an ndarray to the ambiguous parameter to set the DST explicitly - >>> s = pd.Series(range(3), - ... index=pd.DatetimeIndex(['2018-10-28 01:20:00', - ... '2018-10-28 02:36:00', - ... '2018-10-28 03:46:00'])) - >>> s.tz_localize('CET', ambiguous=np.array([True, True, False])) + >>> s = pd.Series( + ... range(3), + ... index=pd.DatetimeIndex( + ... [ + ... "2018-10-28 01:20:00", + ... "2018-10-28 02:36:00", + ... "2018-10-28 03:46:00", + ... ] + ... ), + ... ) + >>> s.tz_localize("CET", ambiguous=np.array([True, True, False])) 2018-10-28 01:20:00+02:00 0 2018-10-28 02:36:00+02:00 1 2018-10-28 03:46:00+01:00 2 @@ -11135,18 +11249,19 @@ def tz_localize( dates forward or backward with a timedelta object or `'shift_forward'` or `'shift_backward'`. - >>> s = pd.Series(range(2), - ... index=pd.DatetimeIndex(['2015-03-29 02:30:00', - ... '2015-03-29 03:30:00'])) - >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_forward') + >>> s = pd.Series( + ... range(2), + ... index=pd.DatetimeIndex(["2015-03-29 02:30:00", "2015-03-29 03:30:00"]), + ... 
) + >>> s.tz_localize("Europe/Warsaw", nonexistent="shift_forward") 2015-03-29 03:00:00+02:00 0 2015-03-29 03:30:00+02:00 1 dtype: int64 - >>> s.tz_localize('Europe/Warsaw', nonexistent='shift_backward') + >>> s.tz_localize("Europe/Warsaw", nonexistent="shift_backward") 2015-03-29 01:59:59.999999999+01:00 0 2015-03-29 03:30:00+02:00 1 dtype: int64 - >>> s.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1h')) + >>> s.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta("1h")) 2015-03-29 03:30:00+02:00 0 2015-03-29 03:30:00+02:00 1 dtype: int64 @@ -11307,7 +11422,7 @@ def describe( Describing a categorical ``Series``. - >>> s = pd.Series(['a', 'a', 'b', 'c']) + >>> s = pd.Series(["a", "a", "b", "c"]) >>> s.describe() count 4 unique 3 @@ -11317,11 +11432,13 @@ def describe( Describing a timestamp ``Series``. - >>> s = pd.Series([ - ... np.datetime64("2000-01-01"), - ... np.datetime64("2010-01-01"), - ... np.datetime64("2010-01-01") - ... ]) + >>> s = pd.Series( + ... [ + ... np.datetime64("2000-01-01"), + ... np.datetime64("2010-01-01"), + ... np.datetime64("2010-01-01"), + ... ] + ... ) >>> s.describe() count 3 mean 2006-09-01 08:00:00 @@ -11335,10 +11452,13 @@ def describe( Describing a ``DataFrame``. By default only numeric fields are returned. - >>> df = pd.DataFrame({'categorical': pd.Categorical(['d', 'e', 'f']), - ... 'numeric': [1, 2, 3], - ... 'object': ['a', 'b', 'c'] - ... }) + >>> df = pd.DataFrame( + ... { + ... "categorical": pd.Categorical(["d", "e", "f"]), + ... "numeric": [1, 2, 3], + ... "object": ["a", "b", "c"], + ... } + ... ) >>> df.describe() numeric count 3.0 @@ -11352,7 +11472,7 @@ def describe( Describing all columns of a ``DataFrame`` regardless of data type. - >>> df.describe(include='all') # doctest: +SKIP + >>> df.describe(include="all") # doctest: +SKIP categorical numeric object count 3 3.0 3 unique 3 NaN 3 @@ -11404,7 +11524,7 @@ def describe( Including only categorical columns from a ``DataFrame`` description. - >>> df.describe(include=['category']) + >>> df.describe(include=["category"]) categorical count 3 unique 3 @@ -11545,11 +11665,14 @@ def pct_change( Percentage change in French franc, Deutsche Mark, and Italian lira from 1980-01-01 to 1980-03-01. - >>> df = pd.DataFrame({ - ... 'FR': [4.0405, 4.0963, 4.3149], - ... 'GR': [1.7246, 1.7482, 1.8519], - ... 'IT': [804.74, 810.01, 860.13]}, - ... index=['1980-01-01', '1980-02-01', '1980-03-01']) + >>> df = pd.DataFrame( + ... { + ... "FR": [4.0405, 4.0963, 4.3149], + ... "GR": [1.7246, 1.7482, 1.8519], + ... "IT": [804.74, 810.01, 860.13], + ... }, + ... index=["1980-01-01", "1980-02-01", "1980-03-01"], + ... ) >>> df FR GR IT 1980-01-01 4.0405 1.7246 804.74 @@ -11565,17 +11688,20 @@ def pct_change( Percentage of change in GOOG and APPL stock volume. Shows computing the percentage change between columns. - >>> df = pd.DataFrame({ - ... '2016': [1769950, 30586265], - ... '2015': [1500923, 40912316], - ... '2014': [1371819, 41403351]}, - ... index=['GOOG', 'APPL']) + >>> df = pd.DataFrame( + ... { + ... "2016": [1769950, 30586265], + ... "2015": [1500923, 40912316], + ... "2014": [1371819, 41403351], + ... }, + ... index=["GOOG", "APPL"], + ... 
) >>> df 2016 2015 2014 GOOG 1769950 1500923 1371819 APPL 30586265 40912316 41403351 - >>> df.pct_change(axis='columns', periods=-1) + >>> df.pct_change(axis="columns", periods=-1) 2016 2015 2014 GOOG 0.179241 0.094112 NaN APPL -0.252395 -0.011860 NaN @@ -12200,7 +12326,7 @@ def first_valid_index(self) -> Hashable | None: For DataFrame: - >>> df = pd.DataFrame({{'A': [None, None, 2], 'B': [None, 3, 4]}}) + >>> df = pd.DataFrame({{"A": [None, None, 2], "B": [None, 3, 4]}}) >>> df A B 0 NaN NaN @@ -12211,7 +12337,7 @@ def first_valid_index(self) -> Hashable | None: >>> df.last_valid_index() 2 - >>> df = pd.DataFrame({{'A': [None, None, None], 'B': [None, None, None]}}) + >>> df = pd.DataFrame({{"A": [None, None, None], "B": [None, None, None]}}) >>> df A B 0 None None diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f68a5f605e331..c4037dad1f828 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -583,12 +583,15 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): Examples -------- - >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - ... 'foo', 'bar'], - ... 'B' : [1, 2, 3, 4, 5, 6], - ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) - >>> grouped = df.groupby('A') - >>> df.groupby('A').B.filter(lambda x: x.mean() > 3.) + >>> df = pd.DataFrame( + ... { + ... "A": ["foo", "bar", "foo", "bar", "foo", "bar"], + ... "B": [1, 2, 3, 4, 5, 6], + ... "C": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], + ... } + ... ) + >>> grouped = df.groupby("A") + >>> df.groupby("A").B.filter(lambda x: x.mean() > 3.0) 1 2 3 4 5 6 @@ -629,7 +632,7 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame: -------- For SeriesGroupby: - >>> lst = ['a', 'a', 'b', 'b'] + >>> lst = ["a", "a", "b", "b"] >>> ser = pd.Series([1, 2, 3, 3], index=lst) >>> ser a 1 @@ -644,15 +647,19 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame: For Resampler: - >>> ser = pd.Series([1, 2, 3, 3], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 3], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 3 dtype: int64 - >>> ser.resample('MS').nunique() + >>> ser.resample("MS").nunique() 2023-01-01 2 2023-02-01 1 Freq: MS, dtype: int64 @@ -911,13 +918,17 @@ def take( Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... ('parrot', 'bird', 24.0), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', np.nan), - ... ('rabbit', 'mammal', 15.0)], - ... columns=['name', 'class', 'max_speed'], - ... index=[4, 3, 2, 1, 0]) + >>> df = pd.DataFrame( + ... [ + ... ("falcon", "bird", 389.0), + ... ("parrot", "bird", 24.0), + ... ("lion", "mammal", 80.5), + ... ("monkey", "mammal", np.nan), + ... ("rabbit", "mammal", 15.0), + ... ], + ... columns=["name", "class", "max_speed"], + ... index=[4, 3, 2, 1, 0], + ... ) >>> df name class max_speed 4 falcon bird 389.0 @@ -981,10 +992,19 @@ def skew( Examples -------- - >>> ser = pd.Series([390., 350., 357., np.nan, 22., 20., 30.], - ... index=['Falcon', 'Falcon', 'Falcon', 'Falcon', - ... 'Parrot', 'Parrot', 'Parrot'], - ... name="Max Speed") + >>> ser = pd.Series( + ... [390.0, 350.0, 357.0, np.nan, 22.0, 20.0, 30.0], + ... index=[ + ... "Falcon", + ... "Falcon", + ... "Falcon", + ... "Falcon", + ... "Parrot", + ... "Parrot", + ... "Parrot", + ... ], + ... name="Max Speed", + ... 
) >>> ser Falcon 390.0 Falcon 350.0 @@ -1075,8 +1095,12 @@ def idxmin(self, skipna: bool = True) -> Series: Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 @@ -1084,7 +1108,7 @@ def idxmin(self, skipna: bool = True) -> Series: 2023-02-15 4 dtype: int64 - >>> ser.groupby(['a', 'a', 'b', 'b']).idxmin() + >>> ser.groupby(["a", "a", "b", "b"]).idxmin() a 2023-01-01 b 2023-02-01 dtype: datetime64[ns] @@ -1125,8 +1149,12 @@ def idxmax(self, skipna: bool = True) -> Series: Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 @@ -1134,7 +1162,7 @@ def idxmax(self, skipna: bool = True) -> Series: 2023-02-15 4 dtype: int64 - >>> ser.groupby(['a', 'a', 'b', 'b']).idxmax() + >>> ser.groupby(["a", "a", "b", "b"]).idxmax() a 2023-01-15 b 2023-02-15 dtype: datetime64[ns] @@ -1173,7 +1201,7 @@ def is_monotonic_increasing(self) -> Series: Examples -------- - >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot']) + >>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"]) >>> s.groupby(level=0).is_monotonic_increasing Falcon False Parrot True @@ -1192,7 +1220,7 @@ def is_monotonic_decreasing(self) -> Series: Examples -------- - >>> s = pd.Series([2, 1, 3, 4], index=['Falcon', 'Falcon', 'Parrot', 'Parrot']) + >>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"]) >>> s.groupby(level=0).is_monotonic_decreasing Falcon True Parrot False @@ -1256,13 +1284,17 @@ def unique(self) -> Series: Examples -------- - >>> df = pd.DataFrame([('Chihuahua', 'dog', 6.1), - ... ('Beagle', 'dog', 15.2), - ... ('Chihuahua', 'dog', 6.9), - ... ('Persian', 'cat', 9.2), - ... ('Chihuahua', 'dog', 7), - ... ('Persian', 'cat', 8.8)], - ... columns=['breed', 'animal', 'height_in']) + >>> df = pd.DataFrame( + ... [ + ... ("Chihuahua", "dog", 6.1), + ... ("Beagle", "dog", 15.2), + ... ("Chihuahua", "dog", 6.9), + ... ("Persian", "cat", 9.2), + ... ("Chihuahua", "dog", 7), + ... ("Persian", "cat", 8.8), + ... ], + ... columns=["breed", "animal", "height_in"], + ... ) >>> df breed animal height_in 0 Chihuahua dog 6.1 @@ -1271,7 +1303,7 @@ def unique(self) -> Series: 3 Persian cat 9.2 4 Chihuahua dog 7.0 5 Persian cat 8.8 - >>> ser = df.groupby('animal')['breed'].unique() + >>> ser = df.groupby("animal")["breed"].unique() >>> ser animal cat [Persian] @@ -1826,12 +1858,15 @@ def filter(self, func, dropna: bool = True, *args, **kwargs) -> DataFrame: Examples -------- - >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', - ... 'foo', 'bar'], - ... 'B' : [1, 2, 3, 4, 5, 6], - ... 'C' : [2.0, 5., 8., 1., 2., 9.]}) - >>> grouped = df.groupby('A') - >>> grouped.filter(lambda x: x['B'].mean() > 3.) + >>> df = pd.DataFrame( + ... { + ... "A": ["foo", "bar", "foo", "bar", "foo", "bar"], + ... "B": [1, 2, 3, 4, 5, 6], + ... "C": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], + ... } + ... 
) + >>> grouped = df.groupby("A") + >>> grouped.filter(lambda x: x["B"].mean() > 3.0) A B C 1 bar 2 5.0 3 bar 4 1.0 @@ -1981,10 +2016,13 @@ def nunique(self, dropna: bool = True) -> DataFrame: Examples -------- - >>> df = pd.DataFrame({'id': ['spam', 'egg', 'egg', 'spam', - ... 'ham', 'ham'], - ... 'value1': [1, 5, 5, 2, 5, 5], - ... 'value2': list('abbaxy')}) + >>> df = pd.DataFrame( + ... { + ... "id": ["spam", "egg", "egg", "spam", "ham", "ham"], + ... "value1": [1, 5, 5, 2, 5, 5], + ... "value2": list("abbaxy"), + ... } + ... ) >>> df id value1 value2 0 spam 1 a @@ -1994,7 +2032,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: 4 ham 5 x 5 ham 5 y - >>> df.groupby('id').nunique() + >>> df.groupby("id").nunique() value1 value2 id egg 1 1 @@ -2003,7 +2041,7 @@ def nunique(self, dropna: bool = True) -> DataFrame: Check for rows with the same id but conflicting values: - >>> df.groupby('id').filter(lambda g: (g.nunique() > 1).any()) + >>> df.groupby("id").filter(lambda g: (g.nunique() > 1).any()) id value1 value2 0 spam 1 a 3 spam 2 a @@ -2054,9 +2092,13 @@ def idxmax( -------- Consider a dataset containing food consumption in Argentina. - >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], - ... 'co2_emissions': [37.2, 19.66, 1712]}, - ... index=['Pork', 'Wheat Products', 'Beef']) + >>> df = pd.DataFrame( + ... { + ... "consumption": [10.51, 103.11, 55.48], + ... "co2_emissions": [37.2, 19.66, 1712], + ... }, + ... index=["Pork", "Wheat Products", "Beef"], + ... ) >>> df consumption co2_emissions @@ -2115,9 +2157,13 @@ def idxmin( -------- Consider a dataset containing food consumption in Argentina. - >>> df = pd.DataFrame({'consumption': [10.51, 103.11, 55.48], - ... 'co2_emissions': [37.2, 19.66, 1712]}, - ... index=['Pork', 'Wheat Products', 'Beef']) + >>> df = pd.DataFrame( + ... { + ... "consumption": [10.51, 103.11, 55.48], + ... "co2_emissions": [37.2, 19.66, 1712], + ... }, + ... index=["Pork", "Wheat Products", "Beef"], + ... ) >>> df consumption co2_emissions @@ -2189,11 +2235,13 @@ def value_counts( Examples -------- - >>> df = pd.DataFrame({ - ... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'], - ... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'], - ... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR'] - ... }) + >>> df = pd.DataFrame( + ... { + ... "gender": ["male", "male", "female", "male", "female", "male"], + ... "education": ["low", "medium", "high", "low", "high", "low"], + ... "country": ["US", "FR", "US", "FR", "FR", "FR"], + ... } + ... 
) >>> df gender education country @@ -2204,7 +2252,7 @@ def value_counts( 4 female high FR 5 male low FR - >>> df.groupby('gender').value_counts() + >>> df.groupby("gender").value_counts() gender education country female high FR 1 US 1 @@ -2213,7 +2261,7 @@ def value_counts( medium FR 1 Name: count, dtype: int64 - >>> df.groupby('gender').value_counts(ascending=True) + >>> df.groupby("gender").value_counts(ascending=True) gender education country female high FR 1 US 1 @@ -2222,7 +2270,7 @@ def value_counts( low FR 2 Name: count, dtype: int64 - >>> df.groupby('gender').value_counts(normalize=True) + >>> df.groupby("gender").value_counts(normalize=True) gender education country female high FR 0.50 US 0.50 @@ -2231,7 +2279,7 @@ def value_counts( medium FR 0.25 Name: proportion, dtype: float64 - >>> df.groupby('gender', as_index=False).value_counts() + >>> df.groupby("gender", as_index=False).value_counts() gender education country count 0 female high FR 1 1 female high US 1 @@ -2239,7 +2287,7 @@ def value_counts( 3 male low US 1 4 male medium FR 1 - >>> df.groupby('gender', as_index=False).value_counts(normalize=True) + >>> df.groupby("gender", as_index=False).value_counts(normalize=True) gender education country proportion 0 female high FR 0.50 1 female high US 0.50 @@ -2288,13 +2336,17 @@ def take( Examples -------- - >>> df = pd.DataFrame([('falcon', 'bird', 389.0), - ... ('parrot', 'bird', 24.0), - ... ('lion', 'mammal', 80.5), - ... ('monkey', 'mammal', np.nan), - ... ('rabbit', 'mammal', 15.0)], - ... columns=['name', 'class', 'max_speed'], - ... index=[4, 3, 2, 1, 0]) + >>> df = pd.DataFrame( + ... [ + ... ("falcon", "bird", 389.0), + ... ("parrot", "bird", 24.0), + ... ("lion", "mammal", 80.5), + ... ("monkey", "mammal", np.nan), + ... ("rabbit", "mammal", 15.0), + ... ], + ... columns=["name", "class", "max_speed"], + ... index=[4, 3, 2, 1, 0], + ... ) >>> df name class max_speed 4 falcon bird 389.0 @@ -2372,14 +2424,15 @@ def skew( Examples -------- - >>> arrays = [['falcon', 'parrot', 'cockatoo', 'kiwi', - ... 'lion', 'monkey', 'rabbit'], - ... ['bird', 'bird', 'bird', 'bird', - ... 'mammal', 'mammal', 'mammal']] - >>> index = pd.MultiIndex.from_arrays(arrays, names=('name', 'class')) - >>> df = pd.DataFrame({'max_speed': [389.0, 24.0, 70.0, np.nan, - ... 80.5, 21.5, 15.0]}, - ... index=index) + >>> arrays = [ + ... ["falcon", "parrot", "cockatoo", "kiwi", "lion", "monkey", "rabbit"], + ... ["bird", "bird", "bird", "bird", "mammal", "mammal", "mammal"], + ... ] + >>> index = pd.MultiIndex.from_arrays(arrays, names=("name", "class")) + >>> df = pd.DataFrame( + ... {"max_speed": [389.0, 24.0, 70.0, np.nan, 80.5, 21.5, 15.0]}, + ... index=index, + ... ) >>> df max_speed name class @@ -2548,10 +2601,18 @@ def corrwith( Examples -------- - >>> df1 = pd.DataFrame({"Day": [1, 1, 1, 2, 2, 2, 3, 3, 3], - ... "Data": [6, 6, 8, 5, 4, 2, 7, 3, 9]}) - >>> df2 = pd.DataFrame({"Day": [1, 1, 1, 2, 2, 2, 3, 3, 3], - ... "Data": [5, 3, 8, 3, 1, 1, 2, 3, 6]}) + >>> df1 = pd.DataFrame( + ... { + ... "Day": [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... "Data": [6, 6, 8, 5, 4, 2, 7, 3, 9], + ... } + ... ) + >>> df2 = pd.DataFrame( + ... { + ... "Day": [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... "Data": [5, 3, 8, 3, 1, 1, 2, 3, 6], + ... } + ... 
) >>> df1.groupby("Day").corrwith(df2) Data Day diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1440bd0adfd26..4106e5c46e00c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -800,7 +800,7 @@ def groups(self) -> dict[Hashable, Index]: For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 3], index=lst) >>> ser a 1 @@ -824,15 +824,19 @@ def groups(self) -> dict[Hashable, Index]: For Resampler: - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').groups + >>> ser.resample("MS").groups {Timestamp('2023-01-01 00:00:00'): 2, Timestamp('2023-02-01 00:00:00'): 4} """ return self._grouper.groups @@ -853,7 +857,7 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 3], index=lst) >>> ser a 1 @@ -866,8 +870,9 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: For DataFrameGroupBy: >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["owl", "toucan", "eagle"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["owl", "toucan", "eagle"] + ... ) >>> df a b c owl 1 2 3 @@ -878,15 +883,19 @@ def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: For Resampler: - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').indices + >>> ser.resample("MS").indices defaultdict(, {Timestamp('2023-01-01 00:00:00'): [0, 1], Timestamp('2023-02-01 00:00:00'): [2, 3]}) """ @@ -1043,7 +1052,7 @@ def get_group(self, name) -> DataFrame | Series: For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 3], index=lst) >>> ser a 1 @@ -1058,8 +1067,9 @@ def get_group(self, name) -> DataFrame | Series: For DataFrameGroupBy: >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["owl", "toucan", "eagle"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["owl", "toucan", "eagle"] + ... ) >>> df a b c owl 1 2 3 @@ -1072,15 +1082,19 @@ def get_group(self, name) -> DataFrame | Series: For Resampler: - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... 
) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').get_group('2023-01-01') + >>> ser.resample("MS").get_group("2023-01-01") 2023-01-01 1 2023-01-15 2 dtype: int64 @@ -1125,7 +1139,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 3], index=lst) >>> ser a 1 @@ -1133,7 +1147,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: b 3 dtype: int64 >>> for x, y in ser.groupby(level=0): - ... print(f'{x}\\n{y}\\n') + ... print(f"{x}\\n{y}\\n") a a 1 a 2 @@ -1152,7 +1166,7 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: 1 1 5 6 2 7 8 9 >>> for x, y in df.groupby(by=["a"]): - ... print(f'{x}\\n{y}\\n') + ... print(f"{x}\\n{y}\\n") (1,) a b c 0 1 2 3 @@ -1163,16 +1177,20 @@ def __iter__(self) -> Iterator[tuple[Hashable, NDFrameT]]: For Resampler: - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> for x, y in ser.resample('MS'): - ... print(f'{x}\\n{y}\\n') + >>> for x, y in ser.resample("MS"): + ... print(f"{x}\\n{y}\\n") 2023-01-01 00:00:00 2023-01-01 1 2023-01-15 2 @@ -2079,7 +2097,7 @@ def any(self, skipna: bool = True) -> NDFrameT: -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 0], index=lst) >>> ser a 1 @@ -2094,8 +2112,9 @@ def any(self, skipna: bool = True) -> NDFrameT: For DataFrameGroupBy: >>> data = [[1, 0, 3], [1, 0, 6], [7, 1, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["ostrich", "penguin", "parrot"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["ostrich", "penguin", "parrot"] + ... ) >>> df a b c ostrich 1 0 3 @@ -2136,7 +2155,7 @@ def all(self, skipna: bool = True) -> NDFrameT: For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 0], index=lst) >>> ser a 1 @@ -2151,8 +2170,9 @@ def all(self, skipna: bool = True) -> NDFrameT: For DataFrameGroupBy: >>> data = [[1, 0, 3], [1, 5, 6], [7, 8, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["ostrich", "penguin", "parrot"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["ostrich", "penguin", "parrot"] + ... ) >>> df a b c ostrich 1 0 3 @@ -2186,7 +2206,7 @@ def count(self) -> NDFrameT: -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, np.nan], index=lst) >>> ser a 1.0 @@ -2201,8 +2221,9 @@ def count(self) -> NDFrameT: For DataFrameGroupBy: >>> data = [[1, np.nan, 3], [1, np.nan, 6], [7, 8, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["cow", "horse", "bull"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["cow", "horse", "bull"] + ... ) >>> df a b c cow 1 NaN 3 @@ -2216,15 +2237,19 @@ def count(self) -> NDFrameT: For Resampler: - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... 
) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').count() + >>> ser.resample("MS").count() 2023-01-01 2 2023-02-01 2 Freq: MS, dtype: int64 @@ -2309,14 +2334,15 @@ def mean( %(see_also)s Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], - ... 'B': [np.nan, 2, 3, 4, 5], - ... 'C': [1, 2, 1, 1, 2]}, columns=['A', 'B', 'C']) + >>> df = pd.DataFrame( + ... {"A": [1, 1, 2, 1, 2], "B": [np.nan, 2, 3, 4, 5], "C": [1, 2, 1, 1, 2]}, + ... columns=["A", "B", "C"], + ... ) Groupby one column and return the mean of the remaining columns in each group. - >>> df.groupby('A').mean() + >>> df.groupby("A").mean() B C A 1 3.0 1.333333 @@ -2324,7 +2350,7 @@ def mean( Groupby two columns and return the mean of the remaining column. - >>> df.groupby(['A', 'B']).mean() + >>> df.groupby(["A", "B"]).mean() C A B 1 2.0 2.0 @@ -2335,7 +2361,7 @@ def mean( Groupby one column and return the mean of only particular column in the group. - >>> df.groupby('A')['B'].mean() + >>> df.groupby("A")["B"].mean() A 1 3.0 2 4.0 @@ -2384,7 +2410,7 @@ def median(self, numeric_only: bool = False) -> NDFrameT: -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) >>> ser a 7 @@ -2401,9 +2427,10 @@ def median(self, numeric_only: bool = False) -> NDFrameT: For DataFrameGroupBy: - >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} - >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', - ... 'mouse', 'mouse', 'mouse', 'mouse']) + >>> data = {"a": [1, 3, 5, 7, 7, 8, 3], "b": [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame( + ... data, index=["dog", "dog", "dog", "mouse", "mouse", "mouse", "mouse"] + ... ) >>> df a b dog 1 1 @@ -2420,14 +2447,20 @@ def median(self, numeric_only: bool = False) -> NDFrameT: For Resampler: - >>> ser = pd.Series([1, 2, 3, 3, 4, 5], - ... index=pd.DatetimeIndex(['2023-01-01', - ... '2023-01-10', - ... '2023-01-15', - ... '2023-02-01', - ... '2023-02-10', - ... '2023-02-15'])) - >>> ser.resample('MS').median() + >>> ser = pd.Series( + ... [1, 2, 3, 3, 4, 5], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").median() 2023-01-01 2.0 2023-02-01 4.0 Freq: MS, dtype: float64 @@ -2494,7 +2527,7 @@ def std( -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) >>> ser a 7 @@ -2511,9 +2544,10 @@ def std( For DataFrameGroupBy: - >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} - >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', - ... 'mouse', 'mouse', 'mouse', 'mouse']) + >>> data = {"a": [1, 3, 5, 7, 7, 8, 3], "b": [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame( + ... data, index=["dog", "dog", "dog", "mouse", "mouse", "mouse", "mouse"] + ... ) >>> df a b dog 1 1 @@ -2603,7 +2637,7 @@ def var( -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) >>> ser a 7 @@ -2620,9 +2654,10 @@ def var( For DataFrameGroupBy: - >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} - >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', - ... 
'mouse', 'mouse', 'mouse', 'mouse']) + >>> data = {"a": [1, 3, 5, 7, 7, 8, 3], "b": [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame( + ... data, index=["dog", "dog", "dog", "mouse", "mouse", "mouse", "mouse"] + ... ) >>> df a b dog 1 1 @@ -2811,7 +2846,7 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'b', 'b'] + >>> lst = ["a", "a", "b", "b"] >>> ser = pd.Series([5, 10, 8, 14], index=lst) >>> ser a 5 @@ -2827,8 +2862,11 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: For DataFrameGroupBy: >>> data = [[1, 12, 11], [1, 15, 2], [2, 5, 8], [2, 6, 12]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["tuna", "salmon", "catfish", "goldfish"]) + >>> df = pd.DataFrame( + ... data, + ... columns=["a", "b", "c"], + ... index=["tuna", "salmon", "catfish", "goldfish"], + ... ) >>> df a b c tuna 1 12 11 @@ -2843,14 +2881,20 @@ def sem(self, ddof: int = 1, numeric_only: bool = False) -> NDFrameT: For Resampler: - >>> ser = pd.Series([1, 3, 2, 4, 3, 8], - ... index=pd.DatetimeIndex(['2023-01-01', - ... '2023-01-10', - ... '2023-01-15', - ... '2023-02-01', - ... '2023-02-10', - ... '2023-02-15'])) - >>> ser.resample('MS').sem() + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").sem() 2023-01-01 0.577350 2023-02-01 1.527525 Freq: MS, dtype: float64 @@ -2885,7 +2929,7 @@ def size(self) -> DataFrame | Series: For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([1, 2, 3], index=lst) >>> ser a 1 @@ -2898,8 +2942,9 @@ def size(self) -> DataFrame | Series: dtype: int64 >>> data = [[1, 2, 3], [1, 5, 6], [7, 8, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["owl", "toucan", "eagle"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["owl", "toucan", "eagle"] + ... ) >>> df a b c owl 1 2 3 @@ -2913,14 +2958,16 @@ def size(self) -> DataFrame | Series: For Resampler: - >>> ser = pd.Series([1, 2, 3], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01'])) + >>> ser = pd.Series( + ... [1, 2, 3], + ... index=pd.DatetimeIndex(["2023-01-01", "2023-01-15", "2023-02-01"]), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 dtype: int64 - >>> ser.resample('MS').size() + >>> ser.resample("MS").size() 2023-01-01 2 2023-02-01 1 Freq: MS, dtype: int64 @@ -3252,9 +3299,15 @@ def first( Examples -------- - >>> df = pd.DataFrame(dict(A=[1, 1, 3], B=[None, 5, 6], C=[1, 2, 3], - ... D=['3/11/2000', '3/12/2000', '3/13/2000'])) - >>> df['D'] = pd.to_datetime(df['D']) + >>> df = pd.DataFrame( + ... dict( + ... A=[1, 1, 3], + ... B=[None, 5, 6], + ... C=[1, 2, 3], + ... D=["3/11/2000", "3/12/2000", "3/13/2000"], + ... ) + ... ) + >>> df["D"] = pd.to_datetime(df["D"]) >>> df.groupby("A").first() B C D A @@ -3381,7 +3434,16 @@ def ohlc(self) -> DataFrame: For SeriesGroupBy: - >>> lst = ['SPX', 'CAC', 'SPX', 'CAC', 'SPX', 'CAC', 'SPX', 'CAC',] + >>> lst = [ + ... "SPX", + ... "CAC", + ... "SPX", + ... "CAC", + ... "SPX", + ... "CAC", + ... "SPX", + ... "CAC", + ... ] >>> ser = pd.Series([3.4, 9.0, 7.2, 5.2, 8.8, 9.4, 0.1, 0.5], index=lst) >>> ser SPX 3.4 @@ -3400,10 +3462,13 @@ def ohlc(self) -> DataFrame: For DataFrameGroupBy: - >>> data = {2022: [1.2, 2.3, 8.9, 4.5, 4.4, 3, 2 , 1], - ... 
2023: [3.4, 9.0, 7.2, 5.2, 8.8, 9.4, 8.2, 1.0]} - >>> df = pd.DataFrame(data, index=['SPX', 'CAC', 'SPX', 'CAC', - ... 'SPX', 'CAC', 'SPX', 'CAC']) + >>> data = { + ... 2022: [1.2, 2.3, 8.9, 4.5, 4.4, 3, 2, 1], + ... 2023: [3.4, 9.0, 7.2, 5.2, 8.8, 9.4, 8.2, 1.0], + ... } + >>> df = pd.DataFrame( + ... data, index=["SPX", "CAC", "SPX", "CAC", "SPX", "CAC", "SPX", "CAC"] + ... ) >>> df 2022 2023 SPX 1.2 3.4 @@ -3422,14 +3487,20 @@ def ohlc(self) -> DataFrame: For Resampler: - >>> ser = pd.Series([1, 3, 2, 4, 3, 5], - ... index=pd.DatetimeIndex(['2023-01-01', - ... '2023-01-10', - ... '2023-01-15', - ... '2023-02-01', - ... '2023-02-10', - ... '2023-02-15'])) - >>> ser.resample('MS').ohlc() + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 5], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").ohlc() open high low close 2023-01-01 1 3 1 2 2023-02-01 4 5 3 5 @@ -3542,10 +3613,8 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp Examples -------- - >>> idx = pd.date_range('1/1/2000', periods=4, freq='min') - >>> df = pd.DataFrame(data=4 * [range(2)], - ... index=idx, - ... columns=['a', 'b']) + >>> idx = pd.date_range("1/1/2000", periods=4, freq="min") + >>> df = pd.DataFrame(data=4 * [range(2)], index=idx, columns=["a", "b"]) >>> df.iloc[2, 0] = 5 >>> df a b @@ -3557,7 +3626,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp Downsample the DataFrame into 3 minute bins and sum the values of the timestamps falling into a bin. - >>> df.groupby('a').resample('3min', include_groups=False).sum() + >>> df.groupby("a").resample("3min", include_groups=False).sum() b a 0 2000-01-01 00:00:00 2 @@ -3566,7 +3635,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp Upsample the series into 30 second bins. - >>> df.groupby('a').resample('30s', include_groups=False).sum() + >>> df.groupby("a").resample("30s", include_groups=False).sum() b a 0 2000-01-01 00:00:00 1 @@ -3580,7 +3649,7 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp Resample by month. Values are assigned to the month of the period. - >>> df.groupby('a').resample('ME', include_groups=False).sum() + >>> df.groupby("a").resample("ME", include_groups=False).sum() b a 0 2000-01-31 3 @@ -3590,8 +3659,8 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp side of the bin interval. >>> ( - ... df.groupby('a') - ... .resample('3min', closed='right', include_groups=False) + ... df.groupby("a") + ... .resample("3min", closed="right", include_groups=False) ... .sum() ... ) b @@ -3605,8 +3674,8 @@ def resample(self, rule, *args, include_groups: bool = True, **kwargs) -> Resamp the left. >>> ( - ... df.groupby('a') - ... .resample('3min', closed='right', label='right', include_groups=False) + ... df.groupby("a") + ... .resample("3min", closed="right", label="right", include_groups=False) ... .sum() ... ) b @@ -3712,9 +3781,13 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 2, 2], - ... 'B': [1, 2, 3, 4], - ... 'C': [0.362, 0.227, 1.267, -0.562]}) + >>> df = pd.DataFrame( + ... { + ... "A": [1, 1, 2, 2], + ... "B": [1, 2, 3, 4], + ... "C": [0.362, 0.227, 1.267, -0.562], + ... } + ... 
) >>> df A B C 0 1 1 0.362 @@ -3722,7 +3795,7 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: 2 2 3 1.267 3 2 4 -0.562 - >>> df.groupby('A').rolling(2).sum() + >>> df.groupby("A").rolling(2).sum() B C A 1 0 NaN NaN @@ -3730,7 +3803,7 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: 2 2 NaN NaN 3 7.0 0.705 - >>> df.groupby('A').rolling(2, min_periods=1).sum() + >>> df.groupby("A").rolling(2, min_periods=1).sum() B C A 1 0 1.0 0.362 @@ -3738,7 +3811,7 @@ def rolling(self, *args, **kwargs) -> RollingGroupby: 2 2 3.0 1.267 3 7.0 0.705 - >>> df.groupby('A').rolling(2, on='B').sum() + >>> df.groupby("A").rolling(2, on="B").sum() B C A 1 0 1 NaN @@ -3993,7 +4066,7 @@ def bfill(self, limit: int | None = None): With Series: - >>> index = ['Falcon', 'Falcon', 'Parrot', 'Parrot', 'Parrot'] + >>> index = ["Falcon", "Falcon", "Parrot", "Parrot", "Parrot"] >>> s = pd.Series([None, 1, None, None, 3], index=index) >>> s Falcon NaN @@ -4019,8 +4092,10 @@ def bfill(self, limit: int | None = None): With DataFrame: - >>> df = pd.DataFrame({'A': [1, None, None, None, 4], - ... 'B': [None, None, 5, None, 7]}, index=index) + >>> df = pd.DataFrame( + ... {"A": [1, None, None, None, 4], "B": [None, None, 5, None, 7]}, + ... index=index, + ... ) >>> df A B Falcon 1.0 NaN @@ -4081,9 +4156,10 @@ def nth(self) -> GroupByNthSelector: Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], - ... 'B': [np.nan, 2, 3, 4, 5]}, columns=['A', 'B']) - >>> g = df.groupby('A') + >>> df = pd.DataFrame( + ... {"A": [1, 1, 2, 1, 2], "B": [np.nan, 2, 3, 4, 5]}, columns=["A", "B"] + ... ) + >>> g = df.groupby("A") >>> g.nth(0) A B 0 1 NaN @@ -4124,7 +4200,7 @@ def nth(self) -> GroupByNthSelector: Specifying `dropna` allows ignoring ``NaN`` values - >>> g.nth(0, dropna='any') + >>> g.nth(0, dropna="any") A B 1 1 2.0 2 2 3.0 @@ -4132,7 +4208,7 @@ def nth(self) -> GroupByNthSelector: When the specified ``n`` is larger than any of the groups, an empty DataFrame is returned - >>> g.nth(3, dropna='any') + >>> g.nth(3, dropna="any") Empty DataFrame Columns: [A, B] Index: [] @@ -4232,11 +4308,11 @@ def quantile( Examples -------- - >>> df = pd.DataFrame([ - ... ['a', 1], ['a', 2], ['a', 3], - ... ['b', 1], ['b', 3], ['b', 5] - ... ], columns=['key', 'val']) - >>> df.groupby('key').quantile() + >>> df = pd.DataFrame( + ... [["a", 1], ["a", 2], ["a", 3], ["b", 1], ["b", 3], ["b", 5]], + ... columns=["key", "val"], + ... ) + >>> df.groupby("key").quantile() val key a 2.0 @@ -4533,8 +4609,7 @@ def cumcount(self, ascending: bool = True): Examples -------- - >>> df = pd.DataFrame([['a'], ['a'], ['a'], ['b'], ['b'], ['a']], - ... columns=['A']) + >>> df = pd.DataFrame([["a"], ["a"], ["a"], ["b"], ["b"], ["a"]], columns=["A"]) >>> df A 0 a @@ -4543,7 +4618,7 @@ def cumcount(self, ascending: bool = True): 3 b 4 b 5 a - >>> df.groupby('A').cumcount() + >>> df.groupby("A").cumcount() 0 0 1 1 2 2 @@ -4551,7 +4626,7 @@ def cumcount(self, ascending: bool = True): 4 1 5 3 dtype: int64 - >>> df.groupby('A').cumcount(ascending=False) + >>> df.groupby("A").cumcount(ascending=False) 0 3 1 2 2 1 @@ -4618,8 +4693,8 @@ def rank( 7 b 4 8 b 1 9 b 5 - >>> for method in ['average', 'min', 'max', 'dense', 'first']: - ... df[f'{method}_rank'] = df.groupby('group')['value'].rank(method) + >>> for method in ["average", "min", "max", "dense", "first"]: + ... 
df[f"{method}_rank"] = df.groupby("group")["value"].rank(method) >>> df group value average_rank min_rank max_rank dense_rank first_rank 0 a 2 1.5 1.0 2.0 1.0 1.0 @@ -4665,7 +4740,7 @@ def cumprod(self, *args, **kwargs) -> NDFrameT: -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([6, 2, 0], index=lst) >>> ser a 6 @@ -4681,8 +4756,9 @@ def cumprod(self, *args, **kwargs) -> NDFrameT: For DataFrameGroupBy: >>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["cow", "horse", "bull"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["cow", "horse", "bull"] + ... ) >>> df a b c cow 1 8 2 @@ -4714,7 +4790,7 @@ def cumsum(self, *args, **kwargs) -> NDFrameT: -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'b'] + >>> lst = ["a", "a", "b"] >>> ser = pd.Series([6, 2, 0], index=lst) >>> ser a 6 @@ -4730,8 +4806,9 @@ def cumsum(self, *args, **kwargs) -> NDFrameT: For DataFrameGroupBy: >>> data = [[1, 8, 2], [1, 2, 5], [2, 6, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["fox", "gorilla", "lion"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["fox", "gorilla", "lion"] + ... ) >>> df a b c fox 1 8 2 @@ -4767,7 +4844,7 @@ def cummin( -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([1, 6, 2, 3, 0, 4], index=lst) >>> ser a 1 @@ -4789,8 +4866,9 @@ def cummin( For DataFrameGroupBy: >>> data = [[1, 0, 2], [1, 1, 5], [6, 6, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["snake", "rabbit", "turtle"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["snake", "rabbit", "turtle"] + ... ) >>> df a b c snake 1 0 2 @@ -4828,7 +4906,7 @@ def cummax( -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([1, 6, 2, 3, 1, 4], index=lst) >>> ser a 1 @@ -4850,8 +4928,9 @@ def cummax( For DataFrameGroupBy: >>> data = [[1, 8, 2], [1, 1, 0], [2, 6, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["cow", "horse", "bull"]) + >>> df = pd.DataFrame( + ... data, columns=["a", "b", "c"], index=["cow", "horse", "bull"] + ... ) >>> df a b c cow 1 8 2 @@ -4915,7 +4994,7 @@ def shift( For SeriesGroupBy: - >>> lst = ['a', 'a', 'b', 'b'] + >>> lst = ["a", "a", "b", "b"] >>> ser = pd.Series([1, 2, 3, 4], index=lst) >>> ser a 1 @@ -4933,8 +5012,11 @@ def shift( For DataFrameGroupBy: >>> data = [[1, 2, 3], [1, 5, 6], [2, 5, 8], [2, 6, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["tuna", "salmon", "catfish", "goldfish"]) + >>> df = pd.DataFrame( + ... data, + ... columns=["a", "b", "c"], + ... index=["tuna", "salmon", "catfish", "goldfish"], + ... ) >>> df a b c tuna 1 2 3 @@ -5039,7 +5121,7 @@ def diff( -------- For SeriesGroupBy: - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([7, 2, 8, 4, 3, 3], index=lst) >>> ser a 7 @@ -5060,9 +5142,10 @@ def diff( For DataFrameGroupBy: - >>> data = {'a': [1, 3, 5, 7, 7, 8, 3], 'b': [1, 4, 8, 4, 4, 2, 1]} - >>> df = pd.DataFrame(data, index=['dog', 'dog', 'dog', - ... 'mouse', 'mouse', 'mouse', 'mouse']) + >>> data = {"a": [1, 3, 5, 7, 7, 8, 3], "b": [1, 4, 8, 4, 4, 2, 1]} + >>> df = pd.DataFrame( + ... data, index=["dog", "dog", "dog", "mouse", "mouse", "mouse", "mouse"] + ... 
) >>> df a b dog 1 1 @@ -5121,7 +5204,7 @@ def pct_change( For SeriesGroupBy: - >>> lst = ['a', 'a', 'b', 'b'] + >>> lst = ["a", "a", "b", "b"] >>> ser = pd.Series([1, 2, 3, 4], index=lst) >>> ser a 1 @@ -5139,8 +5222,11 @@ def pct_change( For DataFrameGroupBy: >>> data = [[1, 2, 3], [1, 5, 6], [2, 5, 8], [2, 6, 9]] - >>> df = pd.DataFrame(data, columns=["a", "b", "c"], - ... index=["tuna", "salmon", "catfish", "goldfish"]) + >>> df = pd.DataFrame( + ... data, + ... columns=["a", "b", "c"], + ... index=["tuna", "salmon", "catfish", "goldfish"], + ... ) >>> df a b c tuna 1 2 3 @@ -5227,13 +5313,12 @@ def head(self, n: int = 5) -> NDFrameT: Examples -------- - >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], - ... columns=['A', 'B']) - >>> df.groupby('A').head(1) + >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"]) + >>> df.groupby("A").head(1) A B 0 1 2 2 5 6 - >>> df.groupby('A').head(-1) + >>> df.groupby("A").head(-1) A B 0 1 2 """ @@ -5265,13 +5350,14 @@ def tail(self, n: int = 5) -> NDFrameT: Examples -------- - >>> df = pd.DataFrame([['a', 1], ['a', 2], ['b', 1], ['b', 2]], - ... columns=['A', 'B']) - >>> df.groupby('A').tail(1) + >>> df = pd.DataFrame( + ... [["a", 1], ["a", 2], ["b", 1], ["b", 2]], columns=["A", "B"] + ... ) + >>> df.groupby("A").tail(1) A B 1 a 2 3 b 2 - >>> df.groupby('A').tail(-1) + >>> df.groupby("A").tail(-1) A B 1 a 2 3 b 2 diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 827c44736c6c0..7a316b28d902a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -148,10 +148,10 @@ class Grouper: ... pd.Timestamp("2000-01-02"), ... pd.Timestamp("2000-01-02"), ... pd.Timestamp("2000-01-09"), - ... pd.Timestamp("2000-01-16") + ... pd.Timestamp("2000-01-16"), ... ], ... "ID": [0, 1, 2, 3], - ... "Price": [10, 20, 30, 40] + ... "Price": [10, 20, 30, 40], ... } ... 
) >>> df @@ -169,8 +169,8 @@ class Grouper: If you want to adjust the start of the bins based on a fixed timestamp: - >>> start, end = '2000-10-01 23:30:00', '2000-10-02 00:30:00' - >>> rng = pd.date_range(start, end, freq='7min') + >>> start, end = "2000-10-01 23:30:00", "2000-10-02 00:30:00" + >>> rng = pd.date_range(start, end, freq="7min") >>> ts = pd.Series(np.arange(len(rng)) * 3, index=rng) >>> ts 2000-10-01 23:30:00 0 @@ -184,7 +184,7 @@ class Grouper: 2000-10-02 00:26:00 24 Freq: 7min, dtype: int64 - >>> ts.groupby(pd.Grouper(freq='17min')).sum() + >>> ts.groupby(pd.Grouper(freq="17min")).sum() 2000-10-01 23:14:00 0 2000-10-01 23:31:00 9 2000-10-01 23:48:00 21 @@ -192,7 +192,7 @@ class Grouper: 2000-10-02 00:22:00 24 Freq: 17min, dtype: int64 - >>> ts.groupby(pd.Grouper(freq='17min', origin='epoch')).sum() + >>> ts.groupby(pd.Grouper(freq="17min", origin="epoch")).sum() 2000-10-01 23:18:00 0 2000-10-01 23:35:00 18 2000-10-01 23:52:00 27 @@ -200,7 +200,7 @@ class Grouper: 2000-10-02 00:26:00 24 Freq: 17min, dtype: int64 - >>> ts.groupby(pd.Grouper(freq='17min', origin='2000-01-01')).sum() + >>> ts.groupby(pd.Grouper(freq="17min", origin="2000-01-01")).sum() 2000-10-01 23:24:00 3 2000-10-01 23:41:00 15 2000-10-01 23:58:00 45 @@ -210,14 +210,14 @@ class Grouper: If you want to adjust the start of the bins with an `offset` Timedelta, the two following lines are equivalent: - >>> ts.groupby(pd.Grouper(freq='17min', origin='start')).sum() + >>> ts.groupby(pd.Grouper(freq="17min", origin="start")).sum() 2000-10-01 23:30:00 9 2000-10-01 23:47:00 21 2000-10-02 00:04:00 54 2000-10-02 00:21:00 24 Freq: 17min, dtype: int64 - >>> ts.groupby(pd.Grouper(freq='17min', offset='23h30min')).sum() + >>> ts.groupby(pd.Grouper(freq="17min", offset="23h30min")).sum() 2000-10-01 23:30:00 9 2000-10-01 23:47:00 21 2000-10-02 00:04:00 54 @@ -227,7 +227,7 @@ class Grouper: To replace the use of the deprecated `base` argument, you can now use `offset`, in this example it is equivalent to have `base=2`: - >>> ts.groupby(pd.Grouper(freq='17min', offset='2min')).sum() + >>> ts.groupby(pd.Grouper(freq="17min", offset="2min")).sum() 2000-10-01 23:16:00 0 2000-10-01 23:33:00 9 2000-10-01 23:50:00 36 diff --git a/pandas/core/groupby/indexing.py b/pandas/core/groupby/indexing.py index a3c5ab8edc94e..75c0a062b57d0 100644 --- a/pandas/core/groupby/indexing.py +++ b/pandas/core/groupby/indexing.py @@ -99,8 +99,9 @@ def _positional_selector(self) -> GroupByPositionalSelector: Examples -------- - >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], - ... columns=["A", "B"]) + >>> df = pd.DataFrame( + ... [["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], columns=["A", "B"] + ... 
) >>> df.groupby("A")._positional_selector[1:2] A B 1 a 2 diff --git a/pandas/core/indexers/objects.py b/pandas/core/indexers/objects.py index 5119089bac977..3dd256e9ce45d 100644 --- a/pandas/core/indexers/objects.py +++ b/pandas/core/indexers/objects.py @@ -300,7 +300,7 @@ class FixedForwardWindowIndexer(BaseIndexer): Examples -------- - >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]}) >>> df B 0 0.0 diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index 55bb58f3108c3..78dbe3a1ca632 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -202,7 +202,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None: Examples -------- - >>> validate_indices(np.array([1, 2]), 3) # OK + >>> validate_indices(np.array([1, 2]), 3) # OK >>> validate_indices(np.array([1, -2]), 3) Traceback (most recent call last): @@ -214,7 +214,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None: ... IndexError: indices are out-of-bounds - >>> validate_indices(np.array([-1, -1]), 0) # OK + >>> validate_indices(np.array([-1, -1]), 0) # OK >>> validate_indices(np.array([0, 1]), 0) Traceback (most recent call last): @@ -502,7 +502,7 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: For non-integer/boolean dtypes, an appropriate error is raised: - >>> indexer = np.array([0., 2.], dtype="float64") + >>> indexer = np.array([0.0, 2.0], dtype="float64") >>> pd.api.indexers.check_array_indexer(arr, indexer) Traceback (most recent call last): ... diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index a91fb0a8d718d..8a742a0a9d57d 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -346,7 +346,7 @@ def to_pydatetime(self) -> np.ndarray: Examples -------- - >>> s = pd.Series(pd.date_range('20180310', periods=2)) + >>> s = pd.Series(pd.date_range("20180310", periods=2)) >>> s 0 2018-03-10 1 2018-03-11 @@ -358,7 +358,7 @@ def to_pydatetime(self) -> np.ndarray: pandas' nanosecond precision is truncated to microseconds. 
- >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns')) + >>> s = pd.Series(pd.date_range("20180310", periods=2, freq="ns")) >>> s 0 2018-03-10 00:00:00.000000000 1 2018-03-10 00:00:00.000000001 @@ -494,7 +494,7 @@ def components(self) -> DataFrame: Examples -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s')) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="s")) >>> s 0 0 days 00:00:00 1 0 days 00:00:01 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e87ecb1b6011c..124d56d737251 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -362,7 +362,7 @@ class Index(IndexOpsMixin, PandasObject): >>> pd.Index([1, 2, 3]) Index([1, 2, 3], dtype='int64') - >>> pd.Index(list('abc')) + >>> pd.Index(list("abc")) Index(['a', 'b', 'c'], dtype='object') >>> pd.Index([1, 2, 3], dtype="uint8") @@ -725,7 +725,7 @@ def _format_duplicate_message(self) -> DataFrame: Examples -------- - >>> idx = pd.Index(['a', 'b', 'a']) + >>> idx = pd.Index(["a", "b", "a"]) >>> idx._format_duplicate_message() positions label @@ -812,7 +812,7 @@ def is_(self, other) -> bool: Examples -------- - >>> idx1 = pd.Index(['1', '2', '3']) + >>> idx1 = pd.Index(["1", "2", "3"]) >>> idx1.is_(idx1.view()) True @@ -1006,7 +1006,7 @@ def ravel(self, order: str_t = "C") -> Self: Examples -------- - >>> s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + >>> s = pd.Series([1, 2, 3], index=["a", "b", "c"]) >>> s.index.ravel() Index(['a', 'b', 'c'], dtype='object') """ @@ -1076,7 +1076,7 @@ def astype(self, dtype, copy: bool = True): >>> idx = pd.Index([1, 2, 3]) >>> idx Index([1, 2, 3], dtype='int64') - >>> idx.astype('float') + >>> idx.astype("float") Index([1.0, 2.0, 3.0], dtype='float64') """ if dtype is not None: @@ -1279,7 +1279,7 @@ def copy( Examples -------- - >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx = pd.Index(["a", "b", "c"]) >>> new_idx = idx.copy() >>> idx is new_idx False @@ -1571,7 +1571,7 @@ def to_series(self, index=None, name: Hashable | None = None) -> Series: Examples -------- - >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') + >>> idx = pd.Index(["Ant", "Bear", "Cow"], name="animal") By default, the original index and original name is reused. @@ -1592,7 +1592,7 @@ def to_series(self, index=None, name: Hashable | None = None) -> Series: To override the name of the resulting column, specify ``name``: - >>> idx.to_series(name='zoo') + >>> idx.to_series(name="zoo") animal Ant Ant Bear Bear @@ -1635,7 +1635,7 @@ def to_frame( Examples -------- - >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') + >>> idx = pd.Index(["Ant", "Bear", "Cow"], name="animal") >>> idx.to_frame() animal animal @@ -1653,7 +1653,7 @@ def to_frame( To override the name of the resulting column, specify `name`: - >>> idx.to_frame(index=False, name='zoo') + >>> idx.to_frame(index=False, name="zoo") zoo 0 Ant 1 Bear @@ -1679,7 +1679,7 @@ def name(self) -> Hashable: Examples -------- - >>> idx = pd.Index([1, 2, 3], name='x') + >>> idx = pd.Index([1, 2, 3], name="x") >>> idx Index([1, 2, 3], dtype='int64', name='x') >>> idx.name @@ -1848,19 +1848,18 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: >>> idx = pd.Index([1, 2, 3, 4]) >>> idx Index([1, 2, 3, 4], dtype='int64') - >>> idx.set_names('quarter') + >>> idx.set_names("quarter") Index([1, 2, 3, 4], dtype='int64', name='quarter') - >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], - ... 
[2018, 2019]]) + >>> idx = pd.MultiIndex.from_product([["python", "cobra"], [2018, 2019]]) >>> idx MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], ) - >>> idx = idx.set_names(['kind', 'year']) - >>> idx.set_names('species', level=0) + >>> idx = idx.set_names(["kind", "year"]) + >>> idx.set_names("species", level=0) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), @@ -1869,7 +1868,7 @@ def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None: When renaming levels with a dict, levels can not be passed. - >>> idx.set_names({'kind': 'snake'}) + >>> idx.set_names({"kind": "snake"}) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), @@ -1952,26 +1951,26 @@ def rename(self, name, inplace: bool = False) -> Self | None: Examples -------- - >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score') - >>> idx.rename('grade') + >>> idx = pd.Index(["A", "C", "A", "B"], name="score") + >>> idx.rename("grade") Index(['A', 'C', 'A', 'B'], dtype='object', name='grade') - >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], - ... [2018, 2019]], - ... names=('kind', 'year')) + >>> idx = pd.MultiIndex.from_product( + ... [["python", "cobra"], [2018, 2019]], names=["kind", "year"] + ... ) >>> idx MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], names=('kind', 'year')) - >>> idx.rename(['species', 'year']) + >>> idx.rename(["species", "year"]) MultiIndex([('python', 2018), ('python', 2019), ( 'cobra', 2018), ( 'cobra', 2019)], names=('species', 'year')) - >>> idx.rename('species') + >>> idx.rename("species") Traceback (most recent call last): TypeError: Must pass list-like as `names`. """ @@ -2094,7 +2093,7 @@ def _get_level_values(self, level) -> Index: Examples -------- - >>> idx = pd.Index(list('abc')) + >>> idx = pd.Index(list("abc")) >>> idx Index(['a', 'b', 'c'], dtype='object') @@ -2129,7 +2128,7 @@ def droplevel(self, level: IndexLabel = 0): Examples -------- >>> mi = pd.MultiIndex.from_arrays( - ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'] + ... [[1, 2], [3, 4], [5, 6]], names=["x", "y", "z"] ... ) >>> mi MultiIndex([(1, 3, 5), @@ -2146,12 +2145,12 @@ def droplevel(self, level: IndexLabel = 0): (2, 4)], names=('x', 'y')) - >>> mi.droplevel('z') + >>> mi.droplevel("z") MultiIndex([(1, 3), (2, 4)], names=('x', 'y')) - >>> mi.droplevel(['x', 'y']) + >>> mi.droplevel(["x", "y"]) Index([5, 6], dtype='int64', name='z') """ if not isinstance(level, (tuple, list)): @@ -2338,13 +2337,13 @@ def is_unique(self) -> bool: >>> idx.is_unique True - >>> idx = pd.Index(["Watermelon", "Orange", "Apple", - ... "Watermelon"]).astype("category") + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", "Watermelon"]).astype( + ... "category" + ... ) >>> idx.is_unique False - >>> idx = pd.Index(["Orange", "Apple", - ... "Watermelon"]).astype("category") + >>> idx = pd.Index(["Orange", "Apple", "Watermelon"]).astype("category") >>> idx.is_unique True """ @@ -2375,13 +2374,13 @@ def has_duplicates(self) -> bool: >>> idx.has_duplicates False - >>> idx = pd.Index(["Watermelon", "Orange", "Apple", - ... "Watermelon"]).astype("category") + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", "Watermelon"]).astype( + ... "category" + ... ) >>> idx.has_duplicates True - >>> idx = pd.Index(["Orange", "Apple", - ... 
"Watermelon"]).astype("category") + >>> idx = pd.Index(["Orange", "Apple", "Watermelon"]).astype("category") >>> idx.has_duplicates False """ @@ -2611,8 +2610,9 @@ def is_object(self) -> bool: >>> idx.is_object() # doctest: +SKIP True - >>> idx = pd.Index(["Watermelon", "Orange", "Apple", - ... "Watermelon"]).astype("category") + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", "Watermelon"]).astype( + ... "category" + ... ) >>> idx.is_object() # doctest: +SKIP False @@ -2653,8 +2653,9 @@ def is_categorical(self) -> bool: Examples -------- - >>> idx = pd.Index(["Watermelon", "Orange", "Apple", - ... "Watermelon"]).astype("category") + >>> idx = pd.Index(["Watermelon", "Orange", "Apple", "Watermelon"]).astype( + ... "category" + ... ) >>> idx.is_categorical() # doctest: +SKIP True @@ -2706,8 +2707,9 @@ def is_interval(self) -> bool: Examples -------- - >>> idx = pd.Index([pd.Interval(left=0, right=5), - ... pd.Interval(left=5, right=10)]) + >>> idx = pd.Index( + ... [pd.Interval(left=0, right=5), pd.Interval(left=5, right=10)] + ... ) >>> idx.is_interval() # doctest: +SKIP True @@ -2832,7 +2834,7 @@ def hasnans(self) -> bool: Examples -------- - >>> s = pd.Series([1, 2, 3], index=['a', 'b', None]) + >>> s = pd.Series([1, 2, 3], index=["a", "b", None]) >>> s a 1 b 2 @@ -2883,7 +2885,7 @@ def isna(self) -> npt.NDArray[np.bool_]: Empty strings are not considered NA values. None is considered an NA value. - >>> idx = pd.Index(['black', '', 'red', None]) + >>> idx = pd.Index(["black", "", "red", None]) >>> idx Index(['black', '', 'red', None], dtype='object') >>> idx.isna() @@ -2891,8 +2893,9 @@ def isna(self) -> npt.NDArray[np.bool_]: For datetimes, `NaT` (Not a Time) is considered as an NA value. - >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'), - ... pd.Timestamp(''), None, pd.NaT]) + >>> idx = pd.DatetimeIndex( + ... [pd.Timestamp("1940-04-25"), pd.Timestamp(""), None, pd.NaT] + ... ) >>> idx DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], dtype='datetime64[ns]', freq=None) @@ -2939,7 +2942,7 @@ def notna(self) -> npt.NDArray[np.bool_]: Empty strings are not considered NA values. None is considered a NA value. - >>> idx = pd.Index(['black', '', 'red', None]) + >>> idx = pd.Index(["black", "", "red", None]) >>> idx Index(['black', '', 'red', None], dtype='object') >>> idx.notna() @@ -3099,20 +3102,20 @@ def drop_duplicates(self, *, keep: DropKeep = "first") -> Self: -------- Generate an pandas.Index with duplicate values. - >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) + >>> idx = pd.Index(["llama", "cow", "llama", "beetle", "llama", "hippo"]) The `keep` parameter controls which duplicate values are removed. The value 'first' keeps the first occurrence for each set of duplicated entries. The default value of keep is 'first'. - >>> idx.drop_duplicates(keep='first') - Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object') + >>> idx.drop_duplicates(keep="first") + Index(['llama', 'cow', 'beetle', 'hippo'], dtype='object') The value 'last' keeps the last occurrence for each set of duplicated entries. - >>> idx.drop_duplicates(keep='last') - Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object') + >>> idx.drop_duplicates(keep="last") + Index(['cow', 'beetle', 'llama', 'hippo'], dtype='object') The value ``False`` discards all sets of duplicated entries. 
@@ -3158,19 +3161,19 @@ def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: By default, for each set of duplicated values, the first occurrence is set to False and all others to True: - >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) + >>> idx = pd.Index(["llama", "cow", "llama", "beetle", "llama"]) >>> idx.duplicated() array([False, False, True, False, True]) which is equivalent to - >>> idx.duplicated(keep='first') + >>> idx.duplicated(keep="first") array([False, False, True, False, True]) By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: - >>> idx.duplicated(keep='last') + >>> idx.duplicated(keep="last") array([ True, False, True, False, False]) By setting keep on ``False``, all duplicates are True: @@ -3279,7 +3282,7 @@ def union(self, other, sort=None): Union mismatched dtypes - >>> idx1 = pd.Index(['a', 'b', 'c', 'd']) + >>> idx1 = pd.Index(["a", "b", "c", "d"]) >>> idx2 = pd.Index([1, 2, 3, 4]) >>> idx1.union(idx2) Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object') @@ -3783,16 +3786,16 @@ def get_loc(self, key): Examples -------- - >>> unique_index = pd.Index(list('abc')) - >>> unique_index.get_loc('b') + >>> unique_index = pd.Index(list("abc")) + >>> unique_index.get_loc("b") 1 - >>> monotonic_index = pd.Index(list('abbc')) - >>> monotonic_index.get_loc('b') + >>> monotonic_index = pd.Index(list("abbc")) + >>> monotonic_index.get_loc("b") slice(1, 3, None) - >>> non_monotonic_index = pd.Index(list('abcb')) - >>> non_monotonic_index.get_loc('b') + >>> non_monotonic_index = pd.Index(list("abcb")) + >>> non_monotonic_index.get_loc("b") array([False, True, False, True]) """ casted_key = self._maybe_cast_indexer(key) @@ -3863,8 +3866,8 @@ def get_indexer( Examples -------- - >>> index = pd.Index(['c', 'a', 'b']) - >>> index.get_indexer(['a', 'b', 'x']) + >>> index = pd.Index(["c", "a", "b"]) + >>> index.get_indexer(["a", "b", "x"]) array([ 1, 2, -1]) Notice that the return value is an array of locations in ``index`` @@ -4374,10 +4377,10 @@ def reindex( Examples -------- - >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) + >>> idx = pd.Index(["car", "bike", "train", "tractor"]) >>> idx Index(['car', 'bike', 'train', 'tractor'], dtype='object') - >>> idx.reindex(['car', 'bike']) + >>> idx.reindex(["car", "bike"]) (Index(['car', 'bike'], dtype='object'), array([0, 1])) """ # GH6552: preserve names when reindexing to non-named target @@ -4581,7 +4584,7 @@ def join( -------- >>> idx1 = pd.Index([1, 2, 3]) >>> idx2 = pd.Index([4, 5, 6]) - >>> idx1.join(idx2, how='outer') + >>> idx1.join(idx2, how="outer") Index([1, 2, 3, 4, 5, 6], dtype='int64') """ other = ensure_index(other) @@ -4865,7 +4868,7 @@ def _join_level( from pandas.core.indexes.multi import MultiIndex def _get_leaf_sorter( - labels: tuple[np.ndarray, ...] | list[np.ndarray] + labels: tuple[np.ndarray, ...] 
| list[np.ndarray]
+ labels: tuple[np.ndarray, ...] 
| list[np.ndarray], ) -> npt.NDArray[np.intp]: """ Returns sorter for the innermost level while preserving the @@ -5303,10 +5306,10 @@ def where(self, cond, other=None) -> Index: Examples -------- - >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) + >>> idx = pd.Index(["car", "bike", "train", "tractor"]) >>> idx Index(['car', 'bike', 'train', 'tractor'], dtype='object') - >>> idx.where(idx.isin(['car', 'train']), 'other') + >>> idx.where(idx.isin(["car", "train"]), "other") Index(['car', 'other', 'train', 'other'], dtype='object') """ if isinstance(self, ABCMultiIndex): @@ -5635,10 +5638,10 @@ def equals(self, other: Any) -> bool: The dtype is *not* compared - >>> int64_idx = pd.Index([1, 2, 3], dtype='int64') + >>> int64_idx = pd.Index([1, 2, 3], dtype="int64") >>> int64_idx Index([1, 2, 3], dtype='int64') - >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64') + >>> uint64_idx = pd.Index([1, 2, 3], dtype="uint64") >>> uint64_idx Index([1, 2, 3], dtype='uint64') >>> int64_idx.equals(uint64_idx) @@ -5697,13 +5700,13 @@ def identical(self, other) -> bool: Examples -------- - >>> idx1 = pd.Index(['1', '2', '3']) - >>> idx2 = pd.Index(['1', '2', '3']) + >>> idx1 = pd.Index(["1", "2", "3"]) + >>> idx2 = pd.Index(["1", "2", "3"]) >>> idx2.identical(idx1) True - >>> idx1 = pd.Index(['1', '2', '3'], name="A") - >>> idx2 = pd.Index(['1', '2', '3'], name="B") + >>> idx1 = pd.Index(["1", "2", "3"], name="A") + >>> idx2 = pd.Index(["1", "2", "3"], name="B") >>> idx2.identical(idx1) False """ @@ -5751,26 +5754,25 @@ def asof(self, label): -------- `Index.asof` returns the latest index label up to the passed label. - >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03']) - >>> idx.asof('2014-01-01') + >>> idx = pd.Index(["2013-12-31", "2014-01-02", "2014-01-03"]) + >>> idx.asof("2014-01-01") '2013-12-31' If the label is in the index, the method returns the passed label. - >>> idx.asof('2014-01-02') + >>> idx.asof("2014-01-02") '2014-01-02' If all of the labels in the index are later than the passed label, NaN is returned. - >>> idx.asof('1999-01-02') + >>> idx.asof("1999-01-02") nan If the index is not sorted, an error is raised. - >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02', - ... '2014-01-03']) - >>> idx_not_sorted.asof('2013-12-31') + >>> idx_not_sorted = pd.Index(["2013-12-31", "2015-01-02", "2014-01-03"]) + >>> idx_not_sorted.asof("2013-12-31") Traceback (most recent call last): ValueError: index must be monotonic increasing or decreasing """ @@ -5830,9 +5832,10 @@ def asof_locs( Examples -------- - >>> idx = pd.date_range('2023-06-01', periods=3, freq='D') - >>> where = pd.DatetimeIndex(['2023-05-30 00:12:00', '2023-06-01 00:00:00', - ... '2023-06-02 23:59:59']) + >>> idx = pd.date_range("2023-06-01", periods=3, freq="D") + >>> where = pd.DatetimeIndex( + ... ["2023-05-30 00:12:00", "2023-06-01 00:00:00", "2023-06-02 23:59:59"] + ... ) >>> mask = np.ones(3, dtype=bool) >>> idx.asof_locs(where, mask) array([-1, 0, 1]) @@ -6024,7 +6027,7 @@ def shift(self, periods: int = 1, freq=None) -> Self: -------- Put the first 5 month starts of 2011 into an index. - >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS') + >>> month_starts = pd.date_range("1/1/2011", periods=5, freq="MS") >>> month_starts DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01', '2011-05-01'], @@ -6032,7 +6035,7 @@ def shift(self, periods: int = 1, freq=None) -> Self: Shift the index by 10 days. 
- >>> month_starts.shift(10, freq='D') + >>> month_starts.shift(10, freq="D") DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11', '2011-05-11'], dtype='datetime64[ns]', freq=None) @@ -6074,7 +6077,7 @@ def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: Examples -------- - >>> idx = pd.Index(['b', 'a', 'd', 'c']) + >>> idx = pd.Index(["b", "a", "d", "c"]) >>> idx Index(['b', 'a', 'd', 'c'], dtype='object') @@ -6209,7 +6212,7 @@ def get_indexer_for(self, target) -> npt.NDArray[np.intp]: Examples -------- - >>> idx = pd.Index([np.nan, 'var1', np.nan]) + >>> idx = pd.Index([np.nan, "var1", np.nan]) >>> idx.get_indexer_for([np.nan]) array([0, 2]) """ @@ -6508,16 +6511,16 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): Examples -------- >>> idx = pd.Index([1, 2, 3]) - >>> idx.map({1: 'a', 2: 'b', 3: 'c'}) + >>> idx.map({1: "a", 2: "b", 3: "c"}) Index(['a', 'b', 'c'], dtype='object') Using `map` with a function: >>> idx = pd.Index([1, 2, 3]) - >>> idx.map('I am a {}'.format) + >>> idx.map("I am a {}".format) Index(['I am a 1', 'I am a 2', 'I am a 3'], dtype='object') - >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx = pd.Index(["a", "b", "c"]) >>> idx.map(lambda x: x.upper()) Index(['A', 'B', 'C'], dtype='object') """ @@ -6621,9 +6624,9 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: >>> idx.isin([1, 4]) array([ True, False, False]) - >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], - ... ['red', 'blue', 'green']], - ... names=('number', 'color')) + >>> midx = pd.MultiIndex.from_arrays( + ... [[1, 2, 3], ["red", "blue", "green"]], names=("number", "color") + ... ) >>> midx MultiIndex([(1, 'red'), (2, 'blue'), @@ -6633,12 +6636,12 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: Check whether the strings in the 'color' level of the MultiIndex are in a list of colors. - >>> midx.isin(['red', 'orange', 'yellow'], level='color') + >>> midx.isin(["red", "orange", "yellow"], level="color") array([ True, False, False]) To check across the levels of a MultiIndex, pass a list of tuples: - >>> midx.isin([(1, 'red'), (3, 'red')]) + >>> midx.isin([(1, "red"), (3, "red")]) array([ True, False, False]) """ if level is not None: @@ -6686,12 +6689,12 @@ def slice_indexer( -------- This is a method on all index types. For example you can do: - >>> idx = pd.Index(list('abcd')) - >>> idx.slice_indexer(start='b', end='c') + >>> idx = pd.Index(list("abcd")) + >>> idx.slice_indexer(start="b", end="c") slice(1, 3, None) - >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) - >>> idx.slice_indexer(start='b', end=('c', 'g')) + >>> idx = pd.MultiIndex.from_arrays([list("abcd"), list("efgh")]) + >>> idx.slice_indexer(start="b", end=("c", "g")) slice(1, 3, None) """ start_slice, end_slice = self.slice_locs(start, end, step=step) @@ -6802,16 +6805,16 @@ def get_slice_bound(self, label, side: Literal["left", "right"]) -> int: Examples -------- >>> idx = pd.RangeIndex(5) - >>> idx.get_slice_bound(3, 'left') + >>> idx.get_slice_bound(3, "left") 3 - >>> idx.get_slice_bound(3, 'right') + >>> idx.get_slice_bound(3, "right") 4 If ``label`` is non-unique in the index, an error will be raised. 
- >>> idx_duplicate = pd.Index(['a', 'b', 'a', 'c', 'd']) - >>> idx_duplicate.get_slice_bound('a', 'left') + >>> idx_duplicate = pd.Index(["a", "b", "a", "c", "d"]) + >>> idx_duplicate.get_slice_bound("a", "left") Traceback (most recent call last): KeyError: Cannot get left slice bound for non-unique label: 'a' """ @@ -6887,8 +6890,8 @@ def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]: Examples -------- - >>> idx = pd.Index(list('abcd')) - >>> idx.slice_locs(start='b', end='c') + >>> idx = pd.Index(list("abcd")) + >>> idx.slice_locs(start="b", end="c") (1, 3) """ inc = step is None or step >= 0 @@ -6969,11 +6972,11 @@ def delete(self, loc) -> Self: Examples -------- - >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx = pd.Index(["a", "b", "c"]) >>> idx.delete(1) Index(['a', 'c'], dtype='object') - >>> idx = pd.Index(['a', 'b', 'c']) + >>> idx = pd.Index(["a", "b", "c"]) >>> idx.delete([0, 2]) Index(['b'], dtype='object') """ @@ -7005,8 +7008,8 @@ def insert(self, loc: int, item) -> Index: Examples -------- - >>> idx = pd.Index(['a', 'b', 'c']) - >>> idx.insert(1, 'x') + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.insert(1, "x") Index(['a', 'x', 'b', 'c'], dtype='object') """ item = lib.item_from_zerodim(item) @@ -7089,8 +7092,8 @@ def drop( Examples -------- - >>> idx = pd.Index(['a', 'b', 'c']) - >>> idx.drop(['a']) + >>> idx = pd.Index(["a", "b", "c"]) + >>> idx.drop(["a"]) Index(['b', 'c'], dtype='object') """ if not isinstance(labels, Index): @@ -7468,13 +7471,13 @@ def min(self, axis=None, skipna: bool = True, *args, **kwargs): >>> idx.min() 1 - >>> idx = pd.Index(['c', 'b', 'a']) + >>> idx = pd.Index(["c", "b", "a"]) >>> idx.min() 'a' For a MultiIndex, the minimum is determined lexicographically. - >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) + >>> idx = pd.MultiIndex.from_product([("a", "b"), (2, 1)]) >>> idx.min() ('a', 1) """ @@ -7531,13 +7534,13 @@ def max(self, axis=None, skipna: bool = True, *args, **kwargs): >>> idx.max() 3 - >>> idx = pd.Index(['c', 'b', 'a']) + >>> idx = pd.Index(["c", "b", "a"]) >>> idx.max() 'c' For a MultiIndex, the maximum is determined lexicographically. - >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)]) + >>> idx = pd.MultiIndex.from_product([("a", "b"), (2, 1)]) >>> idx.max() ('b', 2) """ @@ -7645,13 +7648,13 @@ def ensure_index(index_like: Axes, copy: bool = False) -> Index: Examples -------- - >>> ensure_index(['a', 'b']) + >>> ensure_index(["a", "b"]) Index(['a', 'b'], dtype='object') - >>> ensure_index([('a', 'a'), ('b', 'c')]) + >>> ensure_index([("a", "a"), ("b", "c")]) Index([('a', 'a'), ('b', 'c')], dtype='object') - >>> ensure_index([['a', 'a'], ['b', 'c']]) + >>> ensure_index([["a", "a"], ["b", "c"]]) MultiIndex([('a', 'b'), ('a', 'c')], ) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b307be004ad6e..5e9d15812526f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -284,14 +284,14 @@ def equals(self, other: object) -> bool: Examples -------- - >>> ci = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c']) - >>> ci2 = pd.CategoricalIndex(pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c'])) + >>> ci = pd.CategoricalIndex(["a", "b", "c", "a", "b", "c"]) + >>> ci2 = pd.CategoricalIndex(pd.Categorical(["a", "b", "c", "a", "b", "c"])) >>> ci.equals(ci2) True The order of elements matters. 
- >>> ci3 = pd.CategoricalIndex(['c', 'b', 'a', 'a', 'b', 'c']) + >>> ci3 = pd.CategoricalIndex(["c", "b", "a", "a", "b", "c"]) >>> ci.equals(ci3) False @@ -304,16 +304,17 @@ def equals(self, other: object) -> bool: The categories matter, but the order of the categories matters only when ``ordered=True``. - >>> ci5 = ci.set_categories(['a', 'b', 'c', 'd']) + >>> ci5 = ci.set_categories(["a", "b", "c", "d"]) >>> ci.equals(ci5) False - >>> ci6 = ci.set_categories(['b', 'c', 'a']) + >>> ci6 = ci.set_categories(["b", "c", "a"]) >>> ci.equals(ci6) True - >>> ci_ordered = pd.CategoricalIndex(['a', 'b', 'c', 'a', 'b', 'c'], - ... ordered=True) - >>> ci2_ordered = ci_ordered.set_categories(['b', 'c', 'a']) + >>> ci_ordered = pd.CategoricalIndex( + ... ["a", "b", "c", "a", "b", "c"], ordered=True + ... ) + >>> ci2_ordered = ci_ordered.set_categories(["b", "c", "a"]) >>> ci_ordered.equals(ci2_ordered) False """ @@ -462,37 +463,37 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): Examples -------- - >>> idx = pd.CategoricalIndex(['a', 'b', 'c']) + >>> idx = pd.CategoricalIndex(["a", "b", "c"]) >>> idx CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category') >>> idx.map(lambda x: x.upper()) CategoricalIndex(['A', 'B', 'C'], categories=['A', 'B', 'C'], ordered=False, dtype='category') - >>> idx.map({'a': 'first', 'b': 'second', 'c': 'third'}) + >>> idx.map({"a": "first", "b": "second", "c": "third"}) CategoricalIndex(['first', 'second', 'third'], categories=['first', 'second', 'third'], ordered=False, dtype='category') If the mapping is one-to-one the ordering of the categories is preserved: - >>> idx = pd.CategoricalIndex(['a', 'b', 'c'], ordered=True) + >>> idx = pd.CategoricalIndex(["a", "b", "c"], ordered=True) >>> idx CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=True, dtype='category') - >>> idx.map({'a': 3, 'b': 2, 'c': 1}) + >>> idx.map({"a": 3, "b": 2, "c": 1}) CategoricalIndex([3, 2, 1], categories=[3, 2, 1], ordered=True, dtype='category') If the mapping is not one-to-one an :class:`~pandas.Index` is returned: - >>> idx.map({'a': 'first', 'b': 'second', 'c': 'first'}) + >>> idx.map({"a": "first", "b": "second", "c": "first"}) Index(['first', 'second', 'first'], dtype='object') If a `dict` is used, all unmapped categories are mapped to `NaN` and the result is an :class:`~pandas.Index`: - >>> idx.map({'a': 'first', 'b': 'second'}) + >>> idx.map({"a": "first", "b": "second"}) Index(['first', 'second', nan], dtype='object') """ mapped = self._values.map(mapper, na_action=na_action) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a5670536c74f7..45decaf97a188 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -463,20 +463,20 @@ def as_unit(self, unit: str) -> Self: -------- For :class:`pandas.DatetimeIndex`: - >>> idx = pd.DatetimeIndex(['2020-01-02 01:02:03.004005006']) + >>> idx = pd.DatetimeIndex(["2020-01-02 01:02:03.004005006"]) >>> idx DatetimeIndex(['2020-01-02 01:02:03.004005006'], dtype='datetime64[ns]', freq=None) - >>> idx.as_unit('s') + >>> idx.as_unit("s") DatetimeIndex(['2020-01-02 01:02:03'], dtype='datetime64[s]', freq=None) For :class:`pandas.TimedeltaIndex`: - >>> tdelta_idx = pd.to_timedelta(['1 day 3 min 2 us 42 ns']) + >>> tdelta_idx = pd.to_timedelta(["1 day 3 min 2 us 42 ns"]) >>> tdelta_idx TimedeltaIndex(['1 days 00:03:00.000002042'], dtype='timedelta64[ns]', freq=None) - >>> tdelta_idx.as_unit('s') + >>> 
tdelta_idx.as_unit("s") TimedeltaIndex(['1 days 00:03:00'], dtype='timedelta64[s]', freq=None) """ arr = self._data.as_unit(unit) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 3cf3352e64f27..282a11122211b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -488,12 +488,13 @@ def snap(self, freq: Frequency = "S") -> DatetimeIndex: Examples -------- - >>> idx = pd.DatetimeIndex(['2023-01-01', '2023-01-02', - ... '2023-02-01', '2023-02-02']) + >>> idx = pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-02", "2023-02-01", "2023-02-02"] + ... ) >>> idx DatetimeIndex(['2023-01-01', '2023-01-02', '2023-02-01', '2023-02-02'], dtype='datetime64[ns]', freq=None) - >>> idx.snap('MS') + >>> idx.snap("MS") DatetimeIndex(['2023-01-01', '2023-01-01', '2023-02-01', '2023-02-01'], dtype='datetime64[ns]', freq=None) """ @@ -737,8 +738,9 @@ def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]: Examples -------- - >>> idx = pd.DatetimeIndex(["1/1/2020 10:00", "2/1/2020 11:00", - ... "3/1/2020 10:00"]) + >>> idx = pd.DatetimeIndex( + ... ["1/1/2020 10:00", "2/1/2020 11:00", "3/1/2020 10:00"] + ... ) >>> idx.indexer_at_time("10:00") array([0, 2]) """ @@ -906,7 +908,7 @@ def date_range( Specify `start` and `end`, with the default daily frequency. - >>> pd.date_range(start='1/1/2018', end='1/08/2018') + >>> pd.date_range(start="1/1/2018", end="1/08/2018") DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], dtype='datetime64[ns]', freq='D') @@ -925,14 +927,14 @@ def date_range( Specify `start` and `periods`, the number of periods (days). - >>> pd.date_range(start='1/1/2018', periods=8) + >>> pd.date_range(start="1/1/2018", periods=8) DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'], dtype='datetime64[ns]', freq='D') Specify `end` and `periods`, the number of periods (days). - >>> pd.date_range(end='1/1/2018', periods=8) + >>> pd.date_range(end="1/1/2018", periods=8) DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28', '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'], dtype='datetime64[ns]', freq='D') @@ -940,7 +942,7 @@ def date_range( Specify `start`, `end`, and `periods`; the frequency is generated automatically (linearly spaced). - >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3) + >>> pd.date_range(start="2018-04-24", end="2018-04-27", periods=3) DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00', '2018-04-27 00:00:00'], dtype='datetime64[ns]', freq=None) @@ -949,28 +951,28 @@ def date_range( Changed the `freq` (frequency) to ``'ME'`` (month end frequency). - >>> pd.date_range(start='1/1/2018', periods=5, freq='ME') + >>> pd.date_range(start="1/1/2018", periods=5, freq="ME") DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30', '2018-05-31'], dtype='datetime64[ns]', freq='ME') Multiples are allowed - >>> pd.date_range(start='1/1/2018', periods=5, freq='3ME') + >>> pd.date_range(start="1/1/2018", periods=5, freq="3ME") DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', '2019-01-31'], dtype='datetime64[ns]', freq='3ME') `freq` can also be specified as an Offset object. 
- >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3)) + >>> pd.date_range(start="1/1/2018", periods=5, freq=pd.offsets.MonthEnd(3)) DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31', '2019-01-31'], dtype='datetime64[ns]', freq='3ME') Specify `tz` to set the timezone. - >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo') + >>> pd.date_range(start="1/1/2018", periods=5, tz="Asia/Tokyo") DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00', '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00', '2018-01-05 00:00:00+09:00'], @@ -979,20 +981,20 @@ def date_range( `inclusive` controls whether to include `start` and `end` that are on the boundary. The default, "both", includes boundary points on either end. - >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both") + >>> pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="both") DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') Use ``inclusive='left'`` to exclude `end` if it falls on the boundary. - >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left') + >>> pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="left") DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'], dtype='datetime64[ns]', freq='D') Use ``inclusive='right'`` to exclude `start` if it falls on the boundary, and similarly ``inclusive='neither'`` will exclude both `start` and `end`. - >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right') + >>> pd.date_range(start="2017-01-01", end="2017-01-04", inclusive="right") DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'], dtype='datetime64[ns]', freq='D') @@ -1088,7 +1090,7 @@ def bdate_range( -------- Note how the two weekend days are skipped in the result. - >>> pd.bdate_range(start='1/1/2018', end='1/08/2018') + >>> pd.bdate_range(start="1/1/2018", end="1/08/2018") DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04', '2018-01-05', '2018-01-08'], dtype='datetime64[ns]', freq='B') diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index f3f3e286e43e5..46d1ee49c22a0 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -479,7 +479,7 @@ def is_overlapping(self) -> bool: Intervals that share closed endpoints overlap: - >>> index = pd.interval_range(0, 3, closed='both') + >>> index = pd.interval_range(0, 3, closed="both") >>> index IntervalIndex([[0, 1], [1, 2], [2, 3]], dtype='interval[int64, both]') @@ -488,7 +488,7 @@ def is_overlapping(self) -> bool: Intervals that only have an open endpoint in common do not overlap: - >>> index = pd.interval_range(0, 3, closed='left') + >>> index = pd.interval_range(0, 3, closed="left") >>> index IntervalIndex([[0, 1), [1, 2), [2, 3)], dtype='interval[int64, left]') @@ -1017,8 +1017,9 @@ def interval_range( Additionally, datetime-like input is also supported. - >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), - ... end=pd.Timestamp('2017-01-04')) + >>> pd.interval_range( + ... start=pd.Timestamp("2017-01-01"), end=pd.Timestamp("2017-01-04") + ... ) IntervalIndex([(2017-01-01 00:00:00, 2017-01-02 00:00:00], (2017-01-02 00:00:00, 2017-01-03 00:00:00], (2017-01-03 00:00:00, 2017-01-04 00:00:00]], @@ -1035,8 +1036,7 @@ def interval_range( Similarly, for datetime-like ``start`` and ``end``, the frequency must be convertible to a DateOffset. - >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), - ... 
periods=3, freq='MS') + >>> pd.interval_range(start=pd.Timestamp("2017-01-01"), periods=3, freq="MS") IntervalIndex([(2017-01-01 00:00:00, 2017-02-01 00:00:00], (2017-02-01 00:00:00, 2017-03-01 00:00:00], (2017-03-01 00:00:00, 2017-04-01 00:00:00]], @@ -1052,7 +1052,7 @@ def interval_range( The ``closed`` parameter specifies which endpoints of the individual intervals within the ``IntervalIndex`` are closed. - >>> pd.interval_range(end=5, periods=4, closed='both') + >>> pd.interval_range(end=5, periods=4, closed="both") IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], dtype='interval[int64, both]') """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a11dad9dcb518..c81d76d471a5f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -296,8 +296,8 @@ class MultiIndex(Index): methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product` and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``): - >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] - >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + >>> arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]] + >>> pd.MultiIndex.from_arrays(arrays, names=("number", "color")) MultiIndex([(1, 'red'), (1, 'blue'), (2, 'red'), @@ -502,8 +502,8 @@ def from_arrays( Examples -------- - >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] - >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + >>> arrays = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]] + >>> pd.MultiIndex.from_arrays(arrays, names=("number", "color")) MultiIndex([(1, 'red'), (1, 'blue'), (2, 'red'), @@ -573,9 +573,8 @@ def from_tuples( Examples -------- - >>> tuples = [(1, 'red'), (1, 'blue'), - ... (2, 'red'), (2, 'blue')] - >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) + >>> tuples = [(1, "red"), (1, "blue"), (2, "red"), (2, "blue")] + >>> pd.MultiIndex.from_tuples(tuples, names=("number", "color")) MultiIndex([(1, 'red'), (1, 'blue'), (2, 'red'), @@ -655,9 +654,8 @@ def from_product( Examples -------- >>> numbers = [0, 1, 2] - >>> colors = ['green', 'purple'] - >>> pd.MultiIndex.from_product([numbers, colors], - ... names=('number', 'color')) + >>> colors = ["green", "purple"] + >>> pd.MultiIndex.from_product([numbers, colors], names=["number", "color"]) MultiIndex([(0, 'green'), (0, 'purple'), (1, 'green'), @@ -717,9 +715,10 @@ def from_frame( Examples -------- - >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], - ... ['NJ', 'Temp'], ['NJ', 'Precip']], - ... columns=['a', 'b']) + >>> df = pd.DataFrame( + ... [["HI", "Temp"], ["HI", "Precip"], ["NJ", "Temp"], ["NJ", "Precip"]], + ... columns=["a", "b"], + ... ) >>> df a b 0 HI Temp @@ -736,7 +735,7 @@ def from_frame( Using explicit names, instead of the column names - >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) + >>> pd.MultiIndex.from_frame(df, names=["state", "observation"]) MultiIndex([('HI', 'Temp'), ('HI', 'Precip'), ('NJ', 'Temp'), @@ -806,8 +805,9 @@ def dtypes(self) -> Series: Examples -------- - >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')], - ... names=('number', 'color')) + >>> idx = pd.MultiIndex.from_product( + ... [(0, 1, 2), ("green", "purple")], names=["number", "color"] + ... ) >>> idx MultiIndex([(0, 'green'), (0, 'purple'), @@ -860,10 +860,11 @@ def levels(self) -> tuple[Index, ...]: Examples -------- - >>> index = pd.MultiIndex.from_product([['mammal'], - ... ('goat', 'human', 'cat', 'dog')], - ... 
names=['Category', 'Animals']) - >>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs']) + >>> index = pd.MultiIndex.from_product( + ... [["mammal"], ("goat", "human", "cat", "dog")], + ... names=["Category", "Animals"], + ... ) + >>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=["Legs"]) >>> leg_num Legs Category Animals @@ -972,9 +973,9 @@ def set_levels( ... (2, "one"), ... (2, "two"), ... (3, "one"), - ... (3, "two") + ... (3, "two"), ... ], - ... names=["foo", "bar"] + ... names=["foo", "bar"], ... ) >>> idx MultiIndex([(1, 'one'), @@ -985,7 +986,7 @@ def set_levels( (3, 'two')], names=('foo', 'bar')) - >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) + >>> idx.set_levels([["a", "b", "c"], [1, 2]]) MultiIndex([('a', 1), ('a', 2), ('b', 1), @@ -993,7 +994,7 @@ def set_levels( ('c', 1), ('c', 2)], names=('foo', 'bar')) - >>> idx.set_levels(['a', 'b', 'c'], level=0) + >>> idx.set_levels(["a", "b", "c"], level=0) MultiIndex([('a', 'one'), ('a', 'two'), ('b', 'one'), @@ -1001,7 +1002,7 @@ def set_levels( ('c', 'one'), ('c', 'two')], names=('foo', 'bar')) - >>> idx.set_levels(['a', 'b'], level='bar') + >>> idx.set_levels(["a", "b"], level="bar") MultiIndex([(1, 'a'), (1, 'b'), (2, 'a'), @@ -1015,7 +1016,7 @@ def set_levels( be stored in the MultiIndex levels, though the values will be truncated in the MultiIndex output. - >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) + >>> idx.set_levels([["a", "b", "c"], [1, 2, 3, 4]], level=[0, 1]) MultiIndex([('a', 1), ('a', 2), ('b', 1), @@ -1023,7 +1024,7 @@ def set_levels( ('c', 1), ('c', 2)], names=('foo', 'bar')) - >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels + >>> idx.set_levels([["a", "b", "c"], [1, 2, 3, 4]], level=[0, 1]).levels (Index(['a', 'b', 'c'], dtype='object', name='foo'), Index([1, 2, 3, 4], dtype='int64', name='bar')) """ # noqa: E501 @@ -1049,7 +1050,7 @@ def nlevels(self) -> int: Examples -------- - >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi = pd.MultiIndex.from_arrays([["a"], ["b"], ["c"]]) >>> mi MultiIndex([('a', 'b', 'c')], ) @@ -1065,7 +1066,7 @@ def levshape(self) -> Shape: Examples -------- - >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi = pd.MultiIndex.from_arrays([["a"], ["b"], ["c"]]) >>> mi MultiIndex([('a', 'b', 'c')], ) @@ -1166,7 +1167,7 @@ def set_codes( (2, 'one'), (1, 'two')], names=('foo', 'bar')) - >>> idx.set_codes([0, 0, 1, 1], level='bar') + >>> idx.set_codes([0, 0, 1, 1], level="bar") MultiIndex([(1, 'one'), (1, 'one'), (2, 'two'), @@ -1274,7 +1275,7 @@ def copy( # type: ignore[override] Examples -------- - >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) + >>> mi = pd.MultiIndex.from_arrays([["a"], ["b"], ["c"]]) >>> mi MultiIndex([('a', 'b', 'c')], ) @@ -1817,14 +1818,14 @@ def get_level_values(self, level) -> Index: # type: ignore[override] -------- Create a MultiIndex: - >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) - >>> mi.names = ['level_1', 'level_2'] + >>> mi = pd.MultiIndex.from_arrays((list("abc"), list("def"))) + >>> mi.names = ["level_1", "level_2"] Get level values by supplying level as either integer or name: >>> mi.get_level_values(0) Index(['a', 'b', 'c'], dtype='object', name='level_1') - >>> mi.get_level_values('level_2') + >>> mi.get_level_values("level_2") Index(['d', 'e', 'f'], dtype='object', name='level_2') If a level contains missing values, the return type of the level @@ -1885,7 +1886,7 @@ def to_frame( Examples -------- - >>> mi = 
pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']]) + >>> mi = pd.MultiIndex.from_arrays([["a", "b"], ["c", "d"]]) >>> mi MultiIndex([('a', 'c'), ('b', 'd')], @@ -1903,7 +1904,7 @@ def to_frame( 0 a c 1 b d - >>> df = mi.to_frame(name=['x', 'y']) + >>> df = mi.to_frame(name=["x", "y"]) >>> df x y a c a c @@ -1962,8 +1963,8 @@ def to_flat_index(self) -> Index: # type: ignore[override] Examples -------- >>> index = pd.MultiIndex.from_product( - ... [['foo', 'bar'], ['baz', 'qux']], - ... names=('a', 'b')) + ... [["foo", "bar"], ["baz", "qux"]], names=["a", "b"] + ... ) >>> index.to_flat_index() Index([('foo', 'baz'), ('foo', 'qux'), ('bar', 'baz'), ('bar', 'qux')], @@ -1984,25 +1985,29 @@ def _is_lexsorted(self) -> bool: In the below examples, the first level of the MultiIndex is sorted because a<b<c, so there is no need to look at the subsequent levels. - >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], - ... ['d', 'e', 'f']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays( + ... [["a", "b", "c"], ["d", "e", "f"]] + ... )._is_lexsorted() True - >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], - ... ['d', 'f', 'e']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays( + ... [["a", "b", "c"], ["d", "f", "e"]] + ... )._is_lexsorted() True In case there is a tie, the lexicographical sorting looks at the next level of the MultiIndex. >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted() True >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted() False - >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], - ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays( + ... [["a", "a", "b", "b"], ["aa", "bb", "aa", "bb"]] + ... )._is_lexsorted() True - >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], - ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted() + >>> pd.MultiIndex.from_arrays( + ... [["a", "a", "b", "b"], ["bb", "aa", "aa", "bb"]] + ... )._is_lexsorted() False """ return self._lexsort_depth == self.nlevels @@ -2039,8 +2044,9 @@ def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIn Examples -------- - >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi = pd.MultiIndex( + ... levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ... ) >>> mi MultiIndex([('a', 'bb'), ('a', 'aa'), @@ -2103,7 +2109,7 @@ def remove_unused_levels(self) -> MultiIndex: Examples -------- - >>> mi = pd.MultiIndex.from_product([range(2), list('ab')]) + >>> mi = pd.MultiIndex.from_product([range(2), list("ab")]) >>> mi MultiIndex([(0, 'a'), (0, 'b'), @@ -2290,7 +2296,7 @@ def append(self, other): Examples -------- - >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']]) + >>> mi = pd.MultiIndex.from_arrays([["a"], ["b"]]) >>> mi MultiIndex([('a', 'b')], ) @@ -2385,8 +2391,9 @@ def drop( # type: ignore[override] Examples -------- - >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')], - ... names=["number", "color"]) + >>> idx = pd.MultiIndex.from_product( + ... [(0, 1, 2), ("green", "purple")], names=["number", "color"] + ... 
) >>> idx MultiIndex([(0, 'green'), (0, 'purple'), @@ -2395,7 +2402,7 @@ def drop( # type: ignore[override] (2, 'green'), (2, 'purple')], names=('number', 'color')) - >>> idx.drop([(1, 'green'), (2, 'purple')]) + >>> idx.drop([(1, "green"), (2, "purple")]) MultiIndex([(0, 'green'), (0, 'purple'), (1, 'purple'), @@ -2404,7 +2411,7 @@ def drop( # type: ignore[override] We can also drop from a specific level. - >>> idx.drop('green', level='color') + >>> idx.drop("green", level="color") MultiIndex([(0, 'purple'), (1, 'purple'), (2, 'purple')], @@ -2503,8 +2510,9 @@ def swaplevel(self, i=-2, j=-1) -> MultiIndex: Examples -------- - >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], - ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) + >>> mi = pd.MultiIndex( + ... levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] + ... ) >>> mi MultiIndex([('a', 'bb'), ('a', 'aa'), @@ -2549,7 +2557,7 @@ def reorder_levels(self, order) -> MultiIndex: Examples -------- - >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) + >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=["x", "y"]) >>> mi MultiIndex([(1, 3), (2, 4)], @@ -2560,7 +2568,7 @@ def reorder_levels(self, order) -> MultiIndex: (4, 2)], names=('y', 'x')) - >>> mi.reorder_levels(order=['y', 'x']) + >>> mi.reorder_levels(order=["y", "x"]) MultiIndex([(3, 1), (4, 2)], names=('y', 'x')) @@ -2835,18 +2843,18 @@ def get_slice_bound( Examples -------- - >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) + >>> mi = pd.MultiIndex.from_arrays([list("abbc"), list("gefd")]) Get the locations from the leftmost 'b' in the first level until the end of the multiindex: - >>> mi.get_slice_bound('b', side="left") + >>> mi.get_slice_bound("b", side="left") 1 Like above, but if you get the locations from the rightmost 'b' in the first level and 'f' in the second level: - >>> mi.get_slice_bound(('b','f'), side="right") + >>> mi.get_slice_bound(("b", "f"), side="right") 3 See Also @@ -2890,19 +2898,20 @@ def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]: Examples -------- - >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')], - ... names=['A', 'B']) + >>> mi = pd.MultiIndex.from_arrays( + ... [list("abbd"), list("deff")], names=["A", "B"] + ... ) Get the slice locations from the beginning of 'b' in the first level until the end of the multiindex: - >>> mi.slice_locs(start='b') + >>> mi.slice_locs(start="b") (1, 4) Like above, but stop at the end of 'b' in the first level and 'f' in the second level: - >>> mi.slice_locs(start='b', end=('b', 'f')) + >>> mi.slice_locs(start="b", end=("b", "f")) (1, 3) See Also @@ -3026,12 +3035,12 @@ def get_loc(self, key): Examples -------- - >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + >>> mi = pd.MultiIndex.from_arrays([list("abb"), list("def")]) - >>> mi.get_loc('b') + >>> mi.get_loc("b") slice(1, 3, None) - >>> mi.get_loc(('b', 'e')) + >>> mi.get_loc(("b", "e")) 1 """ self._check_indexing_error(key) @@ -3144,16 +3153,15 @@ def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): Examples -------- - >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], - ... 
names=['A', 'B']) + >>> mi = pd.MultiIndex.from_arrays([list("abb"), list("def")], names=["A", "B"]) - >>> mi.get_loc_level('b') + >>> mi.get_loc_level("b") (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) - >>> mi.get_loc_level('e', level='B') + >>> mi.get_loc_level("e", level="B") (array([False, True, False]), Index(['b'], dtype='object', name='A')) - >>> mi.get_loc_level(['b', 'e']) + >>> mi.get_loc_level(["b", "e"]) (1, None) """ if not isinstance(level, (list, tuple)): @@ -3455,15 +3463,15 @@ def get_locs(self, seq) -> npt.NDArray[np.intp]: Examples -------- - >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')]) + >>> mi = pd.MultiIndex.from_arrays([list("abb"), list("def")]) - >>> mi.get_locs('b') # doctest: +SKIP + >>> mi.get_locs("b") # doctest: +SKIP array([1, 2], dtype=int64) - >>> mi.get_locs([slice(None), ['e', 'f']]) # doctest: +SKIP + >>> mi.get_locs([slice(None), ["e", "f"]]) # doctest: +SKIP array([1, 2], dtype=int64) - >>> mi.get_locs([[True, False, True], slice('e', 'f')]) # doctest: +SKIP + >>> mi.get_locs([[True, False, True], slice("e", "f")]) # doctest: +SKIP array([2], dtype=int64) """ @@ -3675,11 +3683,11 @@ def truncate(self, before=None, after=None) -> MultiIndex: Examples -------- - >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']]) + >>> mi = pd.MultiIndex.from_arrays([["a", "b", "c"], ["x", "y", "z"]]) >>> mi MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')], ) - >>> mi.truncate(before='a', after='b') + >>> mi.truncate(before="a", after="b") MultiIndex([('a', 'x'), ('b', 'y')], ) """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ab499665b13ed..a7315d40f0236 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -156,7 +156,7 @@ class PeriodIndex(DatetimeIndexOpsMixin): Examples -------- - >>> idx = pd.PeriodIndex(data=['2000Q1', '2002Q3'], freq='Q') + >>> idx = pd.PeriodIndex(data=["2000Q1", "2002Q3"], freq="Q") >>> idx PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]') """ @@ -374,7 +374,7 @@ def from_ordinals(cls, ordinals, *, freq, name=None) -> Self: Examples -------- - >>> idx = pd.PeriodIndex.from_ordinals([-1, 0, 1], freq='Q') + >>> idx = pd.PeriodIndex.from_ordinals([-1, 0, 1], freq="Q") >>> idx PeriodIndex(['1969Q4', '1970Q1', '1970Q2'], dtype='period[Q-DEC]') """ @@ -617,7 +617,7 @@ def period_range( Examples -------- - >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M') + >>> pd.period_range(start="2017-01-01", end="2018-01-01", freq="M") PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06', '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12', '2018-01'], @@ -627,8 +627,11 @@ def period_range( endpoints for a ``PeriodIndex`` with frequency matching that of the ``period_range`` constructor. - >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), - ... end=pd.Period('2017Q2', freq='Q'), freq='M') + >>> pd.period_range( + ... start=pd.Period("2017Q1", freq="Q"), + ... end=pd.Period("2017Q2", freq="Q"), + ... freq="M", + ... 
) PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], dtype='period[M]') """ diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index db813b047b2bb..485c7a1ce08cd 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -114,13 +114,13 @@ class TimedeltaIndex(DatetimeTimedeltaMixin): Examples -------- - >>> pd.TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days']) + >>> pd.TimedeltaIndex(["0 days", "1 days", "2 days", "3 days", "4 days"]) TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) We can also let pandas infer the frequency when possible. - >>> pd.TimedeltaIndex(np.arange(5) * 24 * 3600 * 1e9, freq='infer') + >>> pd.TimedeltaIndex(np.arange(5) * 24 * 3600 * 1e9, freq="infer") TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq='D') """ @@ -316,14 +316,14 @@ def timedelta_range( Examples -------- - >>> pd.timedelta_range(start='1 day', periods=4) + >>> pd.timedelta_range(start="1 day", periods=4) TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq='D') The ``closed`` parameter specifies which endpoint is included. The default behavior is to include both endpoints. - >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + >>> pd.timedelta_range(start="1 day", periods=4, closed="right") TimedeltaIndex(['2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq='D') @@ -331,7 +331,7 @@ def timedelta_range( Only fixed frequencies can be passed; non-fixed frequencies such as 'M' (month end) will raise. - >>> pd.timedelta_range(start='1 day', end='2 days', freq='6h') + >>> pd.timedelta_range(start="1 day", end="2 days", freq="6h") TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', '1 days 18:00:00', '2 days 00:00:00'], dtype='timedelta64[ns]', freq='6h') @@ -339,7 +339,7 @@ def timedelta_range( Specify ``start``, ``end``, and ``periods``; the frequency is generated automatically (linearly spaced). - >>> pd.timedelta_range(start='1 day', end='5 days', periods=4) + >>> pd.timedelta_range(start="1 day", end="5 days", periods=4) TimedeltaIndex(['1 days 00:00:00', '2 days 08:00:00', '3 days 16:00:00', '5 days 00:00:00'], dtype='timedelta64[ns]', freq=None) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 91e9d6fd602a6..4ccac6449d835 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -116,14 +116,17 @@ class _IndexSlice: Examples -------- - >>> midx = pd.MultiIndex.from_product([['A0', 'A1'], ['B0', 'B1', 'B2', 'B3']]) - >>> columns = ['foo', 'bar'] - >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))), - ... index=midx, columns=columns) + >>> midx = pd.MultiIndex.from_product([["A0", "A1"], ["B0", "B1", "B2", "B3"]]) + >>> columns = ["foo", "bar"] + >>> dfmi = pd.DataFrame( + ... np.arange(16).reshape((len(midx), len(columns))), + ... index=midx, + ... columns=columns, + ... ) Using the default slice command: - >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :] + >>> dfmi.loc[(slice(None), slice("B0", "B1")), :] foo bar A0 B0 0 1 B1 2 3 @@ -133,7 +136,7 @@ class _IndexSlice: Using the IndexSlice class for a more intuitive command: >>> idx = pd.IndexSlice - >>> dfmi.loc[idx[:, 'B0':'B1'], :] + >>> dfmi.loc[idx[:, "B0":"B1"], :] foo bar A0 B0 0 1 B1 2 3 @@ -195,9 +198,11 @@ def iloc(self) -> _iLocIndexer: Examples -------- - >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, - ... 
{'a': 100, 'b': 200, 'c': 300, 'd': 400}, - ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}] + >>> mydict = [ + ... {"a": 1, "b": 2, "c": 3, "d": 4}, + ... {"a": 100, "b": 200, "c": 300, "d": 400}, + ... {"a": 1000, "b": 2000, "c": 3000, "d": 4000}, + ... ] >>> df = pd.DataFrame(mydict) >>> df a b c d @@ -345,9 +350,11 @@ def loc(self) -> _LocIndexer: -------- **Getting values** - >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], - ... index=['cobra', 'viper', 'sidewinder'], - ... columns=['max_speed', 'shield']) + >>> df = pd.DataFrame( + ... [[1, 2], [4, 5], [7, 8]], + ... index=["cobra", "viper", "sidewinder"], + ... columns=["max_speed", "shield"], + ... ) >>> df max_speed shield cobra 1 2 @@ -356,27 +363,27 @@ def loc(self) -> _LocIndexer: Single label. Note this returns the row as a Series. - >>> df.loc['viper'] + >>> df.loc["viper"] max_speed 4 shield 5 Name: viper, dtype: int64 List of labels. Note using ``[[]]`` returns a DataFrame. - >>> df.loc[['viper', 'sidewinder']] + >>> df.loc[["viper", "sidewinder"]] max_speed shield viper 4 5 sidewinder 7 8 Single label for row and column - >>> df.loc['cobra', 'shield'] + >>> df.loc["cobra", "shield"] 2 Slice with labels for row and single label for column. As mentioned above, note that both the start and stop of the slice are included. - >>> df.loc['cobra':'viper', 'max_speed'] + >>> df.loc["cobra":"viper", "max_speed"] cobra 1 viper 4 Name: max_speed, dtype: int64 @@ -389,8 +396,9 @@ def loc(self) -> _LocIndexer: Alignable boolean Series: - >>> df.loc[pd.Series([False, True, False], - ... index=['viper', 'sidewinder', 'cobra'])] + >>> df.loc[ + ... pd.Series([False, True, False], index=["viper", "sidewinder", "cobra"]) + ... ] max_speed shield sidewinder 7 8 @@ -404,25 +412,25 @@ def loc(self) -> _LocIndexer: Conditional that returns a boolean Series - >>> df.loc[df['shield'] > 6] + >>> df.loc[df["shield"] > 6] max_speed shield sidewinder 7 8 Conditional that returns a boolean Series with column labels specified - >>> df.loc[df['shield'] > 6, ['max_speed']] + >>> df.loc[df["shield"] > 6, ["max_speed"]] max_speed sidewinder 7 Multiple conditional using ``&`` that returns a boolean Series - >>> df.loc[(df['max_speed'] > 1) & (df['shield'] < 8)] + >>> df.loc[(df["max_speed"] > 1) & (df["shield"] < 8)] max_speed shield viper 4 5 Multiple conditional using ``|`` that returns a boolean Series - >>> df.loc[(df['max_speed'] > 4) | (df['shield'] < 5)] + >>> df.loc[(df["max_speed"] > 4) | (df["shield"] < 5)] max_speed shield cobra 1 2 sidewinder 7 8 @@ -439,7 +447,7 @@ def loc(self) -> _LocIndexer: Callable that returns a boolean Series - >>> df.loc[lambda df: df['shield'] == 8] + >>> df.loc[lambda df: df["shield"] == 8] max_speed shield sidewinder 7 8 @@ -447,7 +455,7 @@ def loc(self) -> _LocIndexer: Set value for all items matching the list of labels - >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50 + >>> df.loc[["viper", "sidewinder"], ["shield"]] = 50 >>> df max_speed shield cobra 1 2 @@ -456,7 +464,7 @@ def loc(self) -> _LocIndexer: Set value for an entire row - >>> df.loc['cobra'] = 10 + >>> df.loc["cobra"] = 10 >>> df max_speed shield cobra 10 10 @@ -465,7 +473,7 @@ def loc(self) -> _LocIndexer: Set value for an entire column - >>> df.loc[:, 'max_speed'] = 30 + >>> df.loc[:, "max_speed"] = 30 >>> df max_speed shield cobra 30 10 @@ -474,7 +482,7 @@ def loc(self) -> _LocIndexer: Set value for rows matching callable condition - >>> df.loc[df['shield'] > 35] = 0 + >>> df.loc[df["shield"] > 35] = 0 >>> df max_speed shield cobra 30 10 @@ 
-505,8 +513,11 @@ def loc(self) -> _LocIndexer: Another example using integers for the index - >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]], - ... index=[7, 8, 9], columns=['max_speed', 'shield']) + >>> df = pd.DataFrame( + ... [[1, 2], [4, 5], [7, 8]], + ... index=[7, 8, 9], + ... columns=["max_speed", "shield"], + ... ) >>> df max_speed shield 7 1 2 @@ -527,14 +538,16 @@ def loc(self) -> _LocIndexer: A number of examples using a DataFrame with a MultiIndex >>> tuples = [ - ... ('cobra', 'mark i'), ('cobra', 'mark ii'), - ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'), - ... ('viper', 'mark ii'), ('viper', 'mark iii') + ... ("cobra", "mark i"), + ... ("cobra", "mark ii"), + ... ("sidewinder", "mark i"), + ... ("sidewinder", "mark ii"), + ... ("viper", "mark ii"), + ... ("viper", "mark iii"), ... ] >>> index = pd.MultiIndex.from_tuples(tuples) - >>> values = [[12, 2], [0, 4], [10, 20], - ... [1, 4], [7, 1], [16, 36]] - >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index) + >>> values = [[12, 2], [0, 4], [10, 20], [1, 4], [7, 1], [16, 36]] + >>> df = pd.DataFrame(values, columns=["max_speed", "shield"], index=index) >>> df max_speed shield cobra mark i 12 2 @@ -546,14 +559,14 @@ def loc(self) -> _LocIndexer: Single label. Note this returns a DataFrame with a single index. - >>> df.loc['cobra'] + >>> df.loc["cobra"] max_speed shield mark i 12 2 mark ii 0 4 Single index tuple. Note this returns a Series. - >>> df.loc[('cobra', 'mark ii')] + >>> df.loc[("cobra", "mark ii")] max_speed 0 shield 4 Name: (cobra, mark ii), dtype: int64 @@ -561,25 +574,25 @@ def loc(self) -> _LocIndexer: Single label for row and column. Similar to passing in a tuple, this returns a Series. - >>> df.loc['cobra', 'mark i'] + >>> df.loc["cobra", "mark i"] max_speed 12 shield 2 Name: (cobra, mark i), dtype: int64 Single tuple. Note using ``[[]]`` returns a DataFrame. - >>> df.loc[[('cobra', 'mark ii')]] + >>> df.loc[[("cobra", "mark ii")]] max_speed shield cobra mark ii 0 4 Single tuple for the index with a single label for the column - >>> df.loc[('cobra', 'mark i'), 'shield'] + >>> df.loc[("cobra", "mark i"), "shield"] 2 Slice from index tuple to single label - >>> df.loc[('cobra', 'mark i'):'viper'] + >>> df.loc[("cobra", "mark i") : "viper"] max_speed shield cobra mark i 12 2 mark ii 0 4 @@ -590,7 +603,7 @@ def loc(self) -> _LocIndexer: Slice from index tuple to index tuple - >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')] + >>> df.loc[("cobra", "mark i") : ("viper", "mark ii")] max_speed shield cobra mark i 12 2 mark ii 0 4 @@ -642,8 +655,11 @@ def at(self) -> _AtIndexer: Examples -------- - >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], - ... index=[4, 5, 6], columns=['A', 'B', 'C']) + >>> df = pd.DataFrame( + ... [[0, 2, 3], [0, 4, 1], [10, 20, 30]], + ... index=[4, 5, 6], + ... columns=["A", "B", "C"], + ... ) >>> df A B C 4 0 2 3 @@ -652,18 +668,18 @@ def at(self) -> _AtIndexer: Get value at specified row/column pair - >>> df.at[4, 'B'] + >>> df.at[4, "B"] 2 Set value at specified row/column pair - >>> df.at[4, 'B'] = 10 - >>> df.at[4, 'B'] + >>> df.at[4, "B"] = 10 + >>> df.at[4, "B"] 10 Get value within a Series - >>> df.loc[5].at['B'] + >>> df.loc[5].at["B"] 4 """ return _AtIndexer("at", self) @@ -690,8 +706,9 @@ def iat(self) -> _iAtIndexer: Examples -------- - >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]], - ... columns=['A', 'B', 'C']) + >>> df = pd.DataFrame( + ... [[0, 2, 3], [0, 4, 1], [10, 20, 30]], columns=["A", "B", "C"] + ... 
) >>> df A B C 0 0 2 3 diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index ba2d275e88b32..b296e6016a1ac 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -50,12 +50,13 @@ def from_dataframe(df, allow_copy: bool = True) -> pd.DataFrame: Examples -------- - >>> df_not_necessarily_pandas = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df_not_necessarily_pandas = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> interchange_object = df_not_necessarily_pandas.__dataframe__() >>> interchange_object.column_names() Index(['A', 'B'], dtype='object') - >>> df_pandas = (pd.api.interchange.from_dataframe - ... (interchange_object.select_columns_by_name(['A']))) + >>> df_pandas = pd.api.interchange.from_dataframe( + ... interchange_object.select_columns_by_name(["A"]) + ... ) >>> df_pandas A 0 1 diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 2ca42d1621b97..cdc2ff6c51b06 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -1100,7 +1100,7 @@ def _interp_limit( def _interp_limit(invalid, fw_limit, bw_limit): for x in np.where(invalid)[0]: - if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): + if invalid[max(0, x - fw_limit) : x + bw_limit + 1].all(): yield x """ # handle forward first; the backward direction is the same except diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index fb5980184355c..0404da189dfa5 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -89,9 +89,9 @@ def mask_zero_div_zero(x, y, result: np.ndarray) -> np.ndarray: >>> x = np.array([1, 0, -1], dtype=np.int64) >>> x array([ 1, 0, -1]) - >>> y = 0 # int 0; numpy behavior is different with float + >>> y = 0 # int 0; numpy behavior is different with float >>> result = x // y - >>> result # raw numpy result does not fill division by zero + >>> result # raw numpy result does not fill division by zero array([0, 0, 0]) >>> mask_zero_div_zero(x, y, result) array([ inf, nan, -inf]) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2a36c0f1ef549..34c61c6f26106 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -395,16 +395,13 @@ def transform(self, arg, *args, **kwargs): Examples -------- - >>> s = pd.Series([1, 2], - ... index=pd.date_range('20180101', - ... periods=2, - ... freq='1h')) + >>> s = pd.Series([1, 2], index=pd.date_range("20180101", periods=2, freq="1h")) >>> s 2018-01-01 00:00:00 1 2018-01-01 01:00:00 2 Freq: h, dtype: int64 - >>> resampled = s.resample('15min') + >>> resampled = s.resample("15min") >>> resampled.transform(lambda x: (x - x.mean()) / x.std()) 2018-01-01 00:00:00 NaN 2018-01-01 01:00:00 NaN @@ -557,8 +554,12 @@ def ffill(self, limit: int | None = None): -------- Here we only create a ``Series``. - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... 
) >>> ser 2023-01-01 1 2023-01-15 2 @@ -568,7 +569,7 @@ def ffill(self, limit: int | None = None): Example for ``ffill`` with downsampling (we have fewer dates after resampling): - >>> ser.resample('MS').ffill() + >>> ser.resample("MS").ffill() 2023-01-01 1 2023-02-01 3 Freq: MS, dtype: int64 @@ -576,7 +577,7 @@ def ffill(self, limit: int | None = None): Example for ``ffill`` with upsampling (fill the new dates with the previous value): - >>> ser.resample('W').ffill() + >>> ser.resample("W").ffill() 2023-01-01 1 2023-01-08 1 2023-01-15 2 @@ -590,7 +591,7 @@ def ffill(self, limit: int | None = None): With upsampling and limiting (only fill the first new date with the previous value): - >>> ser.resample('W').ffill(limit=1) + >>> ser.resample("W").ffill(limit=1) 2023-01-01 1.0 2023-01-08 1.0 2023-01-15 2.0 @@ -635,16 +636,13 @@ def nearest(self, limit: int | None = None): Examples -------- - >>> s = pd.Series([1, 2], - ... index=pd.date_range('20180101', - ... periods=2, - ... freq='1h')) + >>> s = pd.Series([1, 2], index=pd.date_range("20180101", periods=2, freq="1h")) >>> s 2018-01-01 00:00:00 1 2018-01-01 01:00:00 2 Freq: h, dtype: int64 - >>> s.resample('15min').nearest() + >>> s.resample("15min").nearest() 2018-01-01 00:00:00 1 2018-01-01 00:15:00 1 2018-01-01 00:30:00 2 @@ -654,7 +652,7 @@ def nearest(self, limit: int | None = None): Limit the number of upsampled values imputed by the nearest: - >>> s.resample('15min').nearest(limit=1) + >>> s.resample("15min").nearest(limit=1) 2018-01-01 00:00:00 1.0 2018-01-01 00:15:00 1.0 2018-01-01 00:30:00 NaN @@ -706,15 +704,16 @@ def bfill(self, limit: int | None = None): -------- Resampling a Series: - >>> s = pd.Series([1, 2, 3], - ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> s = pd.Series( + ... [1, 2, 3], index=pd.date_range("20180101", periods=3, freq="h") + ... ) >>> s 2018-01-01 00:00:00 1 2018-01-01 01:00:00 2 2018-01-01 02:00:00 3 Freq: h, dtype: int64 - >>> s.resample('30min').bfill() + >>> s.resample("30min").bfill() 2018-01-01 00:00:00 1 2018-01-01 00:30:00 2 2018-01-01 01:00:00 2 @@ -722,7 +721,7 @@ def bfill(self, limit: int | None = None): 2018-01-01 02:00:00 3 Freq: 30min, dtype: int64 - >>> s.resample('15min').bfill(limit=2) + >>> s.resample("15min").bfill(limit=2) 2018-01-01 00:00:00 1.0 2018-01-01 00:15:00 NaN 2018-01-01 00:30:00 2.0 @@ -736,16 +735,17 @@ def bfill(self, limit: int | None = None): Resampling a DataFrame that has missing values: - >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, - ... index=pd.date_range('20180101', periods=3, - ... freq='h')) + >>> df = pd.DataFrame( + ... {"a": [2, np.nan, 6], "b": [1, 3, 5]}, + ... index=pd.date_range("20180101", periods=3, freq="h"), + ... ) >>> df a b 2018-01-01 00:00:00 2.0 1 2018-01-01 01:00:00 NaN 3 2018-01-01 02:00:00 6.0 5 - >>> df.resample('30min').bfill() + >>> df.resample("30min").bfill() a b 2018-01-01 00:00:00 2.0 1 2018-01-01 00:30:00 NaN 3 @@ -753,7 +753,7 @@ def bfill(self, limit: int | None = None): 2018-01-01 01:30:00 6.0 5 2018-01-01 02:00:00 6.0 5 - >>> df.resample('15min').bfill(limit=2) + >>> df.resample("15min").bfill(limit=2) a b 2018-01-01 00:00:00 2.0 1.0 2018-01-01 00:15:00 NaN NaN @@ -818,8 +818,9 @@ def fillna(self, method, limit: int | None = None): -------- Resampling a Series: - >>> s = pd.Series([1, 2, 3], - ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> s = pd.Series( + ... [1, 2, 3], index=pd.date_range("20180101", periods=3, freq="h") + ... 
) >>> s 2018-01-01 00:00:00 1 2018-01-01 01:00:00 2 @@ -836,7 +837,7 @@ def fillna(self, method, limit: int | None = None): 2018-01-01 02:00:00 3.0 Freq: 30min, dtype: float64 - >>> s.resample('30min').fillna("backfill") + >>> s.resample("30min").fillna("backfill") 2018-01-01 00:00:00 1 2018-01-01 00:30:00 2 2018-01-01 01:00:00 2 @@ -844,7 +845,7 @@ def fillna(self, method, limit: int | None = None): 2018-01-01 02:00:00 3 Freq: 30min, dtype: int64 - >>> s.resample('15min').fillna("backfill", limit=2) + >>> s.resample("15min").fillna("backfill", limit=2) 2018-01-01 00:00:00 1.0 2018-01-01 00:15:00 NaN 2018-01-01 00:30:00 2.0 @@ -856,7 +857,7 @@ def fillna(self, method, limit: int | None = None): 2018-01-01 02:00:00 3.0 Freq: 15min, dtype: float64 - >>> s.resample('30min').fillna("pad") + >>> s.resample("30min").fillna("pad") 2018-01-01 00:00:00 1 2018-01-01 00:30:00 1 2018-01-01 01:00:00 2 @@ -864,7 +865,7 @@ def fillna(self, method, limit: int | None = None): 2018-01-01 02:00:00 3 Freq: 30min, dtype: int64 - >>> s.resample('30min').fillna("nearest") + >>> s.resample("30min").fillna("nearest") 2018-01-01 00:00:00 1 2018-01-01 00:30:00 2 2018-01-01 01:00:00 2 @@ -874,15 +875,16 @@ def fillna(self, method, limit: int | None = None): Missing values present before the upsampling are not affected. - >>> sm = pd.Series([1, None, 3], - ... index=pd.date_range('20180101', periods=3, freq='h')) + >>> sm = pd.Series( + ... [1, None, 3], index=pd.date_range("20180101", periods=3, freq="h") + ... ) >>> sm 2018-01-01 00:00:00 1.0 2018-01-01 01:00:00 NaN 2018-01-01 02:00:00 3.0 Freq: h, dtype: float64 - >>> sm.resample('30min').fillna('backfill') + >>> sm.resample("30min").fillna("backfill") 2018-01-01 00:00:00 1.0 2018-01-01 00:30:00 NaN 2018-01-01 01:00:00 NaN @@ -890,7 +892,7 @@ def fillna(self, method, limit: int | None = None): 2018-01-01 02:00:00 3.0 Freq: 30min, dtype: float64 - >>> sm.resample('30min').fillna('pad') + >>> sm.resample("30min").fillna("pad") 2018-01-01 00:00:00 1.0 2018-01-01 00:30:00 1.0 2018-01-01 01:00:00 NaN @@ -898,7 +900,7 @@ def fillna(self, method, limit: int | None = None): 2018-01-01 02:00:00 3.0 Freq: 30min, dtype: float64 - >>> sm.resample('30min').fillna('nearest') + >>> sm.resample("30min").fillna("nearest") 2018-01-01 00:00:00 1.0 2018-01-01 00:30:00 NaN 2018-01-01 01:00:00 NaN @@ -909,16 +911,17 @@ def fillna(self, method, limit: int | None = None): DataFrame resampling is done column-wise. All the same options are available. - >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]}, - ... index=pd.date_range('20180101', periods=3, - ... freq='h')) + >>> df = pd.DataFrame( + ... {"a": [2, np.nan, 6], "b": [1, 3, 5]}, + ... index=pd.date_range("20180101", periods=3, freq="h"), + ... ) >>> df a b 2018-01-01 00:00:00 2.0 1 2018-01-01 01:00:00 NaN 3 2018-01-01 02:00:00 6.0 5 - >>> df.resample('30min').fillna("bfill") + >>> df.resample("30min").fillna("bfill") a b 2018-01-01 00:00:00 2.0 1 2018-01-01 00:30:00 NaN 3 @@ -1136,15 +1139,19 @@ def asfreq(self, fill_value=None): Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-31', '2023-02-01', '2023-02-28'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-31", "2023-02-01", "2023-02-28"] + ... ), + ... 
) >>> ser 2023-01-01 1 2023-01-31 2 2023-02-01 3 2023-02-28 4 dtype: int64 - >>> ser.resample('MS').asfreq() + >>> ser.resample("MS").asfreq() 2023-01-01 1 2023-02-01 3 Freq: MS, dtype: int64 @@ -1180,15 +1187,19 @@ def sum( Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').sum() + >>> ser.resample("MS").sum() 2023-01-01 3 2023-02-01 7 Freq: MS, dtype: int64 @@ -1224,15 +1235,19 @@ def prod( Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').prod() + >>> ser.resample("MS").prod() 2023-01-01 2 2023-02-01 12 Freq: MS, dtype: int64 @@ -1254,15 +1269,19 @@ def min( Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').min() + >>> ser.resample("MS").min() 2023-01-01 1 2023-02-01 3 Freq: MS, dtype: int64 @@ -1284,15 +1303,19 @@ def max( Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').max() + >>> ser.resample("MS").max() 2023-01-01 2 2023-02-01 4 Freq: MS, dtype: int64 @@ -1353,15 +1376,19 @@ def mean( Examples -------- - >>> ser = pd.Series([1, 2, 3, 4], index=pd.DatetimeIndex( - ... ['2023-01-01', '2023-01-15', '2023-02-01', '2023-02-15'])) + >>> ser = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.DatetimeIndex( + ... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"] + ... ), + ... ) >>> ser 2023-01-01 1 2023-01-15 2 2023-02-01 3 2023-02-15 4 dtype: int64 - >>> ser.resample('MS').mean() + >>> ser.resample("MS").mean() 2023-01-01 1.5 2023-02-01 3.5 Freq: MS, dtype: float64 @@ -1398,14 +1425,20 @@ def std( Examples -------- - >>> ser = pd.Series([1, 3, 2, 4, 3, 8], - ... index=pd.DatetimeIndex(['2023-01-01', - ... '2023-01-10', - ... '2023-01-15', - ... '2023-02-01', - ... '2023-02-10', - ... '2023-02-15'])) - >>> ser.resample('MS').std() + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").std() 2023-01-01 1.000000 2023-02-01 2.645751 Freq: MS, dtype: float64 @@ -1443,19 +1476,25 @@ def var( Examples -------- - >>> ser = pd.Series([1, 3, 2, 4, 3, 8], - ... index=pd.DatetimeIndex(['2023-01-01', - ... '2023-01-10', - ... '2023-01-15', - ... 
'2023-02-01', - ... '2023-02-10', - ... '2023-02-15'])) - >>> ser.resample('MS').var() + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").var() 2023-01-01 1.0 2023-02-01 7.0 Freq: MS, dtype: float64 - >>> ser.resample('MS').var(ddof=0) + >>> ser.resample("MS").var(ddof=0) 2023-01-01 0.666667 2023-02-01 4.666667 Freq: MS, dtype: float64 @@ -1563,19 +1602,25 @@ def quantile(self, q: float | list[float] | AnyArrayLike = 0.5, **kwargs): Examples -------- - >>> ser = pd.Series([1, 3, 2, 4, 3, 8], - ... index=pd.DatetimeIndex(['2023-01-01', - ... '2023-01-10', - ... '2023-01-15', - ... '2023-02-01', - ... '2023-02-10', - ... '2023-02-15'])) - >>> ser.resample('MS').quantile() + >>> ser = pd.Series( + ... [1, 3, 2, 4, 3, 8], + ... index=pd.DatetimeIndex( + ... [ + ... "2023-01-01", + ... "2023-01-10", + ... "2023-01-15", + ... "2023-02-01", + ... "2023-02-10", + ... "2023-02-15", + ... ] + ... ), + ... ) + >>> ser.resample("MS").quantile() 2023-01-01 2.0 2023-02-01 4.0 Freq: MS, dtype: float64 - >>> ser.resample('MS').quantile(.25) + >>> ser.resample("MS").quantile(0.25) 2023-01-01 1.5 2023-02-01 3.5 Freq: MS, dtype: float64 diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 2558532bfb029..7e0bdbcb0ddba 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -238,8 +238,8 @@ def concat( -------- Combine two ``Series``. - >>> s1 = pd.Series(['a', 'b']) - >>> s2 = pd.Series(['c', 'd']) + >>> s1 = pd.Series(["a", "b"]) + >>> s2 = pd.Series(["c", "d"]) >>> pd.concat([s1, s2]) 0 a 1 b @@ -260,7 +260,7 @@ def concat( Add a hierarchical index at the outermost level of the data with the ``keys`` option. - >>> pd.concat([s1, s2], keys=['s1', 's2']) + >>> pd.concat([s1, s2], keys=["s1", "s2"]) s1 0 a 1 b s2 0 c @@ -269,8 +269,7 @@ def concat( Label the index keys you create with the ``names`` option. - >>> pd.concat([s1, s2], keys=['s1', 's2'], - ... names=['Series name', 'Row ID']) + >>> pd.concat([s1, s2], keys=["s1", "s2"], names=["Series name", "Row ID"]) Series name Row ID s1 0 a 1 b @@ -280,14 +279,12 @@ def concat( Combine two ``DataFrame`` objects with identical columns. - >>> df1 = pd.DataFrame([['a', 1], ['b', 2]], - ... columns=['letter', 'number']) + >>> df1 = pd.DataFrame([["a", 1], ["b", 2]], columns=["letter", "number"]) >>> df1 letter number 0 a 1 1 b 2 - >>> df2 = pd.DataFrame([['c', 3], ['d', 4]], - ... columns=['letter', 'number']) + >>> df2 = pd.DataFrame([["c", 3], ["d", 4]], columns=["letter", "number"]) >>> df2 letter number 0 c 3 @@ -303,8 +300,9 @@ def concat( and return everything. Columns outside the intersection will be filled with ``NaN`` values. - >>> df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], - ... columns=['letter', 'number', 'animal']) + >>> df3 = pd.DataFrame( + ... [["c", 3, "cat"], ["d", 4, "dog"]], columns=["letter", "number", "animal"] + ... ) >>> df3 letter number animal 0 c 3 cat @@ -330,8 +328,9 @@ def concat( Combine ``DataFrame`` objects horizontally along the x axis by passing in ``axis=1``. - >>> df4 = pd.DataFrame([['bird', 'polly'], ['monkey', 'george']], - ... columns=['animal', 'name']) + >>> df4 = pd.DataFrame( + ... [["bird", "polly"], ["monkey", "george"]], columns=["animal", "name"] + ... 
) >>> pd.concat([df1, df4], axis=1) letter number animal name 0 a 1 bird polly @@ -340,11 +339,11 @@ def concat( Prevent the result from including duplicate index values with the ``verify_integrity`` option. - >>> df5 = pd.DataFrame([1], index=['a']) + >>> df5 = pd.DataFrame([1], index=["a"]) >>> df5 0 a 1 - >>> df6 = pd.DataFrame([2], index=['a']) + >>> df6 = pd.DataFrame([2], index=["a"]) >>> df6 0 a 2 @@ -355,11 +354,11 @@ def concat( Append a single row to the end of a ``DataFrame`` object. - >>> df7 = pd.DataFrame({'a': 1, 'b': 2}, index=[0]) + >>> df7 = pd.DataFrame({"a": 1, "b": 2}, index=[0]) >>> df7 a b 0 1 2 - >>> new_row = pd.Series({'a': 3, 'b': 4}) + >>> new_row = pd.Series({"a": 3, "b": 4}) >>> new_row a 3 b 4 diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 2c74538175a58..fae5c082c72a0 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -101,7 +101,7 @@ def get_dummies( Examples -------- - >>> s = pd.Series(list('abca')) + >>> s = pd.Series(list("abca")) >>> pd.get_dummies(s) a b c @@ -110,7 +110,7 @@ def get_dummies( 2 False False True 3 True False False - >>> s1 = ['a', 'b', np.nan] + >>> s1 = ["a", "b", np.nan] >>> pd.get_dummies(s1) a b @@ -124,16 +124,15 @@ def get_dummies( 1 False True False 2 False False True - >>> df = pd.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'], - ... 'C': [1, 2, 3]}) + >>> df = pd.DataFrame({"A": ["a", "b", "a"], "B": ["b", "a", "c"], "C": [1, 2, 3]}) - >>> pd.get_dummies(df, prefix=['col1', 'col2']) + >>> pd.get_dummies(df, prefix=["col1", "col2"]) C col1_a col1_b col2_a col2_b col2_c 0 1 True False False True False 1 2 False True True False False 2 3 True False False False True - >>> pd.get_dummies(pd.Series(list('abcaa'))) + >>> pd.get_dummies(pd.Series(list("abcaa"))) a b c 0 True False False 1 False True False @@ -141,7 +140,7 @@ def get_dummies( 3 True False False 4 True False False - >>> pd.get_dummies(pd.Series(list('abcaa')), drop_first=True) + >>> pd.get_dummies(pd.Series(list("abcaa")), drop_first=True) b c 0 False False 1 True False @@ -149,7 +148,7 @@ def get_dummies( 3 False False 4 False False - >>> pd.get_dummies(pd.Series(list('abc')), dtype=float) + >>> pd.get_dummies(pd.Series(list("abc")), dtype=float) a b c 0 1.0 0.0 0.0 1 0.0 1.0 0.0 @@ -426,8 +425,7 @@ def from_dummies( Examples -------- - >>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0], - ... "c": [0, 0, 1, 0]}) + >>> df = pd.DataFrame({"a": [1, 0, 0, 1], "b": [0, 1, 0, 0], "c": [0, 0, 1, 0]}) >>> df a b c @@ -442,9 +440,15 @@ def from_dummies( 2 c 3 a - >>> df = pd.DataFrame({"col1_a": [1, 0, 1], "col1_b": [0, 1, 0], - ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], - ... "col2_c": [0, 0, 1]}) + >>> df = pd.DataFrame( + ... { + ... "col1_a": [1, 0, 1], + ... "col1_b": [0, 1, 0], + ... "col2_a": [0, 1, 0], + ... "col2_b": [1, 0, 0], + ... "col2_c": [0, 0, 1], + ... } + ... ) >>> df col1_a col1_b col2_a col2_b col2_c @@ -458,9 +462,15 @@ def from_dummies( 1 b a 2 a c - >>> df = pd.DataFrame({"col1_a": [1, 0, 0], "col1_b": [0, 1, 0], - ... "col2_a": [0, 1, 0], "col2_b": [1, 0, 0], - ... "col2_c": [0, 0, 0]}) + >>> df = pd.DataFrame( + ... { + ... "col1_a": [1, 0, 0], + ... "col1_b": [0, 1, 0], + ... "col2_a": [0, 1, 0], + ... "col2_b": [1, 0, 0], + ... "col2_c": [0, 0, 0], + ... } + ... 
) >>> df col1_a col1_b col2_a col2_b col2_c diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 3ee896275a67a..7b8ef8da3ab46 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -176,15 +176,21 @@ def lreshape(data: DataFrame, groups: dict, dropna: bool = True) -> DataFrame: Examples -------- - >>> data = pd.DataFrame({'hr1': [514, 573], 'hr2': [545, 526], - ... 'team': ['Red Sox', 'Yankees'], - ... 'year1': [2007, 2007], 'year2': [2008, 2008]}) + >>> data = pd.DataFrame( + ... { + ... "hr1": [514, 573], + ... "hr2": [545, 526], + ... "team": ["Red Sox", "Yankees"], + ... "year1": [2007, 2007], + ... "year2": [2008, 2008], + ... } + ... ) >>> data hr1 hr2 team year1 year2 0 514 545 Red Sox 2007 2008 1 573 526 Yankees 2007 2008 - >>> pd.lreshape(data, {'year': ['year1', 'year2'], 'hr': ['hr1', 'hr2']}) + >>> pd.lreshape(data, {"year": ["year1", "year2"], "hr": ["hr1", "hr2"]}) team year hr 0 Red Sox 2007 514 1 Yankees 2007 573 @@ -290,12 +296,15 @@ def wide_to_long( Examples -------- >>> np.random.seed(123) - >>> df = pd.DataFrame({"A1970" : {0 : "a", 1 : "b", 2 : "c"}, - ... "A1980" : {0 : "d", 1 : "e", 2 : "f"}, - ... "B1970" : {0 : 2.5, 1 : 1.2, 2 : .7}, - ... "B1980" : {0 : 3.2, 1 : 1.3, 2 : .1}, - ... "X" : dict(zip(range(3), np.random.randn(3))) - ... }) + >>> df = pd.DataFrame( + ... { + ... "A1970": {0: "a", 1: "b", 2: "c"}, + ... "A1980": {0: "d", 1: "e", 2: "f"}, + ... "B1970": {0: 2.5, 1: 1.2, 2: 0.7}, + ... "B1980": {0: 3.2, 1: 1.3, 2: 0.1}, + ... "X": dict(zip(range(3), np.random.randn(3))), + ... } + ... ) >>> df["id"] = df.index >>> df A1970 A1980 B1970 B1980 X id @@ -315,12 +324,14 @@ def wide_to_long( With multiple id columns - >>> df = pd.DataFrame({ - ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], - ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], - ... 'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], - ... 'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] - ... }) + >>> df = pd.DataFrame( + ... { + ... "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... "ht1": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... "ht2": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], + ... } + ... ) >>> df famid birth ht1 ht2 0 1 1 2.8 3.4 @@ -332,7 +343,7 @@ def wide_to_long( 6 3 1 2.2 3.3 7 3 2 2.3 3.4 8 3 3 2.1 2.9 - >>> long_format = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age') + >>> long_format = pd.wide_to_long(df, stubnames="ht", i=["famid", "birth"], j="age") >>> long_format ... # doctest: +NORMALIZE_WHITESPACE ht @@ -359,7 +370,7 @@ def wide_to_long( Going from long back to wide just takes some creative use of `unstack` >>> wide_format = long_format.unstack() - >>> wide_format.columns = wide_format.columns.map('{0[0]}{0[1]}'.format) + >>> wide_format.columns = wide_format.columns.map("{0[0]}{0[1]}".format) >>> wide_format.reset_index() famid birth ht1 ht2 0 1 1 2.8 3.4 @@ -375,20 +386,23 @@ def wide_to_long( Less wieldy column names are also handled >>> np.random.seed(0) - >>> df = pd.DataFrame({'A(weekly)-2010': np.random.rand(3), - ... 'A(weekly)-2011': np.random.rand(3), - ... 'B(weekly)-2010': np.random.rand(3), - ... 'B(weekly)-2011': np.random.rand(3), - ... 'X' : np.random.randint(3, size=3)}) - >>> df['id'] = df.index + >>> df = pd.DataFrame( + ... { + ... "A(weekly)-2010": np.random.rand(3), + ... "A(weekly)-2011": np.random.rand(3), + ... "B(weekly)-2010": np.random.rand(3), + ... "B(weekly)-2011": np.random.rand(3), + ... "X": np.random.randint(3, size=3), + ... } + ... 
) + >>> df["id"] = df.index >>> df # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS A(weekly)-2010 A(weekly)-2011 B(weekly)-2010 B(weekly)-2011 X id 0 0.548814 0.544883 0.437587 0.383442 0 0 1 0.715189 0.423655 0.891773 0.791725 1 1 2 0.602763 0.645894 0.963663 0.528895 1 2 - >>> pd.wide_to_long(df, ['A(weekly)', 'B(weekly)'], i='id', - ... j='year', sep='-') + >>> pd.wide_to_long(df, ["A(weekly)", "B(weekly)"], i="id", j="year", sep="-") ... # doctest: +NORMALIZE_WHITESPACE X A(weekly) B(weekly) id year @@ -403,8 +417,13 @@ def wide_to_long( stubnames and pass that list on to wide_to_long >>> stubnames = sorted( - ... set([match[0] for match in df.columns.str.findall( - ... r'[A-B]\(.*\)').values if match != []]) + ... set( + ... [ + ... match[0] + ... for match in df.columns.str.findall(r"[A-B]\(.*\)").values + ... if match != [] + ... ] + ... ) ... ) >>> list(stubnames) ['A(weekly)', 'B(weekly)'] @@ -412,12 +431,14 @@ def wide_to_long( All of the above examples have integers as suffixes. It is possible to have non-integers as suffixes. - >>> df = pd.DataFrame({ - ... 'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3], - ... 'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3], - ... 'ht_one': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], - ... 'ht_two': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9] - ... }) + >>> df = pd.DataFrame( + ... { + ... "famid": [1, 1, 1, 2, 2, 2, 3, 3, 3], + ... "birth": [1, 2, 3, 1, 2, 3, 1, 2, 3], + ... "ht_one": [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1], + ... "ht_two": [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9], + ... } + ... ) >>> df famid birth ht_one ht_two 0 1 1 2.8 3.4 @@ -430,8 +451,9 @@ def wide_to_long( 7 3 2 2.3 3.4 8 3 3 2.1 2.9 - >>> long_format = pd.wide_to_long(df, stubnames='ht', i=['famid', 'birth'], j='age', - ... sep='_', suffix=r'\w+') + >>> long_format = pd.wide_to_long( + ... df, stubnames="ht", i=["famid", "birth"], j="age", sep="_", suffix=r"\w+" + ... ) >>> long_format ... # doctest: +NORMALIZE_WHITESPACE ht diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 95261394994ae..4f10fd729723e 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -366,7 +366,7 @@ def merge_ordered( ... { ... "key": ["a", "c", "e", "a", "c", "e"], ... "lvalue": [1, 2, 3, 1, 2, 3], - ... "group": ["a", "a", "a", "b", "b", "b"] + ... "group": ["a", "a", "a", "b", "b", "b"], ... } ... ) >>> df1 @@ -597,7 +597,7 @@ def merge_asof( ... pd.Timestamp("2016-05-25 13:30:00.048"), ... pd.Timestamp("2016-05-25 13:30:00.049"), ... pd.Timestamp("2016-05-25 13:30:00.072"), - ... pd.Timestamp("2016-05-25 13:30:00.075") + ... pd.Timestamp("2016-05-25 13:30:00.075"), ... ], ... "ticker": [ ... "GOOG", @@ -607,10 +607,10 @@ def merge_asof( ... "GOOG", ... "AAPL", ... "GOOG", - ... "MSFT" + ... "MSFT", ... ], ... "bid": [720.50, 51.95, 51.97, 51.99, 720.50, 97.99, 720.50, 52.01], - ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03] + ... "ask": [720.93, 51.96, 51.98, 52.00, 720.93, 98.01, 720.88, 52.03], ... } ... ) >>> quotes @@ -631,11 +631,11 @@ def merge_asof( ... pd.Timestamp("2016-05-25 13:30:00.038"), ... pd.Timestamp("2016-05-25 13:30:00.048"), ... pd.Timestamp("2016-05-25 13:30:00.048"), - ... pd.Timestamp("2016-05-25 13:30:00.048") + ... pd.Timestamp("2016-05-25 13:30:00.048"), ... ], ... "ticker": ["MSFT", "MSFT", "GOOG", "GOOG", "AAPL"], ... "price": [51.95, 51.95, 720.77, 720.92, 98.0], - ... "quantity": [75, 155, 100, 100, 100] + ... "quantity": [75, 155, 100, 100, 100], ... } ... 
) >>> trades @@ -678,7 +678,7 @@ def merge_asof( ... on="time", ... by="ticker", ... tolerance=pd.Timedelta("10ms"), - ... allow_exact_matches=False + ... allow_exact_matches=False, ... ) time ticker price quantity bid ask 0 2016-05-25 13:30:00.023 MSFT 51.95 75 NaN NaN diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 51d91e4113c4e..7d563ed7b62f6 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -651,14 +651,55 @@ def crosstab( Examples -------- - >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", - ... "bar", "bar", "foo", "foo", "foo"], dtype=object) - >>> b = np.array(["one", "one", "one", "two", "one", "one", - ... "one", "two", "two", "two", "one"], dtype=object) - >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", - ... "shiny", "dull", "shiny", "shiny", "shiny"], - ... dtype=object) - >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) + >>> a = np.array( + ... [ + ... "foo", + ... "foo", + ... "foo", + ... "foo", + ... "bar", + ... "bar", + ... "bar", + ... "bar", + ... "foo", + ... "foo", + ... "foo", + ... ], + ... dtype=object, + ... ) + >>> b = np.array( + ... [ + ... "one", + ... "one", + ... "one", + ... "two", + ... "one", + ... "one", + ... "one", + ... "two", + ... "two", + ... "two", + ... "one", + ... ], + ... dtype=object, + ... ) + >>> c = np.array( + ... [ + ... "dull", + ... "dull", + ... "shiny", + ... "dull", + ... "dull", + ... "shiny", + ... "shiny", + ... "dull", + ... "shiny", + ... "shiny", + ... "shiny", + ... ], + ... dtype=object, + ... ) + >>> pd.crosstab(a, [b, c], rownames=["a"], colnames=["b", "c"]) b one two c dull shiny dull shiny a @@ -669,8 +710,8 @@ def crosstab( shown in the output because dropna is True by default. Set dropna=False to preserve categories with no data. - >>> foo = pd.Categorical(['a', 'b'], categories=['a', 'b', 'c']) - >>> bar = pd.Categorical(['d', 'e'], categories=['d', 'e', 'f']) + >>> foo = pd.Categorical(["a", "b"], categories=["a", "b", "c"]) + >>> bar = pd.Categorical(["d", "e"], categories=["d", "e", "f"]) >>> pd.crosstab(foo, bar) col_0 d e row_0 diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index ad313b112a2e7..bb544b588dd35 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -82,8 +82,9 @@ class _Unstacker: Examples -------- - >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), - ... ('two', 'a'), ('two', 'b')]) + >>> index = pd.MultiIndex.from_tuples( + ... [("one", "a"), ("one", "b"), ("two", "a"), ("two", "b")] + ... ) >>> s = pd.Series(np.arange(1, 5, dtype=np.int64), index=index) >>> s one a 1 @@ -889,7 +890,7 @@ def _reorder_for_extension_array_stack( Examples -------- - >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f']) + >>> arr = np.array(["a", "b", "c", "d", "e", "f"]) >>> _reorder_for_extension_array_stack(arr, 2, 3) array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), - ... 3, labels=["bad", "medium", "good"]) + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, labels=["bad", "medium", "good"]) ['bad', 'good', 'medium', 'medium', 'good', 'bad'] Categories (3, object): ['bad' < 'medium' < 'good'] ``ordered=False`` will result in unordered categories when labels are passed. This parameter can be used to allow non-unique labels: - >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, - ... 
labels=["B", "A", "B"], ordered=False) + >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, labels=["B", "A", "B"], ordered=False) ['B', 'B', 'A', 'A', 'B', 'B'] Categories (2, object): ['A', 'B'] @@ -186,8 +184,7 @@ def cut( Passing a Series as an input returns a Series with categorical dtype: - >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), - ... index=['a', 'b', 'c', 'd', 'e']) + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), index=["a", "b", "c", "d", "e"]) >>> pd.cut(s, 3) ... # doctest: +ELLIPSIS a (1.992, 4.667] @@ -201,8 +198,7 @@ def cut( Passing a Series as an input returns a Series with mapping value. It is used to map numerically to intervals based on bins. - >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), - ... index=['a', 'b', 'c', 'd', 'e']) + >>> s = pd.Series(np.array([2, 4, 6, 8, 10]), index=["a", "b", "c", "d", "e"]) >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False) ... # doctest: +ELLIPSIS (a 1.0 @@ -215,8 +211,14 @@ def cut( Use `drop` optional when bins is not unique - >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True, - ... right=False, duplicates='drop') + >>> pd.cut( + ... s, + ... [0, 2, 4, 6, 10, 10], + ... labels=False, + ... retbins=True, + ... right=False, + ... duplicates="drop", + ... ) ... # doctest: +ELLIPSIS (a 1.0 b 2.0 diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index 476e3922b6989..0f1fbc662e1a6 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -25,7 +25,7 @@ def cartesian_product(X) -> list[np.ndarray]: Examples -------- - >>> cartesian_product([list('ABC'), [1, 2]]) + >>> cartesian_product([list("ABC"), [1, 2]]) [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='>> d = {'a': 1, 'b': 2, 'c': 3} - >>> ser = pd.Series(data=d, index=['a', 'b', 'c']) + >>> d = {"a": 1, "b": 2, "c": 3} + >>> ser = pd.Series(data=d, index=["a", "b", "c"]) >>> ser a 1 b 2 @@ -305,8 +305,8 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] The keys of the dictionary match with the Index values, hence the Index values have no effect. - >>> d = {'a': 1, 'b': 2, 'c': 3} - >>> ser = pd.Series(data=d, index=['x', 'y', 'z']) + >>> d = {"a": 1, "b": 2, "c": 3} + >>> ser = pd.Series(data=d, index=["x", "y", "z"]) >>> ser x NaN y NaN @@ -733,7 +733,7 @@ def name(self) -> Hashable: -------- The Series name can be set initially when calling the constructor. - >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers') + >>> s = pd.Series([1, 2, 3], dtype=np.int64, name="Numbers") >>> s 0 1 1 2 @@ -748,8 +748,9 @@ def name(self) -> Hashable: The name of a Series within a DataFrame is its column name. - >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], - ... columns=["Odd Numbers", "Even Numbers"]) + >>> df = pd.DataFrame( + ... [[1, 2], [3, 4], [5, 6]], columns=["Odd Numbers", "Even Numbers"] + ... ) >>> df Odd Numbers Even Numbers 0 1 2 @@ -790,17 +791,16 @@ def values(self): >>> pd.Series([1, 2, 3]).values array([1, 2, 3]) - >>> pd.Series(list('aabc')).values + >>> pd.Series(list("aabc")).values array(['a', 'a', 'b', 'c'], dtype=object) - >>> pd.Series(list('aabc')).astype('category').values + >>> pd.Series(list("aabc")).astype("category").values ['a', 'a', 'b', 'c'] Categories (3, object): ['a', 'b', 'c'] Timezone aware datetime data is converted to UTC: - >>> pd.Series(pd.date_range('20130101', periods=3, - ... 
tz='US/Eastern')).values + >>> pd.Series(pd.date_range("20130101", periods=3, tz="US/Eastern")).values array(['2013-01-01T05:00:00.000000000', '2013-01-02T05:00:00.000000000', '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]') @@ -985,7 +985,7 @@ def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray: For timezone-aware data, the timezones may be retained with ``dtype='object'`` - >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> tzser = pd.Series(pd.date_range("2000", periods=2, tz="CET")) >>> np.asarray(tzser, dtype="object") array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'), Timestamp('2000-01-02 00:00:00+0100', tz='CET')], @@ -1425,7 +1425,7 @@ def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series: Examples -------- - >>> s = pd.Series(['a', 'b', 'c']) + >>> s = pd.Series(["a", "b", "c"]) >>> s 0 a 1 b @@ -1541,8 +1541,11 @@ def reset_index( Examples -------- - >>> s = pd.Series([1, 2, 3, 4], name='foo', - ... index=pd.Index(['a', 'b', 'c', 'd'], name='idx')) + >>> s = pd.Series( + ... [1, 2, 3, 4], + ... name="foo", + ... index=pd.Index(["a", "b", "c", "d"], name="idx"), + ... ) Generate a DataFrame with default index. @@ -1555,7 +1558,7 @@ def reset_index( To specify the name of the new column use `name`. - >>> s.reset_index(name='values') + >>> s.reset_index(name="values") idx values 0 a 1 1 b 2 @@ -1574,16 +1577,19 @@ def reset_index( The `level` parameter is interesting for Series with a multi-level index. - >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']), - ... np.array(['one', 'two', 'one', 'two'])] + >>> arrays = [ + ... np.array(["bar", "bar", "baz", "baz"]), + ... np.array(["one", "two", "one", "two"]), + ... ] >>> s2 = pd.Series( - ... range(4), name='foo', - ... index=pd.MultiIndex.from_arrays(arrays, - ... names=['a', 'b'])) + ... range(4), + ... name="foo", + ... index=pd.MultiIndex.from_arrays(arrays, names=["a", "b"]), + ... ) To remove a specific level from the Index, use `level`. - >>> s2.reset_index(level='a') + >>> s2.reset_index(level="a") a foo b one bar 0 @@ -1863,7 +1869,7 @@ def items(self) -> Iterable[tuple[Hashable, Any]]: Examples -------- - >>> s = pd.Series(['A', 'B', 'C']) + >>> s = pd.Series(["A", "B", "C"]) >>> for index, value in s.items(): ... print(f"Index : {index}, Value : {value}") Index : 0, Value : A @@ -1966,8 +1972,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: Examples -------- - >>> s = pd.Series(["a", "b", "c"], - ... name="vals") + >>> s = pd.Series(["a", "b", "c"], name="vals") >>> s.to_frame() vals 0 a @@ -2245,16 +2250,17 @@ def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation Examples -------- - >>> pd.Series([2, 1, 3, 3], name='A').unique() + >>> pd.Series([2, 1, 3, 3], name="A").unique() array([2, 1, 3]) - >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique() + >>> pd.Series([pd.Timestamp("2016-01-01") for _ in range(3)]).unique() ['2016-01-01 00:00:00'] Length: 1, dtype: datetime64[ns] - >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern') - ... for _ in range(3)]).unique() + >>> pd.Series( + ... [pd.Timestamp("2016-01-01", tz="US/Eastern") for _ in range(3)] + ... ).unique() ['2016-01-01 00:00:00-05:00'] Length: 1, dtype: datetime64[ns, US/Eastern] @@ -2262,11 +2268,12 @@ def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation An Categorical will return categories in the order of appearance and with the same dtype. 
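For contrast with that dtype-preserving behaviour, a minimal sketch (illustrative only, not part of this patch) of how plain ``numpy.unique`` differs, before the Categorical examples resume:

>>> import numpy as np
>>> import pandas as pd
>>> ser = pd.Series(pd.Categorical(list("baabc")))
>>> type(ser.unique()).__name__  # pandas keeps the Categorical container
'Categorical'
>>> np.unique(ser.to_numpy())  # NumPy sorts and falls back to an object ndarray
array(['a', 'b', 'c'], dtype=object)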
- >>> pd.Series(pd.Categorical(list('baabc'))).unique() + >>> pd.Series(pd.Categorical(list("baabc"))).unique() ['b', 'a', 'c'] Categories (3, object): ['a', 'b', 'c'] - >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'), - ... ordered=True)).unique() + >>> pd.Series( + ... pd.Categorical(list("baabc"), categories=list("abc"), ordered=True) + ... ).unique() ['b', 'a', 'c'] Categories (3, object): ['a' < 'b' < 'c'] """ @@ -2338,8 +2345,9 @@ def drop_duplicates( -------- Generate a Series with duplicated entries. - >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', 'hippo'], - ... name='animal') + >>> s = pd.Series( + ... ["llama", "cow", "llama", "beetle", "llama", "hippo"], name="animal" + ... ) >>> s 0 llama 1 cow @@ -2363,7 +2371,7 @@ def drop_duplicates( The value 'last' for parameter 'keep' keeps the last occurrence for each set of duplicated entries. - >>> s.drop_duplicates(keep='last') + >>> s.drop_duplicates(keep="last") 1 cow 3 beetle 4 llama @@ -2427,7 +2435,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: By default, for each set of duplicated values, the first occurrence is set on False and all others on True: - >>> animals = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama']) + >>> animals = pd.Series(["llama", "cow", "llama", "beetle", "llama"]) >>> animals.duplicated() 0 False 1 False @@ -2438,7 +2446,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: which is equivalent to - >>> animals.duplicated(keep='first') + >>> animals.duplicated(keep="first") 0 False 1 False 2 True @@ -2449,7 +2457,7 @@ def duplicated(self, keep: DropKeep = "first") -> Series: By using 'last', the last occurrence of each set of duplicated values is set on False and all others on True: - >>> animals.duplicated(keep='last') + >>> animals.duplicated(keep="last") 0 True 1 False 2 True @@ -2516,8 +2524,7 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab Examples -------- - >>> s = pd.Series(data=[1, None, 4, 1], - ... index=['A', 'B', 'C', 'D']) + >>> s = pd.Series(data=[1, None, 4, 1], index=["A", "B", "C", "D"]) >>> s A 1.0 B NaN @@ -2599,8 +2606,7 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab Examples -------- - >>> s = pd.Series(data=[1, None, 4, 3, 4], - ... index=['A', 'B', 'C', 'D', 'E']) + >>> s = pd.Series(data=[1, None, 4, 3, 4], index=["A", "B", "C", "D", "E"]) >>> s A 1.0 B NaN @@ -2736,9 +2742,9 @@ def quantile( Examples -------- >>> s = pd.Series([1, 2, 3, 4]) - >>> s.quantile(.5) + >>> s.quantile(0.5) 2.5 - >>> s.quantile([.25, .5, .75]) + >>> s.quantile([0.25, 0.5, 0.75]) 0.25 1.75 0.50 2.50 0.75 3.25 @@ -2820,8 +2826,8 @@ def corr( >>> def histogram_intersection(a, b): ... v = np.minimum(a, b).sum().round(decimals=1) ... return v - >>> s1 = pd.Series([.2, .0, .6, .2]) - >>> s2 = pd.Series([.3, .6, .0, .1]) + >>> s1 = pd.Series([0.2, 0.0, 0.6, 0.2]) + >>> s2 = pd.Series([0.3, 0.6, 0.0, 0.1]) >>> s1.corr(s2, method=histogram_intersection) 0.3 @@ -3278,12 +3284,12 @@ def combine( Consider 2 Datasets ``s1`` and ``s2`` containing highest clocked speeds of different birds. 
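Before that worked example, a minimal sketch of the ``combine`` semantics in play: the two indexes are aligned on their union, the function is applied element-wise, and ``fill_value`` stands in wherever a label exists on only one side (toy values, illustrative only):

>>> import pandas as pd
>>> a = pd.Series({"x": 1.0, "y": 4.0})
>>> b = pd.Series({"y": 2.0, "z": 3.0})
>>> a.combine(b, max, fill_value=0.0)  # "x" and "z" each exist on one side only
x    1.0
y    4.0
z    3.0
dtype: float64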
- >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0}) + >>> s1 = pd.Series({"falcon": 330.0, "eagle": 160.0}) >>> s1 falcon 330.0 eagle 160.0 dtype: float64 - >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0}) + >>> s2 = pd.Series({"falcon": 345.0, "eagle": 200.0, "duck": 30.0}) >>> s2 falcon 345.0 eagle 200.0 @@ -3379,8 +3385,8 @@ def combine_first(self, other) -> Series: Null values still persist if the location of that null value does not exist in `other` - >>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0}) - >>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0}) + >>> s1 = pd.Series({"falcon": np.nan, "eagle": 160.0}) + >>> s2 = pd.Series({"eagle": 200.0, "duck": 30.0}) >>> s1.combine_first(s2) duck 30.0 eagle 160.0 @@ -3433,8 +3439,8 @@ def update(self, other: Series | Sequence | Mapping) -> None: 2 6 dtype: int64 - >>> s = pd.Series(['a', 'b', 'c']) - >>> s.update(pd.Series(['d', 'e'], index=[0, 2])) + >>> s = pd.Series(["a", "b", "c"]) + >>> s.update(pd.Series(["d", "e"], index=[0, 2])) >>> s 0 d 1 b @@ -3624,7 +3630,7 @@ def sort_values( Sort values putting NAs first - >>> s.sort_values(na_position='first') + >>> s.sort_values(na_position="first") 0 NaN 1 1.0 2 3.0 @@ -3634,7 +3640,7 @@ def sort_values( Sort a series of strings - >>> s = pd.Series(['z', 'b', 'd', 'a', 'c']) + >>> s = pd.Series(["z", "b", "d", "a", "c"]) >>> s 0 z 1 b @@ -3654,7 +3660,7 @@ def sort_values( Sort using a key function. Your `key` function will be given the ``Series`` of values and should return an array-like. - >>> s = pd.Series(['a', 'B', 'c', 'D', 'e']) + >>> s = pd.Series(["a", "B", "c", "D", "e"]) >>> s.sort_values() 1 B 3 D @@ -3845,7 +3851,7 @@ def sort_index( Examples -------- - >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4]) + >>> s = pd.Series(["a", "b", "c", "d"], index=[3, 2, 1, 4]) >>> s.sort_index() 1 c 2 b @@ -3865,8 +3871,8 @@ def sort_index( By default NaNs are put at the end, but use `na_position` to place them at the beginning - >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan]) - >>> s.sort_index(na_position='first') + >>> s = pd.Series(["a", "b", "c", "d"], index=[3, 2, 1, np.nan]) + >>> s.sort_index(na_position="first") NaN d 1.0 c 2.0 b @@ -3875,10 +3881,10 @@ def sort_index( Specify index level to sort - >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo', - ... 'baz', 'baz', 'bar', 'bar']), - ... np.array(['two', 'one', 'two', 'one', - ... 'two', 'one', 'two', 'one'])] + >>> arrays = [ + ... np.array(["qux", "qux", "foo", "foo", "baz", "baz", "bar", "bar"]), + ... np.array(["two", "one", "two", "one", "two", "one", "two", "one"]), + ... ] >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) >>> s.sort_index(level=1) bar one 8 @@ -3906,8 +3912,8 @@ def sort_index( Apply a key function before sorting - >>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd']) - >>> s.sort_index(key=lambda x : x.str.lower()) + >>> s = pd.Series([1, 2, 3, 4], index=["A", "b", "C", "d"]) + >>> s.sort_index(key=lambda x: x.str.lower()) A 1 b 2 C 3 @@ -4039,11 +4045,18 @@ def nlargest( Examples -------- - >>> countries_population = {"Italy": 59000000, "France": 65000000, - ... "Malta": 434000, "Maldives": 434000, - ... "Brunei": 434000, "Iceland": 337000, - ... "Nauru": 11300, "Tuvalu": 11300, - ... "Anguilla": 11300, "Montserrat": 5200} + >>> countries_population = { + ... "Italy": 59000000, + ... "France": 65000000, + ... "Malta": 434000, + ... "Maldives": 434000, + ... "Brunei": 434000, + ... "Iceland": 337000, + ... "Nauru": 11300, + ... 
"Tuvalu": 11300, + ... "Anguilla": 11300, + ... "Montserrat": 5200, + ... } >>> s = pd.Series(countries_population) >>> s Italy 59000000 @@ -4081,7 +4094,7 @@ def nlargest( Brunei will be kept since it is the last with value 434000 based on the index order. - >>> s.nlargest(3, keep='last') + >>> s.nlargest(3, keep="last") France 65000000 Italy 59000000 Brunei 434000 @@ -4090,7 +4103,7 @@ def nlargest( The `n` largest elements where ``n=3`` with all duplicates kept. Note that the returned Series has five elements due to the three duplicates. - >>> s.nlargest(3, keep='all') + >>> s.nlargest(3, keep="all") France 65000000 Italy 59000000 Malta 434000 @@ -4139,11 +4152,18 @@ def nsmallest( Examples -------- - >>> countries_population = {"Italy": 59000000, "France": 65000000, - ... "Brunei": 434000, "Malta": 434000, - ... "Maldives": 434000, "Iceland": 337000, - ... "Nauru": 11300, "Tuvalu": 11300, - ... "Anguilla": 11300, "Montserrat": 5200} + >>> countries_population = { + ... "Italy": 59000000, + ... "France": 65000000, + ... "Brunei": 434000, + ... "Malta": 434000, + ... "Maldives": 434000, + ... "Iceland": 337000, + ... "Nauru": 11300, + ... "Tuvalu": 11300, + ... "Anguilla": 11300, + ... "Montserrat": 5200, + ... } >>> s = pd.Series(countries_population) >>> s Italy 59000000 @@ -4181,7 +4201,7 @@ def nsmallest( duplicates. Anguilla and Tuvalu will be kept since they are the last with value 11300 based on the index order. - >>> s.nsmallest(3, keep='last') + >>> s.nsmallest(3, keep="last") Montserrat 5200 Anguilla 11300 Tuvalu 11300 @@ -4190,7 +4210,7 @@ def nsmallest( The `n` smallest elements where ``n=3`` with all duplicates kept. Note that the returned Series has four elements due to the three duplicates. - >>> s.nsmallest(3, keep='all') + >>> s.nsmallest(3, keep="all") Montserrat 5200 Nauru 11300 Tuvalu 11300 @@ -4314,8 +4334,10 @@ def reorder_levels(self, order: Sequence[Level]) -> Series: Examples -------- - >>> arrays = [np.array(["dog", "dog", "cat", "cat", "bird", "bird"]), - ... np.array(["white", "black", "white", "black", "white", "black"])] + >>> arrays = [ + ... np.array(["dog", "dog", "cat", "cat", "bird", "bird"]), + ... np.array(["white", "black", "white", "black", "white", "black"]), + ... ] >>> s = pd.Series([1, 2, 3, 3, 5, 2], index=arrays) >>> s dog white 1 @@ -4377,7 +4399,7 @@ def explode(self, ignore_index: bool = False) -> Series: Examples -------- - >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]]) + >>> s = pd.Series([[1, 2, 3], "foo", [], [3, 4]]) >>> s 0 [1, 2, 3] 1 foo @@ -4439,9 +4461,10 @@ def unstack( Examples -------- - >>> s = pd.Series([1, 2, 3, 4], - ... index=pd.MultiIndex.from_product([['one', 'two'], - ... ['a', 'b']])) + >>> s = pd.Series( + ... [1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([["one", "two"], ["a", "b"]]), + ... ) >>> s one a 1 b 2 @@ -4508,7 +4531,7 @@ def map( Examples -------- - >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit']) + >>> s = pd.Series(["cat", "dog", np.nan, "rabbit"]) >>> s 0 cat 1 dog @@ -4520,7 +4543,7 @@ def map( in the ``dict`` are converted to ``NaN``, unless the dict has a default value (e.g. 
``defaultdict``): - >>> s.map({'cat': 'kitten', 'dog': 'puppy'}) + >>> s.map({"cat": "kitten", "dog": "puppy"}) 0 kitten 1 puppy 2 NaN @@ -4529,7 +4552,7 @@ def map( It also accepts a function: - >>> s.map('I am a {}'.format) + >>> s.map("I am a {}".format) 0 I am a cat 1 I am a dog 2 I am a nan @@ -4539,7 +4562,7 @@ def map( To avoid applying the function to missing values (and keep them as ``NaN``) ``na_action='ignore'`` can be used: - >>> s.map('I am a {}'.format, na_action='ignore') + >>> s.map("I am a {}".format, na_action="ignore") 0 I am a cat 1 I am a dog 2 NaN @@ -4696,8 +4719,7 @@ def apply( -------- Create a series with typical summer temperatures for each city. - >>> s = pd.Series([20, 21, 12], - ... index=['London', 'New York', 'Helsinki']) + >>> s = pd.Series([20, 21, 12], index=["London", "New York", "Helsinki"]) >>> s London 20 New York 21 @@ -4708,7 +4730,7 @@ def apply( argument to ``apply()``. >>> def square(x): - ... return x ** 2 + ... return x**2 >>> s.apply(square) London 400 New York 441 @@ -4718,7 +4740,7 @@ def apply( Square the values by passing an anonymous function as an argument to ``apply()``. - >>> s.apply(lambda x: x ** 2) + >>> s.apply(lambda x: x**2) London 400 New York 441 Helsinki 144 @@ -4912,7 +4934,7 @@ def rename( 1 2 2 3 Name: my_name, dtype: int64 - >>> s.rename(lambda x: x ** 2) # function, changes labels + >>> s.rename(lambda x: x**2) # function, changes labels 0 1 1 2 4 3 @@ -5216,7 +5238,7 @@ def drop( Examples -------- - >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C']) + >>> s = pd.Series(data=np.arange(3), index=["A", "B", "C"]) >>> s A 0 B 1 @@ -5225,18 +5247,17 @@ def drop( Drop labels B en C - >>> s.drop(labels=['B', 'C']) + >>> s.drop(labels=["B", "C"]) A 0 dtype: int64 Drop 2nd level label in MultiIndex Series - >>> midx = pd.MultiIndex(levels=[['llama', 'cow', 'falcon'], - ... ['speed', 'weight', 'length']], - ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], - ... [0, 1, 2, 0, 1, 2, 0, 1, 2]]) - >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], - ... index=midx) + >>> midx = pd.MultiIndex( + ... levels=[["llama", "cow", "falcon"], ["speed", "weight", "length"]], + ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2], [0, 1, 2, 0, 1, 2, 0, 1, 2]], + ... ) + >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], index=midx) >>> s llama speed 45.0 weight 200.0 @@ -5249,7 +5270,7 @@ def drop( length 0.3 dtype: float64 - >>> s.drop(labels='weight', level=1) + >>> s.drop(labels="weight", level=1) llama speed 45.0 length 1.2 cow speed 30.0 @@ -5418,9 +5439,10 @@ def isin(self, values) -> Series: Examples -------- - >>> s = pd.Series(['llama', 'cow', 'llama', 'beetle', 'llama', - ... 'hippo'], name='animal') - >>> s.isin(['cow', 'llama']) + >>> s = pd.Series( + ... ["llama", "cow", "llama", "beetle", "llama", "hippo"], name="animal" + ... ) + >>> s.isin(["cow", "llama"]) 0 True 1 True 2 True @@ -5431,7 +5453,7 @@ def isin(self, values) -> Series: To invert the boolean values, use the ``~`` operator: - >>> ~s.isin(['cow', 'llama']) + >>> ~s.isin(["cow", "llama"]) 0 False 1 False 2 False @@ -5443,7 +5465,7 @@ def isin(self, values) -> Series: Passing a single string as ``s.isin('llama')`` will raise an error. 
Use a list of one element instead: - >>> s.isin(['llama']) + >>> s.isin(["llama"]) 0 True 1 False 2 True @@ -5454,10 +5476,10 @@ def isin(self, values) -> Series: Strings and integers are distinct and are therefore not comparable: - >>> pd.Series([1]).isin(['1']) + >>> pd.Series([1]).isin(["1"]) 0 False dtype: bool - >>> pd.Series([1.1]).isin(['1.1']) + >>> pd.Series([1.1]).isin(["1.1"]) 0 False dtype: bool """ @@ -5531,8 +5553,8 @@ def between( `left` and `right` can be any scalar value: - >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve']) - >>> s.between('Anna', 'Daniel') + >>> s = pd.Series(["Alice", "Bob", "Carol", "Eve"]) + >>> s.between("Anna", "Daniel") 0 False 1 True 2 True @@ -5600,12 +5622,16 @@ def case_when( Examples -------- - >>> c = pd.Series([6, 7, 8, 9], name='c') + >>> c = pd.Series([6, 7, 8, 9], name="c") >>> a = pd.Series([0, 0, 1, 2]) >>> b = pd.Series([0, 3, 4, 5]) - >>> c.case_when(caselist=[(a.gt(0), a), # condition, replacement - ... (b.gt(0), b)]) + >>> c.case_when( + ... caselist=[ + ... (a.gt(0), a), # condition, replacement + ... (b.gt(0), b), + ... ] + ... ) 0 6 1 3 2 1 @@ -5764,7 +5790,7 @@ def dropna( Examples -------- - >>> ser = pd.Series([1., 2., np.nan]) + >>> ser = pd.Series([1.0, 2.0, np.nan]) >>> ser 0 1.0 1 2.0 @@ -5781,7 +5807,7 @@ def dropna( Empty strings are not considered NA values. ``None`` is considered an NA value. - >>> ser = pd.Series([np.nan, 2, pd.NaT, '', None, 'I stay']) + >>> ser = pd.Series([np.nan, 2, pd.NaT, "", None, "I stay"]) >>> ser 0 NaN 1 2 @@ -5857,7 +5883,7 @@ def to_timestamp( Examples -------- - >>> idx = pd.PeriodIndex(['2023', '2024', '2025'], freq='Y') + >>> idx = pd.PeriodIndex(["2023", "2024", "2025"], freq="Y") >>> s1 = pd.Series([1, 2, 3], index=idx) >>> s1 2023 1 @@ -5877,7 +5903,7 @@ def to_timestamp( Using `freq` which is the offset that the Timestamps will have >>> s2 = pd.Series([1, 2, 3], index=idx) - >>> s2 = s2.to_timestamp(freq='M') + >>> s2 = s2.to_timestamp(freq="M") >>> s2 2023-01-31 1 2024-01-31 2 @@ -5922,7 +5948,7 @@ def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series Examples -------- - >>> idx = pd.DatetimeIndex(['2023', '2024', '2025']) + >>> idx = pd.DatetimeIndex(["2023", "2024", "2025"]) >>> s = pd.Series([1, 2, 3], index=idx) >>> s = s.to_period() >>> s diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index fa85897872981..bd523969fba13 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -538,20 +538,20 @@ def cat( When not passing `others`, all values are concatenated into a single string: - >>> s = pd.Series(['a', 'b', np.nan, 'd']) - >>> s.str.cat(sep=' ') + >>> s = pd.Series(["a", "b", np.nan, "d"]) + >>> s.str.cat(sep=" ") 'a b d' By default, NA values in the Series are ignored. Using `na_rep`, they can be given a representation: - >>> s.str.cat(sep=' ', na_rep='?') + >>> s.str.cat(sep=" ", na_rep="?") 'a b ? d' If `others` is specified, corresponding values are concatenated with the separator. Result will be a Series of strings. - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',') + >>> s.str.cat(["A", "B", "C", "D"], sep=",") 0 a,A 1 b,B 2 NaN @@ -561,7 +561,7 @@ def cat( Missing values will remain missing in the result, but can again be represented using `na_rep` - >>> s.str.cat(['A', 'B', 'C', 'D'], sep=',', na_rep='-') + >>> s.str.cat(["A", "B", "C", "D"], sep=",", na_rep="-") 0 a,A 1 b,B 2 -,C @@ -571,7 +571,7 @@ def cat( If `sep` is not specified, the values are concatenated without separation. 
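One further ``cat`` facet worth a quick sketch before the examples resume: ``others`` may also be a DataFrame, concatenating several columns in a single pass (column names here are illustrative, and the default indexes are assumed to align):

>>> import numpy as np
>>> import pandas as pd
>>> s = pd.Series(["a", "b", np.nan, "d"])
>>> frame = pd.DataFrame({"first": ["A", "B", "C", "D"], "second": ["w", "x", "y", "z"]})
>>> s.str.cat(frame, sep="-", na_rep="?")
0    a-A-w
1    b-B-x
2    ?-C-y
3    d-D-z
dtype: object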
- >>> s.str.cat(['A', 'B', 'C', 'D'], na_rep='-') + >>> s.str.cat(["A", "B", "C", "D"], na_rep="-") 0 aA 1 bB 2 -C @@ -581,15 +581,15 @@ def cat( Series with different indexes can be aligned before concatenation. The `join`-keyword works as in other methods. - >>> t = pd.Series(['d', 'a', 'e', 'c'], index=[3, 0, 4, 2]) - >>> s.str.cat(t, join='left', na_rep='-') + >>> t = pd.Series(["d", "a", "e", "c"], index=[3, 0, 4, 2]) + >>> s.str.cat(t, join="left", na_rep="-") 0 aa 1 b- 2 -c 3 dd dtype: object >>> - >>> s.str.cat(t, join='outer', na_rep='-') + >>> s.str.cat(t, join="outer", na_rep="-") 0 aa 1 b- 2 -c @@ -597,13 +597,13 @@ def cat( 4 -e dtype: object >>> - >>> s.str.cat(t, join='inner', na_rep='-') + >>> s.str.cat(t, join="inner", na_rep="-") 0 aa 2 -c 3 dd dtype: object >>> - >>> s.str.cat(t, join='right', na_rep='-') + >>> s.str.cat(t, join="right", na_rep="-") 3 dd 0 aa 4 -e @@ -1082,12 +1082,16 @@ def get(self, i): Examples -------- - >>> s = pd.Series(["String", - ... (1, 2, 3), - ... ["a", "b", "c"], - ... 123, - ... -456, - ... {1: "Hello", "2": "World"}]) + >>> s = pd.Series( + ... [ + ... "String", + ... (1, 2, 3), + ... ["a", "b", "c"], + ... 123, + ... -456, + ... {1: "Hello", "2": "World"}, + ... ] + ... ) >>> s 0 String 1 (1, 2, 3) @@ -1117,9 +1121,13 @@ def get(self, i): Return element with given key - >>> s = pd.Series([{"name": "Hello", "value": "World"}, - ... {"name": "Goodbye", "value": "Planet"}]) - >>> s.str.get('name') + >>> s = pd.Series( + ... [ + ... {"name": "Hello", "value": "World"}, + ... {"name": "Goodbye", "value": "Planet"}, + ... ] + ... ) + >>> s.str.get("name") 0 Hello 1 Goodbye dtype: object @@ -1166,11 +1174,15 @@ def join(self, sep: str): -------- Example with a list that contains non-string elements. - >>> s = pd.Series([['lion', 'elephant', 'zebra'], - ... [1.1, 2.2, 3.3], - ... ['cat', np.nan, 'dog'], - ... ['cow', 4.5, 'goat'], - ... ['duck', ['swan', 'fish'], 'guppy']]) + >>> s = pd.Series( + ... [ + ... ["lion", "elephant", "zebra"], + ... [1.1, 2.2, 3.3], + ... ["cat", np.nan, "dog"], + ... ["cow", 4.5, "goat"], + ... ["duck", ["swan", "fish"], "guppy"], + ... ] + ... ) >>> s 0 [lion, elephant, zebra] 1 [1.1, 2.2, 3.3] @@ -1182,7 +1194,7 @@ def join(self, sep: str): Join all lists using a '-'. The lists containing object(s) of types other than str will produce a NaN. - >>> s.str.join('-') + >>> s.str.join("-") 0 lion-elephant-zebra 1 NaN 2 NaN @@ -1238,8 +1250,8 @@ def contains( -------- Returning a Series of booleans using only a literal pattern. - >>> s1 = pd.Series(['Mouse', 'dog', 'house and parrot', '23', np.nan]) - >>> s1.str.contains('og', regex=False) + >>> s1 = pd.Series(["Mouse", "dog", "house and parrot", "23", np.nan]) + >>> s1.str.contains("og", regex=False) 0 False 1 True 2 False @@ -1249,13 +1261,13 @@ def contains( Returning an Index of booleans using only a literal pattern. - >>> ind = pd.Index(['Mouse', 'dog', 'house and parrot', '23.0', np.nan]) - >>> ind.str.contains('23', regex=False) + >>> ind = pd.Index(["Mouse", "dog", "house and parrot", "23.0", np.nan]) + >>> ind.str.contains("23", regex=False) Index([False, False, False, True, nan], dtype='object') Specifying case sensitivity using `case`. - >>> s1.str.contains('oG', case=True, regex=True) + >>> s1.str.contains("oG", case=True, regex=True) 0 False 1 False 2 False @@ -1267,7 +1279,7 @@ def contains( with `False`. If Series or Index does not contain NaN values the resultant dtype will be `bool`, otherwise, an `object` dtype. 
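That dtype remark is easy to trip over when the result is used as a boolean mask; a compact sketch (assuming the default object-backed string storage):

>>> import numpy as np
>>> import pandas as pd
>>> pd.Series(["cat", "dog"]).str.contains("og").dtype  # no NaN anywhere: plain bool
dtype('bool')
>>> pd.Series(["cat", np.nan]).str.contains("og").dtype  # NaN propagates: object
dtype('O')
>>> pd.Series(["cat", np.nan]).str.contains("og", na=False).dtype  # na= restores a mask
dtype('bool')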
- >>> s1.str.contains('og', na=False, regex=True) + >>> s1.str.contains("og", na=False, regex=True) 0 False 1 True 2 False @@ -1277,7 +1289,7 @@ def contains( Returning 'house' or 'dog' when either expression occurs in a string. - >>> s1.str.contains('house|dog', regex=True) + >>> s1.str.contains("house|dog", regex=True) 0 False 1 True 2 True @@ -1288,7 +1300,7 @@ def contains( Ignoring case sensitivity using `flags` with regex. >>> import re - >>> s1.str.contains('PARROT', flags=re.IGNORECASE, regex=True) + >>> s1.str.contains("PARROT", flags=re.IGNORECASE, regex=True) 0 False 1 False 2 True @@ -1298,7 +1310,7 @@ def contains( Returning any digit using regular expression. - >>> s1.str.contains('\\d', regex=True) + >>> s1.str.contains("\\d", regex=True) 0 False 1 False 2 False @@ -1311,8 +1323,8 @@ def contains( return `True`. However, '.0' as a regex matches any character followed by a 0. - >>> s2 = pd.Series(['40', '40.0', '41', '41.0', '35']) - >>> s2.str.contains('.0', regex=True) + >>> s2 = pd.Series(["40", "40.0", "41", "41.0", "35"]) + >>> s2.str.contains(".0", regex=True) 0 True 1 True 2 False @@ -1403,7 +1415,7 @@ def fullmatch(self, pat, case: bool = True, flags: int = 0, na=None): Examples -------- >>> ser = pd.Series(["cat", "duck", "dove"]) - >>> ser.str.fullmatch(r'd.+') + >>> ser.str.fullmatch(r"d.+") 0 False 1 True 2 True @@ -1482,7 +1494,7 @@ def replace( regex patterns as with :meth:`re.sub`. NaN value(s) in the Series are left as is: - >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f.', 'ba', regex=True) + >>> pd.Series(["foo", "fuz", np.nan]).str.replace("f.", "ba", regex=True) 0 bao 1 baz 2 NaN @@ -1491,7 +1503,7 @@ def replace( When `pat` is a string and `regex` is False, every `pat` is replaced with `repl` as with :meth:`str.replace`: - >>> pd.Series(['f.o', 'fuz', np.nan]).str.replace('f.', 'ba', regex=False) + >>> pd.Series(["f.o", "fuz", np.nan]).str.replace("f.", "ba", regex=False) 0 bao 1 fuz 2 NaN @@ -1503,7 +1515,7 @@ def replace( To get the idea: - >>> pd.Series(['foo', 'fuz', np.nan]).str.replace('f', repr, regex=True) + >>> pd.Series(["foo", "fuz", np.nan]).str.replace("f", repr, regex=True) 0 oo 1 uz 2 NaN @@ -1512,8 +1524,8 @@ def replace( Reverse every lowercase alphabetic word: >>> repl = lambda m: m.group(0)[::-1] - >>> ser = pd.Series(['foo 123', 'bar baz', np.nan]) - >>> ser.str.replace(r'[a-z]+', repl, regex=True) + >>> ser = pd.Series(["foo 123", "bar baz", np.nan]) + >>> ser.str.replace(r"[a-z]+", repl, regex=True) 0 oof 123 1 rab zab 2 NaN @@ -1522,8 +1534,8 @@ def replace( Using regex groups (extract second group and swap case): >>> pat = r"(?P\w+) (?P\w+) (?P\w+)" - >>> repl = lambda m: m.group('two').swapcase() - >>> ser = pd.Series(['One Two Three', 'Foo Bar Baz']) + >>> repl = lambda m: m.group("two").swapcase() + >>> ser = pd.Series(["One Two Three", "Foo Bar Baz"]) >>> ser.str.replace(pat, repl, regex=True) 0 tWO 1 bAR @@ -1532,8 +1544,8 @@ def replace( Using a compiled regex with flags >>> import re - >>> regex_pat = re.compile(r'FUZ', flags=re.IGNORECASE) - >>> pd.Series(['foo', 'fuz', np.nan]).str.replace(regex_pat, 'bar', regex=True) + >>> regex_pat = re.compile(r"FUZ", flags=re.IGNORECASE) + >>> pd.Series(["foo", "fuz", np.nan]).str.replace(regex_pat, "bar", regex=True) 0 foo 1 bar 2 NaN @@ -1583,7 +1595,7 @@ def repeat(self, repeats): Examples -------- - >>> s = pd.Series(['a', 'b', 'c']) + >>> s = pd.Series(["a", "b", "c"]) >>> s 0 a 1 b @@ -1658,12 +1670,12 @@ def pad( 1 tiger dtype: object - >>> s.str.pad(width=10, 
side='right', fillchar='-') + >>> s.str.pad(width=10, side="right", fillchar="-") 0 caribou--- 1 tiger----- dtype: object - >>> s.str.pad(width=10, side='both', fillchar='-') + >>> s.str.pad(width=10, side="both", fillchar="-") 0 -caribou-- 1 --tiger--- dtype: object @@ -1782,7 +1794,7 @@ def zfill(self, width: int): Examples -------- - >>> s = pd.Series(['-1', '1', '1000', 10, np.nan]) + >>> s = pd.Series(["-1", "1", "1000", 10, np.nan]) >>> s 0 -1 1 1 @@ -1917,7 +1929,7 @@ def slice_replace(self, start=None, stop=None, repl=None): Examples -------- - >>> s = pd.Series(['a', 'ab', 'abc', 'abdc', 'abcde']) + >>> s = pd.Series(["a", "ab", "abc", "abdc", "abcde"]) >>> s 0 a 1 ab @@ -1929,7 +1941,7 @@ def slice_replace(self, start=None, stop=None, repl=None): Specify just `start`, meaning replace `start` until the end of the string with `repl`. - >>> s.str.slice_replace(1, repl='X') + >>> s.str.slice_replace(1, repl="X") 0 aX 1 aX 2 aX @@ -1940,7 +1952,7 @@ def slice_replace(self, start=None, stop=None, repl=None): Specify just `stop`, meaning the start of the string to `stop` is replaced with `repl`, and the rest of the string is included. - >>> s.str.slice_replace(stop=2, repl='X') + >>> s.str.slice_replace(stop=2, repl="X") 0 X 1 X 2 Xc @@ -1952,7 +1964,7 @@ def slice_replace(self, start=None, stop=None, repl=None): replaced with `repl`. Everything before or after `start` and `stop` is included as is. - >>> s.str.slice_replace(start=1, stop=3, repl='X') + >>> s.str.slice_replace(start=1, stop=3, repl="X") 0 aX 1 aX 2 aX @@ -1983,8 +1995,8 @@ def decode(self, encoding, errors: str = "strict"): -------- For Series: - >>> ser = pd.Series([b'cow', b'123', b'()']) - >>> ser.str.decode('ascii') + >>> ser = pd.Series([b"cow", b"123", b"()"]) + >>> ser.str.decode("ascii") 0 cow 1 123 2 () @@ -2020,8 +2032,8 @@ def encode(self, encoding, errors: str = "strict"): Examples -------- - >>> ser = pd.Series(['cow', '123', '()']) - >>> ser.str.encode(encoding='ascii') + >>> ser = pd.Series(["cow", "123", "()"]) + >>> ser.str.encode(encoding="ascii") 0 b'cow' 1 b'123' 2 b'()' @@ -2247,7 +2259,7 @@ def wrap(self, width: int, **kwargs): Examples -------- - >>> s = pd.Series(['line to be wrapped', 'another line to be wrapped']) + >>> s = pd.Series(["line to be wrapped", "another line to be wrapped"]) >>> s.str.wrap(12) 0 line to be\nwrapped 1 another line\nto be\nwrapped @@ -2281,13 +2293,13 @@ def get_dummies(self, sep: str = "|"): Examples -------- - >>> pd.Series(['a|b', 'a', 'a|c']).str.get_dummies() + >>> pd.Series(["a|b", "a", "a|c"]).str.get_dummies() a b c 0 1 1 0 1 1 0 0 2 1 0 1 - >>> pd.Series(['a|b', np.nan, 'a|c']).str.get_dummies() + >>> pd.Series(["a|b", np.nan, "a|c"]).str.get_dummies() a b c 0 1 1 0 1 0 0 0 @@ -2325,7 +2337,7 @@ def translate(self, table): Examples -------- >>> ser = pd.Series(["El niño", "Françoise"]) - >>> mytable = str.maketrans({'ñ': 'n', 'ç': 'c'}) + >>> mytable = str.maketrans({"ñ": "n", "ç": "c"}) >>> ser.str.translate(mytable) 0 El nino 1 Francoise @@ -2370,8 +2382,8 @@ def count(self, pat, flags: int = 0): Examples -------- - >>> s = pd.Series(['A', 'B', 'Aaba', 'Baca', np.nan, 'CABA', 'cat']) - >>> s.str.count('a') + >>> s = pd.Series(["A", "B", "Aaba", "Baca", np.nan, "CABA", "cat"]) + >>> s.str.count("a") 0 0.0 1 0.0 2 2.0 @@ -2383,8 +2395,8 @@ def count(self, pat, flags: int = 0): Escape ``'$'`` to find the literal dollar sign. 
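Rather than escaping metacharacters by hand, ``re.escape`` can build the literal pattern; a short sketch (illustrative values):

>>> import re
>>> import pandas as pd
>>> s = pd.Series(["$5", "5", "$$"])
>>> s.str.count(re.escape("$"))  # equivalent to counting "\\$"
0    1
1    0
2    2
dtype: int64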
- >>> s = pd.Series(['$', 'B', 'Aab$', '$$ca', 'C$B$', 'cat']) - >>> s.str.count('\\$') + >>> s = pd.Series(["$", "B", "Aab$", "$$ca", "C$B$", "cat"]) + >>> s.str.count("\\$") 0 1 1 0 2 1 @@ -2395,7 +2407,7 @@ def count(self, pat, flags: int = 0): This is also available on Index - >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') + >>> pd.Index(["A", "A", "Aaba", "cat"]).str.count("a") Index([0, 0, 2, 1], dtype='int64') """ result = self._data.array._str_count(pat, flags) @@ -2434,7 +2446,7 @@ def startswith( Examples -------- - >>> s = pd.Series(['bat', 'Bear', 'cat', np.nan]) + >>> s = pd.Series(["bat", "Bear", "cat", np.nan]) >>> s 0 bat 1 Bear @@ -2442,14 +2454,14 @@ def startswith( 3 NaN dtype: object - >>> s.str.startswith('b') + >>> s.str.startswith("b") 0 True 1 False 2 False 3 NaN dtype: object - >>> s.str.startswith(('b', 'B')) + >>> s.str.startswith(("b", "B")) 0 True 1 True 2 False @@ -2458,7 +2470,7 @@ def startswith( Specifying `na` to be `False` instead of `NaN`. - >>> s.str.startswith('b', na=False) + >>> s.str.startswith("b", na=False) 0 True 1 False 2 False @@ -2504,7 +2516,7 @@ def endswith( Examples -------- - >>> s = pd.Series(['bat', 'bear', 'caT', np.nan]) + >>> s = pd.Series(["bat", "bear", "caT", np.nan]) >>> s 0 bat 1 bear @@ -2512,14 +2524,14 @@ def endswith( 3 NaN dtype: object - >>> s.str.endswith('t') + >>> s.str.endswith("t") 0 True 1 False 2 False 3 NaN dtype: object - >>> s.str.endswith(('t', 'T')) + >>> s.str.endswith(("t", "T")) 0 True 1 False 2 True @@ -2528,7 +2540,7 @@ def endswith( Specifying `na` to be `False` instead of `NaN`. - >>> s.str.endswith('t', na=False) + >>> s.str.endswith("t", na=False) 0 True 1 False 2 False @@ -2575,11 +2587,11 @@ def findall(self, pat, flags: int = 0): Examples -------- - >>> s = pd.Series(['Lion', 'Monkey', 'Rabbit']) + >>> s = pd.Series(["Lion", "Monkey", "Rabbit"]) The search for the pattern 'Monkey' returns one match: - >>> s.str.findall('Monkey') + >>> s.str.findall("Monkey") 0 [] 1 [Monkey] 2 [] @@ -2588,7 +2600,7 @@ def findall(self, pat, flags: int = 0): On the other hand, the search for the pattern 'MONKEY' doesn't return any match: - >>> s.str.findall('MONKEY') + >>> s.str.findall("MONKEY") 0 [] 1 [] 2 [] @@ -2598,7 +2610,7 @@ def findall(self, pat, flags: int = 0): to find the pattern 'MONKEY' ignoring the case: >>> import re - >>> s.str.findall('MONKEY', flags=re.IGNORECASE) + >>> s.str.findall("MONKEY", flags=re.IGNORECASE) 0 [] 1 [Monkey] 2 [] @@ -2607,7 +2619,7 @@ def findall(self, pat, flags: int = 0): When the pattern matches more than one string in the Series, all matches are returned: - >>> s.str.findall('on') + >>> s.str.findall("on") 0 [on] 1 [on] 2 [] @@ -2616,7 +2628,7 @@ def findall(self, pat, flags: int = 0): Regular expressions are supported too. For instance, the search for all the strings ending with the word 'on' is shown next: - >>> s.str.findall('on$') + >>> s.str.findall("on$") 0 [on] 1 [] 2 [] @@ -2625,7 +2637,7 @@ def findall(self, pat, flags: int = 0): If the pattern is found more than once in the same string, then a list of multiple strings is returned: - >>> s.str.findall('b') + >>> s.str.findall("b") 0 [] 1 [] 2 [b, b] @@ -2678,8 +2690,8 @@ def extract( A pattern with two groups will return a DataFrame with two columns. Non-matches will be NaN. 
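Note that ``extract`` keeps only the first match in each string; when every occurrence matters, ``extractall`` returns one row per match with an extra ``match`` index level. A brief sketch (illustrative data):

>>> import pandas as pd
>>> s = pd.Series(["a1a2", "b1"])
>>> matches = s.str.extractall(r"([ab])(\d)")
>>> matches.index.names  # outer level: source row label; inner level: match number
FrozenList([None, 'match'])
>>> len(matches)  # two hits in "a1a2" plus one in "b1"
3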
- >>> s = pd.Series(['a1', 'b2', 'c3'])
- >>> s.str.extract(r'([ab])(\d)')
+ >>> s = pd.Series(["a1", "b2", "c3"])
+ >>> s.str.extract(r"([ab])(\d)")
 0 1
 0 a 1
 1 b 2
@@ -2687,7 +2699,7 @@ def extract(
 A pattern may contain optional groups.
- >>> s.str.extract(r'([ab])?(\d)')
+ >>> s.str.extract(r"([ab])?(\d)")
 0 1
 0 a 1
 1 b 2
@@ -2695,7 +2707,7 @@ def extract(
 Named groups will become column names in the result.
- >>> s.str.extract(r'(?P<letter>[ab])(?P<digit>\d)')
+ >>> s.str.extract(r"(?P<letter>[ab])(?P<digit>\d)")
 letter digit
 0 a 1
 1 b 2
@@ -2704,7 +2716,7 @@ def extract(
 A pattern with one group will return a DataFrame with one column if
 expand=True.
- >>> s.str.extract(r'[ab](\d)', expand=True)
+ >>> s.str.extract(r"[ab](\d)", expand=True)
 0
 0 1
 1 2
@@ -2712,7 +2724,7 @@ def extract(
 A pattern with one group will return a Series if expand=False.
- >>> s.str.extract(r'[ab](\d)', expand=False)
+ >>> s.str.extract(r"[ab](\d)", expand=False)
 0 1
 1 2
 2 NaN
@@ -2938,8 +2950,8 @@ def normalize(self, form):
 Examples
 --------
- >>> ser = pd.Series(['ñ'])
- >>> ser.str.normalize('NFC') == ser.str.normalize('NFD')
+ >>> ser = pd.Series(["ñ"])
+ >>> ser.str.normalize("NFC") == ser.str.normalize("NFD")
 0 False
 dtype: bool
 """
@@ -3052,12 +3064,9 @@ def len(self):
 Returns the length (number of characters) in a string. Returns the
 number of entries for dictionaries, lists or tuples.
- >>> s = pd.Series(['dog',
- ... '',
- ... 5,
- ... {'foo' : 'bar'},
- ... [2, 3, 5, 7],
- ... ('one', 'two', 'three')])
+ >>> s = pd.Series(
+ ... ["dog", "", 5, {"foo": "bar"}, [2, 3, 5, 7], ("one", "two", "three")]
+ ... )
 >>> s
 0 dog
 1
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index 8e0a96e508516..6c8c2c7e5009e 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -897,9 +897,7 @@ def to_datetime(
 can be common abbreviations like ['year', 'month', 'day', 'minute', 'second',
 'ms', 'us', 'ns']) or plurals of the same
- >>> df = pd.DataFrame({'year': [2015, 2016],
- ... 'month': [2, 3],
- ... 'day': [4, 5]})
+ >>> df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
 >>> pd.to_datetime(df)
 0 2015-02-04
 1 2016-03-05
@@ -907,9 +905,9 @@ def to_datetime(
 Using a unix epoch time
- >>> pd.to_datetime(1490195805, unit='s')
+ >>> pd.to_datetime(1490195805, unit="s")
 Timestamp('2017-03-22 15:16:45')
- >>> pd.to_datetime(1490195805433502912, unit='ns')
+ >>> pd.to_datetime(1490195805433502912, unit="ns")
 Timestamp('2017-03-22 15:16:45.433502912')
 .. warning:: For float arg, precision rounding might happen. To prevent
@@ -917,8 +915,7 @@ def to_datetime(
 Using a non-unix epoch origin
- >>> pd.to_datetime([1, 2, 3], unit='D',
- ... origin=pd.Timestamp('1960-01-01'))
+ >>> pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01"))
 DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'],
 dtype='datetime64[ns]', freq=None)

@@ -926,8 +923,7 @@ def to_datetime(
 :const:`"%f"` will parse all the way up to nanoseconds.
- >>> pd.to_datetime('2018-10-26 12:00:00.0000000011',
- ... format='%Y-%m-%d %H:%M:%S.%f')
+ >>> pd.to_datetime("2018-10-26 12:00:00.0000000011", format="%Y-%m-%d %H:%M:%S.%f")
 Timestamp('2018-10-26 12:00:00.000000001')
 **Non-convertible date/times**
 Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`,
 in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`.
- >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce') + >>> pd.to_datetime("13000101", format="%Y%m%d", errors="coerce") NaT .. _to_datetime_tz_examples: @@ -946,14 +942,14 @@ def to_datetime( - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`: - >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15']) + >>> pd.to_datetime(["2018-10-26 12:00:00", "2018-10-26 13:00:15"]) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], dtype='datetime64[ns]', freq=None) - Timezone-aware inputs *with constant time offset* are converted to timezone-aware :class:`DatetimeIndex`: - >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500']) + >>> pd.to_datetime(["2018-10-26 12:00 -0500", "2018-10-26 13:00 -0500"]) DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'], dtype='datetime64[ns, UTC-05:00]', freq=None) @@ -965,8 +961,9 @@ def to_datetime( and a simple :class:`Index` containing :class:`datetime.datetime` objects will be returned: - >>> pd.to_datetime(['2020-10-25 02:00 +0200', - ... '2020-10-25 04:00 +0100']) # doctest: +SKIP + >>> pd.to_datetime( + ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] + ... ) # doctest: +SKIP FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` @@ -979,8 +976,9 @@ def to_datetime( a simple :class:`Index` containing :class:`datetime.datetime` objects: >>> from datetime import datetime - >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", - ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP + >>> pd.to_datetime( + ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] + ... ) # doctest: +SKIP FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` @@ -994,22 +992,21 @@ def to_datetime( - Timezone-naive inputs are *localized* as UTC - >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) + >>> pd.to_datetime(["2018-10-26 12:00", "2018-10-26 13:00"], utc=True) DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'], dtype='datetime64[ns, UTC]', freq=None) - Timezone-aware inputs are *converted* to UTC (the output represents the exact same datetime, but viewed from the UTC time offset `+00:00`). - >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'], - ... 
utc=True)
+ >>> pd.to_datetime(["2018-10-26 12:00 -0530", "2018-10-26 12:00 -0500"], utc=True)
 DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
 dtype='datetime64[ns, UTC]', freq=None)

 - Inputs can contain both strings and datetimes; the above rules still apply

- >>> pd.to_datetime(['2018-10-26 12:00', datetime(2020, 1, 1, 18)], utc=True)
+ >>> pd.to_datetime(["2018-10-26 12:00", datetime(2020, 1, 1, 18)], utc=True)
 DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'],
 dtype='datetime64[ns, UTC]', freq=None)
 """
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 09652a7d8bc92..2ae57d3c8508e 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -124,24 +124,24 @@ def to_numeric(
 --------
 Take separate series and convert to numeric, coercing when told to
- >>> s = pd.Series(['1.0', '2', -3])
+ >>> s = pd.Series(["1.0", "2", -3])
 >>> pd.to_numeric(s)
 0 1.0
 1 2.0
 2 -3.0
 dtype: float64
- >>> pd.to_numeric(s, downcast='float')
+ >>> pd.to_numeric(s, downcast="float")
 0 1.0
 1 2.0
 2 -3.0
 dtype: float32
- >>> pd.to_numeric(s, downcast='signed')
+ >>> pd.to_numeric(s, downcast="signed")
 0 1
 1 2
 2 -3
 dtype: int8
- >>> s = pd.Series(['apple', '1.0', '2', -3])
- >>> pd.to_numeric(s, errors='coerce')
+ >>> s = pd.Series(["apple", "1.0", "2", -3])
+ >>> pd.to_numeric(s, errors="coerce")
 0 NaN
 1 1.0
 2 2.0
diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py
index fcf4f7606a594..47dfae3c6cadd 100644
--- a/pandas/core/tools/timedeltas.py
+++ b/pandas/core/tools/timedeltas.py
@@ -160,24 +160,24 @@ def to_timedelta(
 --------
 Parsing a single string to a Timedelta:
- >>> pd.to_timedelta('1 days 06:05:01.00003')
+ >>> pd.to_timedelta("1 days 06:05:01.00003")
 Timedelta('1 days 06:05:01.000030')
- >>> pd.to_timedelta('15.5us')
+ >>> pd.to_timedelta("15.5us")
 Timedelta('0 days 00:00:00.000015500')
 Parsing a list or array of strings:
- >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
+ >>> pd.to_timedelta(["1 days 06:05:01.00003", "15.5us", "nan"])
 TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
 dtype='timedelta64[ns]', freq=None)
 Converting numbers by specifying the `unit` keyword argument:
- >>> pd.to_timedelta(np.arange(5), unit='s')
+ >>> pd.to_timedelta(np.arange(5), unit="s")
 TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
 '0 days 00:00:03', '0 days 00:00:04'],
 dtype='timedelta64[ns]', freq=None)
- >>> pd.to_timedelta(np.arange(5), unit='d')
+ >>> pd.to_timedelta(np.arange(5), unit="d")
 TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
 dtype='timedelta64[ns]', freq=None)
 """
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index b1a1da387ab83..b0048d5024064 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -959,7 +959,7 @@ class Window(BaseWindow):
 Examples
 --------
- >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
+ >>> df = pd.DataFrame({"B": [0, 1, 2, np.nan, 4]})
 >>> df
 B
 0 0.0
@@ -982,12 +982,16 @@ class Window(BaseWindow):
 Rolling sum with a window span of 2 seconds.
- >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
- ... index=[pd.Timestamp('20130101 09:00:00'),
- ... pd.Timestamp('20130101 09:00:02'),
- ... pd.Timestamp('20130101 09:00:03'),
- ... pd.Timestamp('20130101 09:00:05'),
- ... pd.Timestamp('20130101 09:00:06')])
+ >>> df_time = pd.DataFrame(
+ ... {"B": [0, 1, 2, np.nan, 4]},
+ ... index=[
+ ... 
pd.Timestamp("20130101 09:00:00"), + ... pd.Timestamp("20130101 09:00:02"), + ... pd.Timestamp("20130101 09:00:03"), + ... pd.Timestamp("20130101 09:00:05"), + ... pd.Timestamp("20130101 09:00:06"), + ... ], + ... ) >>> df_time B @@ -997,7 +1001,7 @@ class Window(BaseWindow): 2013-01-01 09:00:05 NaN 2013-01-01 09:00:06 4.0 - >>> df_time.rolling('2s').sum() + >>> df_time.rolling("2s").sum() B 2013-01-01 09:00:00 0.0 2013-01-01 09:00:02 1.0 @@ -1065,7 +1069,7 @@ class Window(BaseWindow): Rolling sum with a window length of 2, using the Scipy ``'gaussian'`` window type. ``std`` is required in the aggregation function. - >>> df.rolling(2, win_type='gaussian').sum(std=3) + >>> df.rolling(2, win_type="gaussian").sum(std=3) B 0 NaN 1 0.986207 @@ -1077,12 +1081,17 @@ class Window(BaseWindow): Rolling sum with a window length of 2 days. - >>> df = pd.DataFrame({ - ... 'A': [pd.to_datetime('2020-01-01'), - ... pd.to_datetime('2020-01-01'), - ... pd.to_datetime('2020-01-02'),], - ... 'B': [1, 2, 3], }, - ... index=pd.date_range('2020', periods=3)) + >>> df = pd.DataFrame( + ... { + ... "A": [ + ... pd.to_datetime("2020-01-01"), + ... pd.to_datetime("2020-01-01"), + ... pd.to_datetime("2020-01-02"), + ... ], + ... "B": [1, 2, 3], + ... }, + ... index=pd.date_range("2020", periods=3), + ... ) >>> df A B @@ -1090,7 +1099,7 @@ class Window(BaseWindow): 2020-01-02 2020-01-01 2 2020-01-03 2020-01-02 3 - >>> df.rolling('2D', on='A').sum() + >>> df.rolling("2D", on="A").sum() A B 2020-01-01 2020-01-01 1.0 2020-01-02 2020-01-01 3.0 diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 97db508bda1b4..c51122fe9e140 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -49,9 +49,9 @@ class PerformanceWarning(Warning): Examples -------- - >>> df = pd.DataFrame({"jim": [0, 0, 1, 1], - ... "joe": ["x", "x", "z", "y"], - ... "jolie": [1, 2, 3, 4]}) + >>> df = pd.DataFrame( + ... {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": [1, 2, 3, 4]} + ... ) >>> df = df.set_index(["jim", "joe"]) >>> df jolie @@ -60,7 +60,7 @@ class PerformanceWarning(Warning): x 2 1 z 3 y 4 - >>> df.loc[(1, 'z')] # doctest: +SKIP + >>> df.loc[(1, "z")] # doctest: +SKIP # PerformanceWarning: indexing past lexsort depth may impact performance. df.loc[(1, 'z')] jolie @@ -77,10 +77,9 @@ class UnsupportedFunctionCall(ValueError): Examples -------- - >>> df = pd.DataFrame({"A": [0, 0, 1, 1], - ... "B": ["x", "x", "z", "y"], - ... "C": [1, 2, 3, 4]} - ... ) + >>> df = pd.DataFrame( + ... {"A": [0, 0, 1, 1], "B": ["x", "x", "z", "y"], "C": [1, 2, 3, 4]} + ... ) >>> np.cumsum(df.groupby(["A"])) Traceback (most recent call last): UnsupportedFunctionCall: numpy operations are not valid with groupby. @@ -96,10 +95,13 @@ class UnsortedIndexError(KeyError): Examples -------- - >>> df = pd.DataFrame({"cat": [0, 0, 1, 1], - ... "color": ["white", "white", "brown", "black"], - ... "lives": [4, 4, 3, 7]}, - ... ) + >>> df = pd.DataFrame( + ... { + ... "cat": [0, 0, 1, 1], + ... "color": ["white", "white", "brown", "black"], + ... "lives": [4, 4, 3, 7], + ... }, + ... ) >>> df = df.set_index(["cat", "color"]) >>> df lives @@ -108,7 +110,7 @@ class UnsortedIndexError(KeyError): white 4 1 brown 3 black 7 - >>> df.loc[(0, "black"):(1, "white")] + >>> df.loc[(0, "black") : (1, "white")] Traceback (most recent call last): UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)' @@ -133,7 +135,7 @@ class ParserError(ValueError): ... cat,foo,bar ... 
dog,foo,"baz''' >>> from io import StringIO - >>> pd.read_csv(StringIO(data), skipfooter=1, engine='python') + >>> pd.read_csv(StringIO(data), skipfooter=1, engine="python") Traceback (most recent call last): ParserError: ',' expected after '"'. Error could possibly be due to parsing errors in the skipped footer rows @@ -167,11 +169,14 @@ class DtypeWarning(Warning): This example creates and reads a large CSV file with a column that contains `int` and `str`. - >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + - ... ['1'] * 100000), - ... 'b': ['b'] * 300000}) # doctest: +SKIP - >>> df.to_csv('test.csv', index=False) # doctest: +SKIP - >>> df2 = pd.read_csv('test.csv') # doctest: +SKIP + >>> df = pd.DataFrame( + ... { + ... "a": (["1"] * 100000 + ["X"] * 100000 + ["1"] * 100000), + ... "b": ["b"] * 300000, + ... } + ... ) # doctest: +SKIP + >>> df.to_csv("test.csv", index=False) # doctest: +SKIP + >>> df2 = pd.read_csv("test.csv") # doctest: +SKIP ... # DtypeWarning: Columns (0) have mixed types Important to notice that ``df2`` will contain both `str` and `int` for the @@ -189,7 +194,7 @@ class DtypeWarning(Warning): One way to solve this issue is using the `dtype` parameter in the `read_csv` and `read_table` functions to explicit the conversion: - >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP + >>> df2 = pd.read_csv("test.csv", sep=",", dtype={"a": str}) # doctest: +SKIP No warning was issued. """ @@ -241,12 +246,12 @@ class ParserWarning(Warning): >>> csv = '''a;b;c ... 1;1,8 ... 1;2,1''' - >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP + >>> df = pd.read_csv(io.StringIO(csv), sep="[;,]") # doctest: +SKIP ... # ParserWarning: Falling back to the 'python' engine... Adding `engine='python'` to `pd.read_csv` removes the Warning: - >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python') + >>> df = pd.read_csv(io.StringIO(csv), sep="[;,]", engine="python") """ @@ -258,13 +263,19 @@ class MergeError(ValueError): Examples -------- - >>> left = pd.DataFrame({"a": ["a", "b", "b", "d"], - ... "b": ["cat", "dog", "weasel", "horse"]}, - ... index=range(4)) - >>> right = pd.DataFrame({"a": ["a", "b", "c", "d"], - ... "c": ["meow", "bark", "chirp", "nay"]}, - ... index=range(4)).set_index("a") - >>> left.join(right, on="a", validate="one_to_one",) + >>> left = pd.DataFrame( + ... {"a": ["a", "b", "b", "d"], "b": ["cat", "dog", "weasel", "horse"]}, + ... index=range(4), + ... ) + >>> right = pd.DataFrame( + ... {"a": ["a", "b", "c", "d"], "c": ["meow", "bark", "chirp", "nay"]}, + ... index=range(4), + ... ).set_index("a") + >>> left.join( + ... right, + ... on="a", + ... validate="one_to_one", + ... ) Traceback (most recent call last): MergeError: Merge keys are not unique in left dataset; not a one-to-one merge """ @@ -280,6 +291,7 @@ class AbstractMethodError(NotImplementedError): ... @classmethod ... def classmethod(cls): ... raise pd.errors.AbstractMethodError(cls, methodtype="classmethod") + ... ... def method(self): ... raise pd.errors.AbstractMethodError(self) >>> test = Foo.classmethod() @@ -314,8 +326,9 @@ class NumbaUtilError(Exception): Examples -------- - >>> df = pd.DataFrame({"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]}, - ... columns=["key", "data"]) + >>> df = pd.DataFrame( + ... {"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]}, columns=["key", "data"] + ... ) >>> def incorrect_function(x): ... 
return sum(x) * 2.7 >>> df.groupby("key").agg(incorrect_function, engine="numba") @@ -331,10 +344,10 @@ class DuplicateLabelError(ValueError): Examples -------- - >>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags( + >>> s = pd.Series([0, 1, 2], index=["a", "b", "c"]).set_flags( ... allows_duplicate_labels=False ... ) - >>> s.reindex(['a', 'a', 'b']) + >>> s.reindex(["a", "a", "b"]) Traceback (most recent call last): ... DuplicateLabelError: Index has duplicates. @@ -351,8 +364,7 @@ class InvalidIndexError(Exception): Examples -------- >>> idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]]) - >>> df = pd.DataFrame([[1, 1, 2, 2], - ... [3, 3, 4, 4]], columns=idx) + >>> df = pd.DataFrame([[1, 1, 2, 2], [3, 3, 4, 4]], columns=idx) >>> df x y 0 1 0 1 @@ -373,7 +385,7 @@ class DataError(Exception): Examples -------- - >>> ser = pd.Series(['a', 'b', 'c']) + >>> ser = pd.Series(["a", "b", "c"]) >>> ser.rolling(2).sum() Traceback (most recent call last): DataError: No numeric types to aggregate @@ -394,16 +406,14 @@ class SpecificationError(Exception): Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2], - ... 'B': range(5), - ... 'C': range(5)}) - >>> df.groupby('A').B.agg({'foo': 'count'}) # doctest: +SKIP + >>> df = pd.DataFrame({"A": [1, 1, 1, 2, 2], "B": range(5), "C": range(5)}) + >>> df.groupby("A").B.agg({"foo": "count"}) # doctest: +SKIP ... # SpecificationError: nested renamer is not supported - >>> df.groupby('A').agg({'B': {'foo': ['sum', 'max']}}) # doctest: +SKIP + >>> df.groupby("A").agg({"B": {"foo": ["sum", "max"]}}) # doctest: +SKIP ... # SpecificationError: nested renamer is not supported - >>> df.groupby('A').agg(['min', 'min']) # doctest: +SKIP + >>> df.groupby("A").agg(["min", "min"]) # doctest: +SKIP ... # SpecificationError: nested renamer is not supported """ @@ -424,7 +434,7 @@ class ChainedAssignmentError(Warning): Examples -------- >>> pd.options.mode.copy_on_write = True - >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A']) + >>> df = pd.DataFrame({"A": [1, 1, 1, 2, 2]}, columns=["A"]) >>> df["A"][0:3] = 10 # doctest: +SKIP ... # ChainedAssignmentError: ... >>> pd.options.mode.copy_on_write = False @@ -441,11 +451,11 @@ class NumExprClobberingError(NameError): Examples -------- - >>> df = pd.DataFrame({'abs': [1, 1, 1]}) + >>> df = pd.DataFrame({"abs": [1, 1, 1]}) >>> df.query("abs > 2") # doctest: +SKIP ... # NumExprClobberingError: Variables in expression "(abs) > (2)" overlap... >>> sin, a = 1, 2 - >>> pd.eval("sin + a", engine='numexpr') # doctest: +SKIP + >>> pd.eval("sin + a", engine="numexpr") # doctest: +SKIP ... # NumExprClobberingError: Variables in expression "(sin) + (a)" overlap... """ @@ -458,12 +468,12 @@ class UndefinedVariableError(NameError): Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 1]}) + >>> df = pd.DataFrame({"A": [1, 1, 1]}) >>> df.query("A > x") # doctest: +SKIP ... # UndefinedVariableError: name 'x' is not defined >>> df.query("A > @y") # doctest: +SKIP ... # UndefinedVariableError: local variable 'y' is not defined - >>> pd.eval('x + 1') # doctest: +SKIP + >>> pd.eval("x + 1") # doctest: +SKIP ... # UndefinedVariableError: name 'x' is not defined """ @@ -493,17 +503,16 @@ class IndexingError(Exception): Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 1]}) - >>> df.loc[..., ..., 'A'] # doctest: +SKIP + >>> df = pd.DataFrame({"A": [1, 1, 1]}) + >>> df.loc[..., ..., "A"] # doctest: +SKIP ... # IndexingError: indexer may only contain one '...' 
entry - >>> df = pd.DataFrame({'A': [1, 1, 1]}) - >>> df.loc[1, ..., ...] # doctest: +SKIP + >>> df = pd.DataFrame({"A": [1, 1, 1]}) + >>> df.loc[1, ..., ...] # doctest: +SKIP ... # IndexingError: Too many indexers - >>> df[pd.Series([True], dtype=bool)] # doctest: +SKIP + >>> df[pd.Series([True], dtype=bool)] # doctest: +SKIP ... # IndexingError: Unalignable boolean Series provided as indexer... - >>> s = pd.Series(range(2), - ... index=pd.MultiIndex.from_product([["a", "b"], ["c"]])) - >>> s.loc["a", "c", "d"] # doctest: +SKIP + >>> s = pd.Series(range(2), index=pd.MultiIndex.from_product([["a", "b"], ["c"]])) + >>> s.loc["a", "c", "d"] # doctest: +SKIP ... # IndexingError: Too many indexers """ @@ -539,14 +548,14 @@ class CSSWarning(UserWarning): Examples -------- - >>> df = pd.DataFrame({'A': [1, 1, 1]}) - >>> df.style.applymap( - ... lambda x: 'background-color: blueGreenRed;' - ... ).to_excel('styled.xlsx') # doctest: +SKIP + >>> df = pd.DataFrame({"A": [1, 1, 1]}) + >>> df.style.applymap(lambda x: "background-color: blueGreenRed;").to_excel( + ... "styled.xlsx" + ... ) # doctest: +SKIP CSSWarning: Unhandled color format: 'blueGreenRed' - >>> df.style.applymap( - ... lambda x: 'border: 1px solid red red;' - ... ).to_excel('styled.xlsx') # doctest: +SKIP + >>> df.style.applymap(lambda x: "border: 1px solid red red;").to_excel( + ... "styled.xlsx" + ... ) # doctest: +SKIP CSSWarning: Unhandled color format: 'blueGreenRed' """ @@ -557,9 +566,8 @@ class PossibleDataLossError(Exception): Examples -------- - >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP + >>> store = pd.HDFStore("my-store", "a") # doctest: +SKIP >>> store.open("w") # doctest: +SKIP - ... # PossibleDataLossError: Re-opening the file [my-store] with mode [a]... """ @@ -569,7 +577,7 @@ class ClosedFileError(Exception): Examples -------- - >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP + >>> store = pd.HDFStore("my-store", "a") # doctest: +SKIP >>> store.close() # doctest: +SKIP >>> store.keys() # doctest: +SKIP ... # ClosedFileError: my-store file is not open! @@ -592,12 +600,12 @@ class AttributeConflictWarning(Warning): Examples -------- - >>> idx1 = pd.Index(['a', 'b'], name='name1') + >>> idx1 = pd.Index(["a", "b"], name="name1") >>> df1 = pd.DataFrame([[1, 2], [3, 4]], index=idx1) - >>> df1.to_hdf('file', 'data', 'w', append=True) # doctest: +SKIP - >>> idx2 = pd.Index(['c', 'd'], name='name2') + >>> df1.to_hdf("file", "data", "w", append=True) # doctest: +SKIP + >>> idx2 = pd.Index(["c", "d"], name="name2") >>> df2 = pd.DataFrame([[5, 6], [7, 8]], index=idx2) - >>> df2.to_hdf('file', 'data', 'a', append=True) # doctest: +SKIP + >>> df2.to_hdf("file", "data", "a", append=True) # doctest: +SKIP AttributeConflictWarning: the [index_name] attribute of the existing index is [name1] which conflicts with the new [name2]... """ @@ -616,9 +624,8 @@ class DatabaseError(OSError): Examples -------- >>> from sqlite3 import connect - >>> conn = connect(':memory:') - >>> pd.read_sql('select * test', conn) # doctest: +SKIP - ... # DatabaseError: Execution failed on sql 'test': near "test": syntax error + >>> conn = connect(":memory:") + >>> pd.read_sql("select * test", conn) # doctest: +SKIP """ @@ -632,8 +639,7 @@ class PossiblePrecisionLoss(Warning): Examples -------- >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)}) - >>> df.to_stata('test') # doctest: +SKIP - ... # PossiblePrecisionLoss: Column converted from int64 to float64... 
+ >>> df.to_stata("test") # doctest: +SKIP """ @@ -644,8 +650,7 @@ class ValueLabelTypeMismatch(Warning): Examples -------- >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")}) - >>> df.to_stata('test') # doctest: +SKIP - ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str... + >>> df.to_stata("test") # doctest: +SKIP """ @@ -663,8 +668,7 @@ class InvalidColumnName(Warning): Examples -------- >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])}) - >>> df.to_stata('test') # doctest: +SKIP - ... # InvalidColumnName: Not all pandas column names were valid Stata variable... + >>> df.to_stata("test") # doctest: +SKIP """ @@ -675,7 +679,7 @@ class CategoricalConversionWarning(Warning): Examples -------- >>> from pandas.io.stata import StataReader - >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP + >>> with StataReader("dta_file", chunksize=2) as reader: # doctest: +SKIP ... for i, block in enumerate(reader): ... print(i, block) ... # CategoricalConversionWarning: One or more series with value labels... diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index a15e37328e9fa..8e8b22967ea01 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -64,7 +64,7 @@ def read_clipboard( Examples -------- - >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) >>> df.to_clipboard() # doctest: +SKIP >>> pd.read_clipboard() # doctest: +SKIP A B C diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 4109b6d0965bb..1f272d0e09db8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1019,7 +1019,7 @@ class ExcelWriter(Generic[_WorkbookT]): >>> with pd.ExcelWriter( ... "path_to_file.xlsx", ... date_format="YYYY-MM-DD", - ... datetime_format="YYYY-MM-DD HH:MM:SS" + ... datetime_format="YYYY-MM-DD HH:MM:SS", ... ) as writer: ... df.to_excel(writer) # doctest: +SKIP @@ -1073,7 +1073,7 @@ class ExcelWriter(Generic[_WorkbookT]): >>> with pd.ExcelWriter( ... "path_to_file.xlsx", ... engine="xlsxwriter", - ... engine_kwargs={{"options": {{"nan_inf_to_errors": True}}}} + ... engine_kwargs={{"options": {{"nan_inf_to_errors": True}}}}, ... ) as writer: ... df.to_excel(writer) # doctest: +SKIP @@ -1084,7 +1084,7 @@ class ExcelWriter(Generic[_WorkbookT]): ... "path_to_file.xlsx", ... engine="openpyxl", ... mode="a", - ... engine_kwargs={{"keep_vba": True}} + ... engine_kwargs={{"keep_vba": True}}, ... ) as writer: ... df.to_excel(writer, sheet_name="Sheet2") # doctest: +SKIP """ @@ -1494,7 +1494,7 @@ class ExcelFile: Examples -------- - >>> file = pd.ExcelFile('myfile.xlsx') # doctest: +SKIP + >>> file = pd.ExcelFile("myfile.xlsx") # doctest: +SKIP >>> with pd.ExcelFile("myfile.xls") as xls: # doctest: +SKIP ... 
df1 = pd.read_excel(xls, "Sheet1") # doctest: +SKIP """ @@ -1617,9 +1617,9 @@ def parse( Examples -------- - >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C']) - >>> df.to_excel('myfile.xlsx') # doctest: +SKIP - >>> file = pd.ExcelFile('myfile.xlsx') # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) + >>> df.to_excel("myfile.xlsx") # doctest: +SKIP + >>> file = pd.ExcelFile("myfile.xlsx") # doctest: +SKIP >>> file.parse() # doctest: +SKIP """ return self._reader.parse( diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index f7a1fcb8052e3..95d43f60a22c5 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -143,9 +143,9 @@ def _range2cols(areas: str) -> list[int]: Examples -------- - >>> _range2cols('A:E') + >>> _range2cols("A:E") [0, 1, 2, 3, 4] - >>> _range2cols('A,C,Z:AB') + >>> _range2cols("A,C,Z:AB") [0, 2, 25, 26, 27] """ cols: list[int] = [] diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index 89f7cb9c4dec6..0c6885d789f15 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -244,14 +244,17 @@ def __call__( Examples -------- >>> resolve = CSSResolver() - >>> inherited = {'font-family': 'serif', 'font-weight': 'bold'} - >>> out = resolve(''' + >>> inherited = {"font-family": "serif", "font-weight": "bold"} + >>> out = resolve( + ... ''' ... border-color: BLUE RED; ... font-size: 1em; ... font-size: 2em; ... font-weight: normal; ... font-weight: inherit; - ... ''', inherited) + ... ''', + ... inherited, + ... ) >>> sorted(out.items()) # doctest: +NORMALIZE_WHITESPACE [('border-bottom-color', 'blue'), ('border-left-color', 'red'), diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 2d28b032ca49d..a837eddd6cf5b 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -334,10 +334,10 @@ def _sizeof_fmt(num: float, size_qualifier: str) -> str: Examples -------- - >>> _sizeof_fmt(23028, '') + >>> _sizeof_fmt(23028, "") '22.5 KB' - >>> _sizeof_fmt(23028, '+') + >>> _sizeof_fmt(23028, "+") '22.5+ KB' """ for x in ["bytes", "KB", "MB", "GB", "TB"]: diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 2cc9368f8846a..45465eb51c975 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -474,7 +474,7 @@ def _justify( Examples -------- - >>> _justify([['a', 'b']], [['abc', 'abcd']]) + >>> _justify([["a", "b"]], [["abc", "abcd"]]) ([(' a', ' b')], [('abc', 'abcd')]) """ combined = head + tail diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3a6a44a8be253..7be23b69dfa09 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -245,10 +245,12 @@ class Styler(StylerRenderer): Examples -------- - >>> df = pd.DataFrame([[1.0, 2.0, 3.0], [4, 5, 6]], index=['a', 'b'], - ... columns=['A', 'B', 'C']) - >>> pd.io.formats.style.Styler(df, precision=2, - ... caption="My table") # doctest: +SKIP + >>> df = pd.DataFrame( + ... [[1.0, 2.0, 3.0], [4, 5, 6]], index=["a", "b"], columns=["A", "B", "C"] + ... ) + >>> pd.io.formats.style.Styler( + ... df, precision=2, caption="My table" + ... ) # doctest: +SKIP Please see: `Table Visualization <../../user_guide/style.ipynb>`_ for more examples. @@ -355,9 +357,11 @@ def concat(self, other: Styler) -> Styler: A common use case is adding totals rows, or otherwise, via methods calculated in ``DataFrame.agg``. - >>> df = pd.DataFrame([[4, 6], [1, 9], [3, 4], [5, 5], [9, 6]], - ... columns=["Mike", "Jim"], - ... 
index=["Mon", "Tue", "Wed", "Thurs", "Fri"]) + >>> df = pd.DataFrame( + ... [[4, 6], [1, 9], [3, 4], [5, 5], [9, 6]], + ... columns=["Mike", "Jim"], + ... index=["Mon", "Tue", "Wed", "Thurs", "Fri"], + ... ) >>> styler = df.style.concat(df.agg(["sum"]).style) # doctest: +SKIP .. figure:: ../../_static/style/footer_simple.png @@ -367,14 +371,16 @@ def concat(self, other: Styler) -> Styler: >>> descriptors = df.agg(["sum", "mean", lambda s: s.dtype]) >>> descriptors.index = ["Total", "Average", "dtype"] - >>> other = (descriptors.style - ... .highlight_max(axis=1, subset=(["Total", "Average"], slice(None))) - ... .format(subset=("Average", slice(None)), precision=2, decimal=",") - ... .map(lambda v: "font-weight: bold;")) - >>> styler = (df.style - ... .highlight_max(color="salmon") - ... .set_table_styles([{"selector": ".foot_row0", - ... "props": "border-top: 1px solid black;"}])) + >>> other = ( + ... descriptors.style.highlight_max( + ... axis=1, subset=(["Total", "Average"], slice(None)) + ... ) + ... .format(subset=("Average", slice(None)), precision=2, decimal=",") + ... .map(lambda v: "font-weight: bold;") + ... ) + >>> styler = df.style.highlight_max(color="salmon").set_table_styles( + ... [{"selector": ".foot_row0", "props": "border-top: 1px solid black;"}] + ... ) >>> styler.concat(other) # doctest: +SKIP .. figure:: ../../_static/style/footer_extended.png @@ -382,8 +388,9 @@ def concat(self, other: Styler) -> Styler: When ``other`` has fewer index levels than the original Styler it is possible to extend the index in ``other``, with placeholder levels. - >>> df = pd.DataFrame([[1], [2]], - ... index=pd.MultiIndex.from_product([[0], [1, 2]])) + >>> df = pd.DataFrame( + ... [[1], [2]], index=pd.MultiIndex.from_product([[0], [1, 2]]) + ... ) >>> descriptors = df.agg(["sum"]) >>> descriptors.index = pd.MultiIndex.from_product([[""], descriptors.index]) >>> df.style.concat(descriptors.style) # doctest: +SKIP @@ -482,13 +489,20 @@ def set_tooltips( Optionally controlling the tooltip visual display - >>> df.style.set_tooltips(ttips, css_class='tt-add', props=[ - ... ('visibility', 'hidden'), - ... ('position', 'absolute'), - ... ('z-index', 1)]) # doctest: +SKIP >>> df.style.set_tooltips( - ... ttips, css_class='tt-add', - ... props='visibility:hidden; position:absolute; z-index:1;') + ... ttips, + ... css_class="tt-add", + ... props=[ + ... ("visibility", "hidden"), + ... ("position", "absolute"), + ... ("z-index", 1), + ... ], + ... ) # doctest: +SKIP + >>> df.style.set_tooltips( + ... ttips, + ... css_class="tt-add", + ... props="visibility:hidden; position:absolute; z-index:1;", + ... ) ... # doctest: +SKIP """ if not self.cell_ids: @@ -1316,7 +1330,7 @@ def to_html( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> print(df.style.to_html()) # doctest: +SKIP @@ -1443,7 +1457,7 @@ def to_string( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> df.style.to_string() ' A B\\n0 1 3\\n1 2 4\\n' """ @@ -1496,19 +1510,24 @@ def set_td_classes(self, classes: DataFrame) -> Styler: Examples -------- >>> df = pd.DataFrame(data=[[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"]) - >>> classes = pd.DataFrame([ - ... ["min-val red", "", "blue"], - ... ["red", None, "blue max-val"] - ... ], index=df.index, columns=df.columns) + >>> classes = pd.DataFrame( + ... [["min-val red", "", "blue"], ["red", None, "blue max-val"]], + ... index=df.index, + ... 
columns=df.columns, + ... ) >>> df.style.set_td_classes(classes) # doctest: +SKIP Using `MultiIndex` columns and a `classes` `DataFrame` as a subset of the underlying, - >>> df = pd.DataFrame([[1, 2], [3, 4]], index=["a", "b"], - ... columns=[["level0", "level0"], ["level1a", "level1b"]]) - >>> classes = pd.DataFrame(["min-val"], index=["a"], - ... columns=[["level0"], ["level1a"]]) + >>> df = pd.DataFrame( + ... [[1, 2], [3, 4]], + ... index=["a", "b"], + ... columns=[["level0", "level0"], ["level1a", "level1b"]], + ... ) + >>> classes = pd.DataFrame( + ... ["min-val"], index=["a"], columns=[["level0"], ["level1a"]] + ... ) >>> df.style.set_td_classes(classes) # doctest: +SKIP Form of the output with new additional css classes, @@ -1680,11 +1699,11 @@ def clear(self) -> None: Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, np.nan]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, np.nan]}) After any added style: - >>> df.style.highlight_null(color='yellow') # doctest: +SKIP + >>> df.style.highlight_null(color="yellow") # doctest: +SKIP Remove it with: @@ -1821,22 +1840,22 @@ def apply( >>> def highlight_max(x, color): ... return np.where(x == np.nanmax(x.to_numpy()), f"color: {color};", None) >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) - >>> df.style.apply(highlight_max, color='red') # doctest: +SKIP - >>> df.style.apply(highlight_max, color='blue', axis=1) # doctest: +SKIP - >>> df.style.apply(highlight_max, color='green', axis=None) # doctest: +SKIP + >>> df.style.apply(highlight_max, color="red") # doctest: +SKIP + >>> df.style.apply(highlight_max, color="blue", axis=1) # doctest: +SKIP + >>> df.style.apply(highlight_max, color="green", axis=None) # doctest: +SKIP Using ``subset`` to restrict application to a single column or multiple columns - >>> df.style.apply(highlight_max, color='red', subset="A") + >>> df.style.apply(highlight_max, color="red", subset="A") ... # doctest: +SKIP - >>> df.style.apply(highlight_max, color='red', subset=["A", "B"]) + >>> df.style.apply(highlight_max, color="red", subset=["A", "B"]) ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.apply(highlight_max, color='red', subset=([0, 1, 2], slice(None))) + >>> df.style.apply(highlight_max, color="red", subset=([0, 1, 2], slice(None))) ... # doctest: +SKIP - >>> df.style.apply(highlight_max, color='red', subset=(slice(0, 5, 2), "A")) + >>> df.style.apply(highlight_max, color="red", subset=(slice(0, 5, 2), "A")) ... # doctest: +SKIP Using a function which returns a Series / DataFrame of unequal length but @@ -1945,7 +1964,7 @@ def apply_index( Selectively applying to specific levels of MultiIndex columns. - >>> midx = pd.MultiIndex.from_product([['ix', 'jy'], [0, 1], ['x3', 'z4']]) + >>> midx = pd.MultiIndex.from_product([["ix", "jy"], [0, 1], ["x3", "z4"]]) >>> df = pd.DataFrame([np.arange(8)], columns=midx) >>> def highlight_x({var}): ... return {ret2} @@ -2073,20 +2092,22 @@ def map(self, func: Callable, subset: Subset | None = None, **kwargs) -> Styler: >>> def color_negative(v, color): ... 
return f"color: {color};" if v < 0 else None >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) - >>> df.style.map(color_negative, color='red') # doctest: +SKIP + >>> df.style.map(color_negative, color="red") # doctest: +SKIP Using ``subset`` to restrict application to a single column or multiple columns - >>> df.style.map(color_negative, color='red', subset="A") # doctest: +SKIP - >>> df.style.map(color_negative, - ... color='red', subset=["A", "B"]) # doctest: +SKIP + >>> df.style.map(color_negative, color="red", subset="A") + ... # doctest: +SKIP + >>> df.style.map(color_negative, color="red", subset=["A", "B"]) + ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.map(color_negative, color='red', - ... subset=([0, 1, 2], slice(None))) # doctest: +SKIP - >>> df.style.map(color_negative, - ... color='red', subset=(slice(0, 5, 2), "A")) # doctest: +SKIP + >>> df.style.map( + ... color_negative, color="red", subset=([0, 1, 2], slice(None)) + ... ) # doctest: +SKIP + >>> df.style.map(color_negative, color="red", subset=(slice(0, 5, 2), "A")) + ... # doctest: +SKIP See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for more details. @@ -2301,7 +2322,7 @@ def set_uuid(self, uuid: str) -> Styler: Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['A', 'B'], columns=['c1', 'c2']) + >>> df = pd.DataFrame([[1, 2], [3, 4]], index=["A", "B"], columns=["c1", "c2"]) You can get the `id` attributes with the following: @@ -2335,7 +2356,7 @@ def set_caption(self, caption: str | tuple | list) -> Styler: Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> df.style.set_caption("test") # doctest: +SKIP Please see: @@ -2391,7 +2412,7 @@ def set_sticky( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> df.style.set_sticky(axis="index") # doctest: +SKIP Please see: @@ -2552,49 +2573,55 @@ def set_table_styles( .. code-block:: python - css_class_names = {"row_heading": "row_heading", - "col_heading": "col_heading", - "index_name": "index_name", - "col": "col", - "row": "row", - "col_trim": "col_trim", - "row_trim": "row_trim", - "level": "level", - "data": "data", - "blank": "blank", - "foot": "foot"} + css_class_names = { + "row_heading": "row_heading", + "col_heading": "col_heading", + "index_name": "index_name", + "col": "col", + "row": "row", + "col_trim": "col_trim", + "row_trim": "row_trim", + "level": "level", + "data": "data", + "blank": "blank", + "foot": "foot", + } Examples -------- - >>> df = pd.DataFrame(np.random.randn(10, 4), - ... columns=['A', 'B', 'C', 'D']) + >>> df = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) >>> df.style.set_table_styles( - ... [{'selector': 'tr:hover', - ... 'props': [('background-color', 'yellow')]}] + ... [{"selector": "tr:hover", "props": [("background-color", "yellow")]}] ... ) # doctest: +SKIP Or with CSS strings >>> df.style.set_table_styles( - ... [{'selector': 'tr:hover', - ... 'props': 'background-color: yellow; font-size: 1em;'}] + ... [ + ... { + ... "selector": "tr:hover", + ... "props": "background-color: yellow; font-size: 1em;", + ... } + ... ] ... ) # doctest: +SKIP Adding column styling by name - >>> df.style.set_table_styles({ - ... 'A': [{'selector': '', - ... 'props': [('color', 'red')]}], - ... 'B': [{'selector': 'td', - ... 'props': 'color: blue;'}] - ... 
}, overwrite=False) # doctest: +SKIP + >>> df.style.set_table_styles( + ... { + ... "A": [{"selector": "", "props": [("color", "red")]}], + ... "B": [{"selector": "td", "props": "color: blue;"}], + ... }, + ... overwrite=False, + ... ) # doctest: +SKIP Adding row styling - >>> df.style.set_table_styles({ - ... 0: [{'selector': 'td:hover', - ... 'props': [('font-size', '25px')]}] - ... }, axis=1, overwrite=False) # doctest: +SKIP + >>> df.style.set_table_styles( + ... {0: [{"selector": "td:hover", "props": [("font-size", "25px")]}]}, + ... axis=1, + ... overwrite=False, + ... ) # doctest: +SKIP See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for more details. @@ -2923,10 +2950,14 @@ def background_gradient( Examples -------- - >>> df = pd.DataFrame(columns=["City", "Temp (c)", "Rain (mm)", "Wind (m/s)"], - ... data=[["Stockholm", 21.6, 5.0, 3.2], - ... ["Oslo", 22.4, 13.3, 3.1], - ... ["Copenhagen", 24.5, 0.0, 6.7]]) + >>> df = pd.DataFrame( + ... columns=["City", "Temp (c)", "Rain (mm)", "Wind (m/s)"], + ... data=[ + ... ["Stockholm", 21.6, 5.0, 3.2], + ... ["Oslo", 22.4, 13.3, 3.1], + ... ["Copenhagen", 24.5, 0.0, 6.7], + ... ], + ... ) Shading the values column-wise, with ``axis=0``, preselecting numeric columns @@ -2963,9 +2994,9 @@ def background_gradient( explicitly state ``subset`` to match the ``gmap`` shape >>> gmap = np.array([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) - >>> df.style.{name}_gradient( - ... axis=None, gmap=gmap, cmap='YlOrRd', - ... subset=['Temp (c)', 'Rain (mm)', 'Wind (m/s)']) # doctest: +SKIP + >>> df.style.{name}_gradient(axis=None, gmap=gmap, + ... cmap='YlOrRd', subset=['Temp (c)', 'Rain (mm)', 'Wind (m/s)'] + ... ) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone_gmap.png """ @@ -3044,7 +3075,7 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) >>> df.style.set_properties(color="white", align="right") # doctest: +SKIP - >>> df.style.set_properties(**{'background-color': 'yellow'}) # doctest: +SKIP + >>> df.style.set_properties(**{"background-color": "yellow"}) # doctest: +SKIP See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for more details. @@ -3140,8 +3171,8 @@ def bar( # pylint: disable=disallowed-name Examples -------- - >>> df = pd.DataFrame({'A': [1, 2, 3, 4], 'B': [3, 4, 5, 6]}) - >>> df.style.bar(subset=['A'], color='gray') # doctest: +SKIP + >>> df = pd.DataFrame({"A": [1, 2, 3, 4], "B": [3, 4, 5, 6]}) + >>> df.style.bar(subset=["A"], color="gray") # doctest: +SKIP """ if color is None and cmap is None: color = "#d65f5f" @@ -3219,8 +3250,8 @@ def highlight_null( Examples -------- - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, np.nan]}) - >>> df.style.highlight_null(color='yellow') # doctest: +SKIP + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, np.nan]}) + >>> df.style.highlight_null(color="yellow") # doctest: +SKIP Please see: `Table Visualization <../../user_guide/style.ipynb>`_ for more examples. @@ -3273,8 +3304,8 @@ def highlight_max( Examples -------- - >>> df = pd.DataFrame({'A': [2, 1], 'B': [3, 4]}) - >>> df.style.highlight_max(color='yellow') # doctest: +SKIP + >>> df = pd.DataFrame({"A": [2, 1], "B": [3, 4]}) + >>> df.style.highlight_max(color="yellow") # doctest: +SKIP Please see: `Table Visualization <../../user_guide/style.ipynb>`_ for more examples. 
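The `highlight_min` hunk that follows mirrors `highlight_max` above. As a rough sketch of the convention ruff-format enforces throughout this patch (the frame and arguments below are illustrative, not taken from the diff): string literals are normalized to double quotes, a call that fits within the line limit stays on one line, and a call written with a trailing comma inside the parentheses is kept expanded, one argument per line:

>>> df = pd.DataFrame({"A": [2, 1], "B": [3, 4]})
>>> df.style.highlight_max(
...     color="yellow",  # trailing comma below keeps the call expanded
...     axis=0,
... )  # doctest: +SKIP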
@@ -3329,8 +3360,8 @@ def highlight_min( Examples -------- - >>> df = pd.DataFrame({'A': [2, 1], 'B': [3, 4]}) - >>> df.style.highlight_min(color='yellow') # doctest: +SKIP + >>> df = pd.DataFrame({"A": [2, 1], "B": [3, 4]}) + >>> df.style.highlight_min(color="yellow") # doctest: +SKIP Please see: `Table Visualization <../../user_guide/style.ipynb>`_ for more examples. @@ -3409,11 +3440,13 @@ def highlight_between( -------- Basic usage - >>> df = pd.DataFrame({ - ... 'One': [1.2, 1.6, 1.5], - ... 'Two': [2.9, 2.1, 2.5], - ... 'Three': [3.1, 3.2, 3.8], - ... }) + >>> df = pd.DataFrame( + ... { + ... "One": [1.2, 1.6, 1.5], + ... "Two": [2.9, 2.1, 2.5], + ... "Three": [3.1, 3.2, 3.8], + ... } + ... ) >>> df.style.highlight_between(left=2.1, right=2.9) # doctest: +SKIP .. figure:: ../../_static/style/hbetw_basic.png @@ -3421,8 +3454,9 @@ def highlight_between( Using a range input sequence along an ``axis``, in this case setting a ``left`` and ``right`` for each column individually - >>> df.style.highlight_between(left=[1.4, 2.4, 3.4], right=[1.6, 2.6, 3.6], - ... axis=1, color="#fffd75") # doctest: +SKIP + >>> df.style.highlight_between( + ... left=[1.4, 2.4, 3.4], right=[1.6, 2.6, 3.6], axis=1, color="#fffd75" + ... ) # doctest: +SKIP .. figure:: ../../_static/style/hbetw_seq.png @@ -3430,16 +3464,19 @@ def highlight_between( matches the input DataFrame, with a constant ``right`` >>> df.style.highlight_between( - ... left=[[2, 2, 3], [2, 2, 3], [3, 3, 3]], right=3.5, - ... axis=None, color="#fffd75") # doctest: +SKIP + ... left=[[2, 2, 3], [2, 2, 3], [3, 3, 3]], + ... right=3.5, + ... axis=None, + ... color="#fffd75", + ... ) # doctest: +SKIP .. figure:: ../../_static/style/hbetw_axNone.png Using ``props`` instead of default background coloring >>> df.style.highlight_between( - ... left=1.5, right=3.5, - ... props='font-weight:bold;color:#e83e8c') # doctest: +SKIP + ... left=1.5, right=3.5, props="font-weight:bold;color:#e83e8c" + ... ) # doctest: +SKIP .. figure:: ../../_static/style/hbetw_props.png """ @@ -3529,8 +3566,11 @@ def highlight_quantile( Use ``props`` instead of default background coloring >>> df.style.highlight_quantile( - ... axis=None, q_left=0.2, q_right=0.8, - ... props='font-weight:bold;color:#e83e8c') # doctest: +SKIP + ... axis=None, + ... q_left=0.2, + ... q_right=0.8, + ... props="font-weight:bold;color:#e83e8c", + ... ) # doctest: +SKIP .. figure:: ../../_static/style/hq_props.png """ @@ -3602,9 +3642,10 @@ def from_custom_template( Examples -------- >>> from pandas.io.formats.style import Styler - >>> EasyStyler = Styler.from_custom_template("path/to/template", - ... "template.tpl", - ... ) # doctest: +SKIP + >>> EasyStyler = Styler.from_custom_template( + ... "path/to/template", + ... "template.tpl", + ... ) # doctest: +SKIP >>> df = pd.DataFrame({"A": [1, 2]}) >>> EasyStyler(df) # doctest: +SKIP @@ -3688,9 +3729,7 @@ def pipe( .. code-block:: python - (df.style.format(precision=3) - .pipe(g, arg1=a) - .pipe(f, arg2=b, arg3=c)) + (df.style.format(precision=3).pipe(g, arg1=a).pipe(f, arg2=b, arg3=c)) In particular, this allows users to define functions that take a styler object, along with other parameters, and return the styler after @@ -3718,9 +3757,11 @@ def pipe( Since the method returns a ``Styler`` object it can be chained with other methods as if applying the underlying highlighters directly. - >>> (df.style.format("{:.1f}") + >>> ( + ... df.style.format("{:.1f}") ... .pipe(some_highlights, min_color="green") - ... 
.highlight_between(left=2, right=5)) # doctest: +SKIP + ... .highlight_between(left=2, right=5) + ... ) # doctest: +SKIP .. figure:: ../../_static/style/df_pipe_hl2.png @@ -3739,8 +3780,9 @@ def pipe( >>> def highlight_last_level(styler): ... return styler.apply_index( - ... lambda v: "background-color: pink; color: yellow", axis="columns", - ... level=styler.columns.nlevels - 1 + ... lambda v: "background-color: pink; color: yellow", + ... axis="columns", + ... level=styler.columns.nlevels - 1, ... ) # doctest: +SKIP >>> df.columns = pd.MultiIndex.from_product([["A", "B"], ["X", "Y"]]) >>> df.style.pipe(highlight_last_level) # doctest: +SKIP @@ -3757,6 +3799,7 @@ def pipe( ... return np.where( ... styler.data.isna().any(), "background-color: red;", "" ... ) + ... ... return styler.apply_index(dynamic_highlight, axis=1, level=level) >>> df.style.pipe(highlight_header_missing, level=1) # doctest: +SKIP diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 4ba094ec614d0..1cf54dc2cc756 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -1449,10 +1449,10 @@ def relabel_index( # relabel first, then hide df = pd.DataFrame({"col": ["a", "b", "c"]}) - df.style.relabel_index(["A", "B", "C"]).hide([0,1]) + df.style.relabel_index(["A", "B", "C"]).hide([0, 1]) # hide first, then relabel df = pd.DataFrame({"col": ["a", "b", "c"]}) - df.style.hide([0,1]).relabel_index(["C"]) + df.style.hide([0, 1]).relabel_index(["C"]) This method should be used, rather than :meth:`Styler.format_index`, in one of the following cases (see examples): @@ -1493,8 +1493,9 @@ def relabel_index( 1 5 1 0 6 1 7 - >>> styler.hide((midx.get_level_values(0) == 0) | - ... (midx.get_level_values(1) == 0)) + >>> styler.hide( + ... (midx.get_level_values(0) == 0) | (midx.get_level_values(1) == 0) + ... ) ... # doctest: +SKIP >>> styler.hide(level=[0, 1]) # doctest: +SKIP >>> styler.relabel_index(["binary6", "binary7"]) # doctest: +SKIP @@ -2154,10 +2155,12 @@ def _parse_latex_table_styles(table_styles: CSSStyles, selector: str) -> str | N Examples -------- - >>> table_styles = [{'selector': 'foo', 'props': [('attr','value')]}, - ... {'selector': 'bar', 'props': [('attr', 'overwritten')]}, - ... {'selector': 'bar', 'props': [('a1', 'baz'), ('a2', 'ignore')]}] - >>> _parse_latex_table_styles(table_styles, selector='bar') + >>> table_styles = [ + ... {"selector": "foo", "props": [("attr", "value")]}, + ... {"selector": "bar", "props": [("attr", "overwritten")]}, + ... {"selector": "bar", "props": [("a1", "baz"), ("a2", "ignore")]}, + ... ] + >>> _parse_latex_table_styles(table_styles, selector="bar") 'baz' Notes @@ -2241,8 +2244,8 @@ def _parse_latex_header_span( Examples -------- - >>> cell = {'cellstyle': '', 'display_value':'text', 'attributes': 'colspan="3"'} - >>> _parse_latex_header_span(cell, 't', 'c') + >>> cell = {"cellstyle": "", "display_value": "text", "attributes": 'colspan="3"'} + >>> _parse_latex_header_span(cell, "t", "c") '\\multicolumn{3}{c}{text}' """ display_val = _parse_latex_cell_styles( diff --git a/pandas/io/html.py b/pandas/io/html.py index 302f901aa0d16..adcb78d3fb7d1 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -1100,13 +1100,13 @@ def read_html( passed to lxml or Beautiful Soup. However, these attributes must be valid HTML table attributes to work correctly. 
For example, :: - attrs = {{'id': 'table'}} + attrs = {{"id": "table"}} is a valid attribute dictionary because the 'id' HTML tag attribute is a valid HTML attribute for *any* HTML tag as per `this document `__. :: - attrs = {{'asdf': 'table'}} + attrs = {{"asdf": "table"}} is *not* a valid attribute dictionary because 'asdf' is not a valid HTML attribute even if it is a valid XML attribute. Valid HTML 4.01 diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index a6d58d6cffb10..e9f2e319c0136 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -594,9 +594,7 @@ def read_parquet( Examples -------- - >>> original_df = pd.DataFrame( - ... {{"foo": range(5), "bar": range(5, 10)}} - ... ) + >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) >>> original_df foo bar 0 0 5 @@ -624,7 +622,7 @@ def read_parquet( 2 7 3 8 4 9 - >>> restored_bar.equals(original_df[['bar']]) + >>> restored_bar.equals(original_df[["bar"]]) True The function uses `kwargs` that are passed directly to the engine. diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index f24d7a628998e..67f3e5a9f4880 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -396,7 +396,7 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict: def ensure_dtype_objs( - dtype: DtypeArg | dict[Hashable, DtypeArg] | None + dtype: DtypeArg | dict[Hashable, DtypeArg] | None, ) -> DtypeObj | dict[Hashable, DtypeObj] | None: """ Ensure we have either None, a dtype object, or a dictionary mapping to diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 71e1a31759a0c..07920eb1750f2 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1515,7 +1515,7 @@ def read_fwf( Examples -------- - >>> pd.read_fwf('data.csv') # doctest: +SKIP + >>> pd.read_fwf("data.csv") # doctest: +SKIP """ # Check input arguments. if colspecs is None and widths is None: diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 89867ab4f19d0..d3e93ebeb8fbb 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -78,7 +78,9 @@ def to_pickle( Examples -------- - >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}}) # doctest: +SKIP + >>> original_df = pd.DataFrame( + ... {{"foo": range(5), "bar": range(5, 10)}} + ... 
) # doctest: +SKIP >>> original_df # doctest: +SKIP foo bar 0 0 5 @@ -96,7 +98,7 @@ def to_pickle( 2 2 7 3 3 8 4 4 9 - """ # noqa: E501 + """ if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0baf642495584..1e11a9783f0e1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -384,9 +384,9 @@ def read_hdf( Examples -------- - >>> df = pd.DataFrame([[1, 1.0, 'a']], columns=['x', 'y', 'z']) # doctest: +SKIP - >>> df.to_hdf('./store.h5', 'data') # doctest: +SKIP - >>> reread = pd.read_hdf('./store.h5') # doctest: +SKIP + >>> df = pd.DataFrame([[1, 1.0, "a"]], columns=["x", "y", "z"]) # doctest: +SKIP + >>> df.to_hdf("./store.h5", "data") # doctest: +SKIP + >>> reread = pd.read_hdf("./store.h5") # doctest: +SKIP """ if mode not in ["r", "r+", "a"]: raise ValueError( @@ -527,9 +527,9 @@ class HDFStore: Examples -------- >>> bar = pd.DataFrame(np.random.randn(10, 4)) - >>> store = pd.HDFStore('test.h5') - >>> store['foo'] = bar # write to HDF5 - >>> bar = store['foo'] # retrieve + >>> store = pd.HDFStore("test.h5") + >>> store["foo"] = bar # write to HDF5 + >>> bar = store["foo"] # retrieve >>> store.close() **Create or load HDF5 file in-memory** @@ -539,9 +539,9 @@ class HDFStore: written when closed: >>> bar = pd.DataFrame(np.random.randn(10, 4)) - >>> store = pd.HDFStore('test.h5', driver='H5FD_CORE') - >>> store['foo'] = bar - >>> store.close() # only now, data is written to disk + >>> store = pd.HDFStore("test.h5", driver="H5FD_CORE") + >>> store["foo"] = bar + >>> store.close() # only now, data is written to disk """ _handle: File | None @@ -665,10 +665,10 @@ def keys(self, include: str = "pandas") -> list[str]: Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df) # doctest: +SKIP - >>> store.get('data') # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df) # doctest: +SKIP + >>> store.get("data") # doctest: +SKIP >>> print(store.keys()) # doctest: +SKIP ['/data1', '/data2'] >>> store.close() # doctest: +SKIP @@ -794,10 +794,10 @@ def get(self, key: str): Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df) # doctest: +SKIP - >>> store.get('data') # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df) # doctest: +SKIP + >>> store.get("data") # doctest: +SKIP >>> store.close() # doctest: +SKIP """ with patch_pickle(): @@ -856,17 +856,17 @@ def select( Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df) # doctest: +SKIP - >>> store.get('data') # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df) # doctest: +SKIP + >>> store.get("data") # doctest: +SKIP >>> print(store.keys()) # doctest: +SKIP ['/data1', '/data2'] - >>> store.select('/data1') # doctest: +SKIP + >>> store.select("/data1") # doctest: +SKIP A B 0 1 2 1 3 4 - >>> store.select('/data1', where='columns == A') # doctest: +SKIP + >>> store.select("/data1", where="columns == A") # 
doctest: +SKIP A 0 1 1 3 @@ -1146,9 +1146,9 @@ def put( Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df) # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df) # doctest: +SKIP """ if format is None: format = get_option("io.hdf.default_format") or "fixed" @@ -1288,11 +1288,11 @@ def append( Examples -------- - >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df1, format='table') # doctest: +SKIP - >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['A', 'B']) - >>> store.append('data', df2) # doctest: +SKIP + >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df1, format="table") # doctest: +SKIP + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["A", "B"]) + >>> store.append("data", df2) # doctest: +SKIP >>> store.close() # doctest: +SKIP A B 0 1 2 @@ -1479,9 +1479,9 @@ def groups(self) -> list: Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df) # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df) # doctest: +SKIP >>> print(store.groups()) # doctest: +SKIP >>> store.close() # doctest: +SKIP [/data (Group) '' @@ -1534,11 +1534,11 @@ def walk(self, where: str = "/") -> Iterator[tuple[str, list[str], list[str]]]: Examples -------- - >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df1, format='table') # doctest: +SKIP - >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=['A', 'B']) - >>> store.append('data', df2) # doctest: +SKIP + >>> df1 = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df1, format="table") # doctest: +SKIP + >>> df2 = pd.DataFrame([[5, 6], [7, 8]], columns=["A", "B"]) + >>> store.append("data", df2) # doctest: +SKIP >>> store.close() # doctest: +SKIP >>> for group in store.walk(): # doctest: +SKIP ... print(group) # doctest: +SKIP @@ -1660,9 +1660,9 @@ def info(self) -> str: Examples -------- - >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) - >>> store = pd.HDFStore("store.h5", 'w') # doctest: +SKIP - >>> store.put('data', df) # doctest: +SKIP + >>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + >>> store = pd.HDFStore("store.h5", "w") # doctest: +SKIP + >>> store.put("data", df) # doctest: +SKIP >>> print(store.info()) # doctest: +SKIP >>> store.close() # doctest: +SKIP diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b4330c717d368..08f99a4d3093a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -343,7 +343,7 @@ def read_sql_table( Examples -------- - >>> pd.read_sql_table('table_name', 'postgres:///db_name') # doctest:+SKIP + >>> pd.read_sql_table("table_name", "postgres:///db_name") # doctest:+SKIP """ check_dtype_backend(dtype_backend) @@ -637,24 +637,28 @@ def read_sql( providing only the SQL tablename will result in an error. 
>>> from sqlite3 import connect - >>> conn = connect(':memory:') - >>> df = pd.DataFrame(data=[[0, '10/11/12'], [1, '12/11/10']], - ... columns=['int_column', 'date_column']) - >>> df.to_sql(name='test_data', con=conn) + >>> conn = connect(":memory:") + >>> df = pd.DataFrame( + ... data=[[0, "10/11/12"], [1, "12/11/10"]], + ... columns=["int_column", "date_column"], + ... ) + >>> df.to_sql(name="test_data", con=conn) 2 - >>> pd.read_sql('SELECT int_column, date_column FROM test_data', conn) + >>> pd.read_sql("SELECT int_column, date_column FROM test_data", conn) int_column date_column 0 0 10/11/12 1 1 12/11/10 - >>> pd.read_sql('test_data', 'postgres:///db_name') # doctest:+SKIP + >>> pd.read_sql("test_data", "postgres:///db_name") # doctest:+SKIP For parameterized query, using ``params`` is recommended over string interpolation. >>> from sqlalchemy import text - >>> sql = text('SELECT int_column, date_column FROM test_data WHERE int_column=:int_val') - >>> pd.read_sql(sql, conn, params={'int_val': 1}) # doctest:+SKIP + >>> sql = text( + ... "SELECT int_column, date_column FROM test_data WHERE int_column=:int_val" + ... ) + >>> pd.read_sql(sql, conn, params={"int_val": 1}) # doctest:+SKIP int_column date_column 0 1 12/11/10 @@ -663,9 +667,11 @@ def read_sql( Custom argument values for applying ``pd.to_datetime`` on a column are specified via a dictionary format: - >>> pd.read_sql('SELECT int_column, date_column FROM test_data', - ... conn, - ... parse_dates={"date_column": {"format": "%d/%m/%y"}}) + >>> pd.read_sql( + ... "SELECT int_column, date_column FROM test_data", + ... conn, + ... parse_dates={"date_column": {"format": "%d/%m/%y"}}, + ... ) int_column date_column 0 0 2012-11-10 1 1 2010-11-12 @@ -675,12 +681,12 @@ def read_sql( pandas now supports reading via ADBC drivers >>> from adbc_driver_postgresql import dbapi # doctest:+SKIP - >>> with dbapi.connect('postgres:///db_name') as conn: # doctest:+SKIP - ... pd.read_sql('SELECT int_column FROM test_data', conn) + >>> with dbapi.connect("postgres:///db_name") as conn: # doctest:+SKIP + ... pd.read_sql("SELECT int_column FROM test_data", conn) int_column 0 0 1 1 - """ # noqa: E501 + """ check_dtype_backend(dtype_backend) if dtype_backend is lib.no_default: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 447c97d078e02..c2a3db2d44b16 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -254,7 +254,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series: Examples -------- >>> dates = pd.Series([52]) - >>> _stata_elapsed_date_to_datetime_vec(dates , "%tw") + >>> _stata_elapsed_date_to_datetime_vec(dates, "%tw") 0 1961-01-01 dtype: datetime64[s] @@ -1955,9 +1955,12 @@ def data_label(self) -> str: >>> time_stamp = pd.Timestamp(2000, 2, 29, 14, 21) >>> data_label = "This is a data file." >>> path = "/My_path/filename.dta" - >>> df.to_stata(path, time_stamp=time_stamp, # doctest: +SKIP - ... data_label=data_label, # doctest: +SKIP - ... version=None) # doctest: +SKIP + >>> df.to_stata( + ... path, + ... time_stamp=time_stamp, # doctest: +SKIP + ... data_label=data_label, # doctest: +SKIP + ... version=None, + ... ) # doctest: +SKIP >>> with pd.io.stata.StataReader(path) as reader: # doctest: +SKIP ... print(reader.data_label) # doctest: +SKIP This is a data file. 
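Note for reviewers: the `data_label` example just above is `# doctest: +SKIP`-ed because it writes to a hard-coded path. A minimal, self-contained sketch of the same round trip against a temporary file, for anyone who wants to exercise it locally (the frame contents and the temporary path are illustrative, not taken from this PR):

    import os
    from tempfile import TemporaryDirectory

    import pandas as pd

    df = pd.DataFrame({"col_1": [1, 2], "col_2": [3, 4]})
    with TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "filename.dta")
        # version=None lets to_stata choose a dta version; the label is kept
        # in the file header and surfaced by StataReader.data_label.
        df.to_stata(
            path,
            time_stamp=pd.Timestamp(2000, 2, 29, 14, 21),
            data_label="This is a data file.",
            version=None,
        )
        with pd.io.stata.StataReader(path) as reader:
            print(reader.data_label)  # This is a data file.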
@@ -1987,8 +1990,12 @@ def variable_labels(self) -> dict[str, str]: >>> time_stamp = pd.Timestamp(2000, 2, 29, 14, 21) >>> path = "/My_path/filename.dta" >>> variable_labels = {"col_1": "This is an example"} - >>> df.to_stata(path, time_stamp=time_stamp, # doctest: +SKIP - ... variable_labels=variable_labels, version=None) # doctest: +SKIP + >>> df.to_stata( + ... path, + ... time_stamp=time_stamp, # doctest: +SKIP + ... variable_labels=variable_labels, + ... version=None, + ... ) # doctest: +SKIP >>> with pd.io.stata.StataReader(path) as reader: # doctest: +SKIP ... print(reader.variable_labels()) # doctest: +SKIP {'index': '', 'col_1': 'This is an example', 'col_2': ''} @@ -2014,8 +2021,12 @@ def value_labels(self) -> dict[str, dict[float, str]]: >>> time_stamp = pd.Timestamp(2000, 2, 29, 14, 21) >>> path = "/My_path/filename.dta" >>> value_labels = {"col_1": {3: "x"}} - >>> df.to_stata(path, time_stamp=time_stamp, # doctest: +SKIP - ... value_labels=value_labels, version=None) # doctest: +SKIP + >>> df.to_stata( + ... path, + ... time_stamp=time_stamp, # doctest: +SKIP + ... value_labels=value_labels, + ... version=None, + ... ) # doctest: +SKIP >>> with pd.io.stata.StataReader(path) as reader: # doctest: +SKIP ... print(reader.value_labels()) # doctest: +SKIP {'col_1': {3: 'x'}} @@ -2272,19 +2283,19 @@ class StataWriter(StataParser): Examples -------- - >>> data = pd.DataFrame([[1.0, 1]], columns=['a', 'b']) - >>> writer = StataWriter('./data_file.dta', data) + >>> data = pd.DataFrame([[1.0, 1]], columns=["a", "b"]) + >>> writer = StataWriter("./data_file.dta", data) >>> writer.write_file() Directly write a zip file >>> compression = {{"method": "zip", "archive_name": "data_file.dta"}} - >>> writer = StataWriter('./data_file.zip', data, compression=compression) + >>> writer = StataWriter("./data_file.zip", data, compression=compression) >>> writer.write_file() Save a DataFrame with dates >>> from datetime import datetime - >>> data = pd.DataFrame([[datetime(2000,1,1)]], columns=['date']) - >>> writer = StataWriter('./date_data_file.dta', data, {{'date' : 'tw'}}) + >>> data = pd.DataFrame([[datetime(2000, 1, 1)]], columns=["date"]) + >>> writer = StataWriter("./date_data_file.dta", data, {{"date": "tw"}}) >>> writer.write_file() """ @@ -2655,18 +2666,22 @@ def write_file(self) -> None: Examples -------- - >>> df = pd.DataFrame({"fully_labelled": [1, 2, 3, 3, 1], - ... "partially_labelled": [1.0, 2.0, np.nan, 9.0, np.nan], - ... "Y": [7, 7, 9, 8, 10], - ... "Z": pd.Categorical(["j", "k", "l", "k", "j"]), - ... }) + >>> df = pd.DataFrame( + ... { + ... "fully_labelled": [1, 2, 3, 3, 1], + ... "partially_labelled": [1.0, 2.0, np.nan, 9.0, np.nan], + ... "Y": [7, 7, 9, 8, 10], + ... "Z": pd.Categorical(["j", "k", "l", "k", "j"]), + ... } + ... ) >>> path = "/My_path/filename.dta" - >>> labels = {"fully_labelled": {1: "one", 2: "two", 3: "three"}, - ... "partially_labelled": {1.0: "one", 2.0: "two"}, - ... } - >>> writer = pd.io.stata.StataWriter(path, - ... df, - ... value_labels=labels) # doctest: +SKIP + >>> labels = { + ... "fully_labelled": {1: "one", 2: "two", 3: "three"}, + ... "partially_labelled": {1.0: "one", 2.0: "two"}, + ... } + >>> writer = pd.io.stata.StataWriter( + ... path, df, value_labels=labels + ... 
) # doctest: +SKIP >>> writer.write_file() # doctest: +SKIP >>> df = pd.read_stata(path) # doctest: +SKIP >>> df # doctest: +SKIP @@ -3226,22 +3241,24 @@ class StataWriter117(StataWriter): Examples -------- - >>> data = pd.DataFrame([[1.0, 1, 'a']], columns=['a', 'b', 'c']) - >>> writer = pd.io.stata.StataWriter117('./data_file.dta', data) + >>> data = pd.DataFrame([[1.0, 1, "a"]], columns=["a", "b", "c"]) + >>> writer = pd.io.stata.StataWriter117("./data_file.dta", data) >>> writer.write_file() Directly write a zip file >>> compression = {"method": "zip", "archive_name": "data_file.dta"} >>> writer = pd.io.stata.StataWriter117( - ... './data_file.zip', data, compression=compression - ... ) + ... "./data_file.zip", data, compression=compression + ... ) >>> writer.write_file() Or with long strings stored in strl format - >>> data = pd.DataFrame([['A relatively long string'], [''], ['']], - ... columns=['strls']) + >>> data = pd.DataFrame( + ... [["A relatively long string"], [""], [""]], columns=["strls"] + ... ) >>> writer = pd.io.stata.StataWriter117( - ... './data_file_with_long_strings.dta', data, convert_strl=['strls']) + ... "./data_file_with_long_strings.dta", data, convert_strl=["strls"] + ... ) >>> writer.write_file() """ @@ -3619,21 +3636,23 @@ class StataWriterUTF8(StataWriter117): Using Unicode data and column names >>> from pandas.io.stata import StataWriterUTF8 - >>> data = pd.DataFrame([[1.0, 1, 'ᴬ']], columns=['a', 'β', 'ĉ']) - >>> writer = StataWriterUTF8('./data_file.dta', data) + >>> data = pd.DataFrame([[1.0, 1, "ᴬ"]], columns=["a", "β", "ĉ"]) + >>> writer = StataWriterUTF8("./data_file.dta", data) >>> writer.write_file() Directly write a zip file >>> compression = {"method": "zip", "archive_name": "data_file.dta"} - >>> writer = StataWriterUTF8('./data_file.zip', data, compression=compression) + >>> writer = StataWriterUTF8("./data_file.zip", data, compression=compression) >>> writer.write_file() Or with long strings stored in strl format - >>> data = pd.DataFrame([['ᴀ relatively long ŝtring'], [''], ['']], - ... columns=['strls']) - >>> writer = StataWriterUTF8('./data_file_with_long_strings.dta', data, - ... convert_strl=['strls']) + >>> data = pd.DataFrame( + ... [["ᴀ relatively long ŝtring"], [""], [""]], columns=["strls"] + ... ) + >>> writer = StataWriterUTF8( + ... "./data_file_with_long_strings.dta", data, convert_strl=["strls"] + ... ) >>> writer.write_file() """ diff --git a/pandas/io/xml.py b/pandas/io/xml.py index 3faffbd21842f..97bf520a77611 100644 --- a/pandas/io/xml.py +++ b/pandas/io/xml.py @@ -916,9 +916,7 @@ def read_xml( Note: if XML document uses default namespace denoted as `xmlns=''` without a prefix, you must assign any temporary namespace prefix such as 'doc' to the URI in order to parse - underlying nodes and/or attributes. For example, :: - - namespaces = {{"doc": "https://example.com"}} + underlying nodes and/or attributes. elems_only : bool, optional, default False Parse only the child elements at the specified ``xpath``. By default, @@ -987,9 +985,7 @@ def read_xml( and unlike ``xpath``, descendants do not need to relate to each other but can exist any where in document under the repeating element. This memory- efficient method should be used for very large XML files (500MB, 1GB, or 5GB+). - For example, :: - - iterparse = {{"row_element": ["child_elem", "attr", "grandchild_elem"]}} + For example, ``{{"row_element": ["child_elem", "attr", "grandchild_elem"]}}``. .. versionadded:: 1.5.0 @@ -1118,9 +1114,11 @@ def read_xml( ... ... 
''' - >>> df = pd.read_xml(StringIO(xml), - ... xpath="//doc:row", - ... namespaces={{"doc": "https://example.com"}}) + >>> df = pd.read_xml( + ... StringIO(xml), + ... xpath="//doc:row", + ... namespaces={{"doc": "https://example.com"}}, + ... ) >>> df shape degrees sides 0 square 360 4.0 @@ -1147,9 +1145,9 @@ def read_xml( ... ... ''' - >>> df = pd.read_xml(StringIO(xml_data), - ... dtype_backend="numpy_nullable", - ... parse_dates=["e"]) + >>> df = pd.read_xml( + ... StringIO(xml_data), dtype_backend="numpy_nullable", parse_dates=["e"] + ... ) >>> df index a b c d e 0 0 1 2.5 True a 2019-12-31 diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 7c02ffdbafcfa..51201eafb9475 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -112,7 +112,7 @@ def hist_series( .. plot:: :context: close-figs - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) >>> hist = ser.hist() @@ -121,7 +121,7 @@ def hist_series( .. plot:: :context: close-figs - >>> lst = ['a', 'a', 'a', 'b', 'b', 'b'] + >>> lst = ["a", "a", "a", "b", "b", "b"] >>> ser = pd.Series([1, 2, 2, 4, 6, 6], index=lst) >>> hist = ser.groupby(level=0).hist() """ @@ -241,12 +241,11 @@ def hist_frame( .. plot:: :context: close-figs - >>> data = {'length': [1.5, 0.5, 1.2, 0.9, 3], - ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]} - >>> index = ['pig', 'rabbit', 'duck', 'chicken', 'horse'] + >>> data = {"length": [1.5, 0.5, 1.2, 0.9, 3], "width": [0.7, 0.2, 0.15, 0.2, 1.1]} + >>> index = ["pig", "rabbit", "duck", "chicken", "horse"] >>> df = pd.DataFrame(data, index=index) >>> hist = df.hist(bins=3) - """ + """ # noqa: E501 plot_backend = _get_plot_backend(backend) return plot_backend.hist_frame( data, @@ -606,10 +605,10 @@ def boxplot_frame_groupby( >>> import itertools >>> tuples = [t for t in itertools.product(range(1000), range(4))] - >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> index = pd.MultiIndex.from_tuples(tuples, names=["lvl0", "lvl1"]) >>> data = np.random.randn(len(index), 4) - >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index) - >>> grouped = df.groupby(level='lvl1') + >>> df = pd.DataFrame(data, columns=list("ABCD"), index=index) + >>> grouped = df.groupby(level="lvl1") >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8, 10)) # doctest: +SKIP The ``subplots=False`` option shows the boxplots in a single figure. @@ -802,16 +801,17 @@ class PlotAccessor(PandasObject): :context: close-figs >>> ser = pd.Series([1, 2, 3, 3]) - >>> plot = ser.plot(kind='hist', title="My plot") + >>> plot = ser.plot(kind="hist", title="My plot") For DataFrame: .. plot:: :context: close-figs - >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3], - ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]}, - ... index=['pig', 'rabbit', 'duck', 'chicken', 'horse']) + >>> df = pd.DataFrame( + ... {"length": [1.5, 0.5, 1.2, 0.9, 3], "width": [0.7, 0.2, 0.15, 0.2, 1.1]}, + ... index=["pig", "rabbit", "duck", "chicken", "horse"], + ... ) >>> plot = df.plot(title="DataFrame Plot") For SeriesGroupBy: @@ -828,10 +828,9 @@ class PlotAccessor(PandasObject): .. plot:: :context: close-figs - >>> df = pd.DataFrame({"col1" : [1, 2, 3, 4], - ... 
"col2" : ["A", "B", "A", "B"]}) + >>> df = pd.DataFrame({"col1": [1, 2, 3, 4], "col2": ["A", "B", "A", "B"]}) >>> plot = df.groupby("col2").plot(kind="bar", title="DataFrameGroupBy Plot") - """ + """ # noqa: E501 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") _series_kinds = ("pie",) @@ -1347,7 +1346,7 @@ def box(self, by: IndexLabel | None = None, **kwargs) -> PlotAccessor: :context: close-figs >>> data = np.random.randn(25, 4) - >>> df = pd.DataFrame(data, columns=list('ABCD')) + >>> df = pd.DataFrame(data, columns=list("ABCD")) >>> ax = df.plot.box() You can also generate groupings if you specify the `by` parameter (which @@ -1410,8 +1409,8 @@ def hist( .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=['one']) - >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000) + >>> df = pd.DataFrame(np.random.randint(1, 7, 6000), columns=["one"]) + >>> df["two"] = df["one"] + np.random.randint(1, 7, 6000) >>> ax = df.plot.hist(bins=12, alpha=0.5) A grouped histogram can be generated by providing the parameter `by` (which @@ -1509,10 +1508,12 @@ def kde( .. plot:: :context: close-figs - >>> df = pd.DataFrame({ - ... 'x': [1, 2, 2.5, 3, 3.5, 4, 5], - ... 'y': [4, 4, 4.5, 5, 5.5, 6, 6], - ... }) + >>> df = pd.DataFrame( + ... { + ... "x": [1, 2, 2.5, 3, 3.5, 4, 5], + ... "y": [4, 4, 4.5, 5, 5.5, 6, 6], + ... } + ... ) >>> ax = df.plot.kde() A scalar bandwidth can be specified. Using a small bandwidth value can @@ -1583,12 +1584,14 @@ def area( .. plot:: :context: close-figs - >>> df = pd.DataFrame({ - ... 'sales': [3, 2, 3, 9, 10, 6], - ... 'signups': [5, 5, 6, 12, 14, 13], - ... 'visits': [20, 42, 28, 62, 81, 50], - ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01', - ... freq='ME')) + >>> df = pd.DataFrame( + ... { + ... "sales": [3, 2, 3, 9, 10, 6], + ... "signups": [5, 5, 6, 12, 14, 13], + ... "visits": [20, 42, 28, 62, 81, 50], + ... }, + ... index=pd.date_range(start="2018/01/01", end="2018/07/01", freq="ME"), + ... ) >>> ax = df.plot.area() Area plots are stacked by default. To produce an unstacked plot, @@ -1604,20 +1607,22 @@ def area( .. plot:: :context: close-figs - >>> ax = df.plot.area(y='sales') + >>> ax = df.plot.area(y="sales") Draw with a different `x`: .. plot:: :context: close-figs - >>> df = pd.DataFrame({ - ... 'sales': [3, 2, 3], - ... 'visits': [20, 42, 28], - ... 'day': [1, 2, 3], - ... }) - >>> ax = df.plot.area(x='day') - """ + >>> df = pd.DataFrame( + ... { + ... "sales": [3, 2, 3], + ... "visits": [20, 42, 28], + ... "day": [1, 2, 3], + ... } + ... ) + >>> ax = df.plot.area(x="day") + """ # noqa: E501 return self(kind="area", x=x, y=y, stacked=stacked, **kwargs) def pie(self, y: IndexLabel | None = None, **kwargs) -> PlotAccessor: @@ -1657,10 +1662,11 @@ def pie(self, y: IndexLabel | None = None, **kwargs) -> PlotAccessor: .. plot:: :context: close-figs - >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97], - ... 'radius': [2439.7, 6051.8, 6378.1]}, - ... index=['Mercury', 'Venus', 'Earth']) - >>> plot = df.plot.pie(y='mass', figsize=(5, 5)) + >>> df = pd.DataFrame( + ... {"mass": [0.330, 4.87, 5.97], "radius": [2439.7, 6051.8, 6378.1]}, + ... index=["Mercury", "Venus", "Earth"], + ... ) + >>> plot = df.plot.pie(y="mass", figsize=(5, 5)) .. plot:: :context: close-figs @@ -1748,22 +1754,26 @@ def scatter( .. plot:: :context: close-figs - >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1], - ... [6.4, 3.2, 1], [5.9, 3.0, 2]], - ... 
columns=['length', 'width', 'species']) - >>> ax1 = df.plot.scatter(x='length', - ... y='width', - ... c='DarkBlue') + >>> df = pd.DataFrame( + ... [ + ... [5.1, 3.5, 0], + ... [4.9, 3.0, 0], + ... [7.0, 3.2, 1], + ... [6.4, 3.2, 1], + ... [5.9, 3.0, 2], + ... ], + ... columns=["length", "width", "species"], + ... ) + >>> ax1 = df.plot.scatter(x="length", y="width", c="DarkBlue") And now with the color determined by a column as well. .. plot:: :context: close-figs - >>> ax2 = df.plot.scatter(x='length', - ... y='width', - ... c='species', - ... colormap='viridis') + >>> ax2 = df.plot.scatter( + ... x="length", y="width", c="species", colormap="viridis" + ... ) """ return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs) @@ -1832,9 +1842,8 @@ def hexbin( :context: close-figs >>> n = 10000 - >>> df = pd.DataFrame({'x': np.random.randn(n), - ... 'y': np.random.randn(n)}) - >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) + >>> df = pd.DataFrame({"x": np.random.randn(n), "y": np.random.randn(n)}) + >>> ax = df.plot.hexbin(x="x", y="y", gridsize=20) The next example uses `C` and `np.sum` as `reduce_C_function`. Note that `'observations'` values ranges from 1 to 5 but the result @@ -1845,17 +1854,21 @@ def hexbin( :context: close-figs >>> n = 500 - >>> df = pd.DataFrame({ - ... 'coord_x': np.random.uniform(-3, 3, size=n), - ... 'coord_y': np.random.uniform(30, 50, size=n), - ... 'observations': np.random.randint(1, 5, size=n) - ... }) - >>> ax = df.plot.hexbin(x='coord_x', - ... y='coord_y', - ... C='observations', - ... reduce_C_function=np.sum, - ... gridsize=10, - ... cmap="viridis") + >>> df = pd.DataFrame( + ... { + ... "coord_x": np.random.uniform(-3, 3, size=n), + ... "coord_y": np.random.uniform(30, 50, size=n), + ... "observations": np.random.randint(1, 5, size=n), + ... } + ... ) + >>> ax = df.plot.hexbin( + ... x="coord_x", + ... y="coord_y", + ... C="observations", + ... reduce_C_function=np.sum, + ... gridsize=10, + ... cmap="viridis", + ... ) """ if reduce_C_function is not None: kwargs["reduce_C_function"] = reduce_C_function diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 6fa75ba5fb12d..1c8cd9a4970c8 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -465,7 +465,7 @@ def _validate_color_args(self, color, colormap): @final @staticmethod def _iter_data( - data: DataFrame | dict[Hashable, Series | DataFrame] + data: DataFrame | dict[Hashable, Series | DataFrame], ) -> Iterator[tuple[Hashable, np.ndarray]]: for col, values in data.items(): # This was originally written to use values.values before EAs diff --git a/pandas/plotting/_matplotlib/groupby.py b/pandas/plotting/_matplotlib/groupby.py index cbb66065a8039..783f79710097c 100644 --- a/pandas/plotting/_matplotlib/groupby.py +++ b/pandas/plotting/_matplotlib/groupby.py @@ -50,10 +50,9 @@ def create_iter_data_given_by( If `by` is assigned: >>> import numpy as np - >>> tuples = [('h1', 'a'), ('h1', 'b'), ('h2', 'a'), ('h2', 'b')] + >>> tuples = [("h1", "a"), ("h1", "b"), ("h2", "a"), ("h2", "b")] >>> mi = pd.MultiIndex.from_tuples(tuples) - >>> value = [[1, 3, np.nan, np.nan], - ... 
[3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]] + >>> value = [[1, 3, np.nan, np.nan], [3, 4, np.nan, np.nan], [np.nan, np.nan, 5, 6]] >>> data = pd.DataFrame(value, columns=mi) >>> create_iter_data_given_by(data) {'h1': h1 @@ -106,9 +105,9 @@ def reconstruct_data_with_by( Examples -------- - >>> d = {'h': ['h1', 'h1', 'h2'], 'a': [1, 3, 5], 'b': [3, 4, 6]} + >>> d = {"h": ["h1", "h1", "h2"], "a": [1, 3, 5], "b": [3, 4, 6]} >>> df = pd.DataFrame(d) - >>> reconstruct_data_with_by(df, by='h', cols=['a', 'b']) + >>> reconstruct_data_with_by(df, by="h", cols=["a", "b"]) h1 h2 a b a b 0 1.0 3.0 NaN NaN diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 89a8a7cf79719..50cfdbd967ea7 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -98,13 +98,14 @@ def _get_layout( nrows, ncols = layout if nrows == -1 and ncols > 0: - layout = nrows, ncols = (ceil(nplots / ncols), ncols) + layout = (ceil(nplots / ncols), ncols) elif ncols == -1 and nrows > 0: - layout = nrows, ncols = (nrows, ceil(nplots / nrows)) + layout = (nrows, ceil(nplots / nrows)) elif ncols <= 0 and nrows <= 0: msg = "At least one dimension of layout must be positive" raise ValueError(msg) + nrows, ncols = layout if nrows * ncols < nplots: raise ValueError( f"Layout of {nrows}x{ncols} must be larger than required size {nplots}" diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index c8c8f68f5289e..eb2d12e588b8f 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -51,12 +51,13 @@ def table(ax: Axes, data: DataFrame | Series, **kwargs) -> Table: :context: close-figs >>> import matplotlib.pyplot as plt - >>> df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]}) + >>> df = pd.DataFrame({"A": [1, 2], "B": [3, 4]}) >>> fig, ax = plt.subplots() - >>> ax.axis('off') + >>> ax.axis("off") (0.0, 1.0, 0.0, 1.0) - >>> table = pd.plotting.table(ax, df, loc='center', - ... cellLoc='center', colWidths=list([.2, .2])) + >>> table = pd.plotting.table( + ... ax, df, loc="center", cellLoc="center", colWidths=list([0.2, 0.2]) + ... ) """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.table( @@ -92,16 +93,17 @@ def register() -> None: >>> pd.plotting.register_matplotlib_converters() - >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'), - ... 'y': [1, 2] - ... }) - >>> plot = df.plot.line(x='ts', y='y') + >>> df = pd.DataFrame( + ... {"ts": pd.period_range("2020", periods=2, freq="M"), "y": [1, 2]} + ... ) + >>> plot = df.plot.line(x="ts", y="y") If the converters are manually unregistered, an error will be raised: - >>> pd.set_option("plotting.matplotlib.register_converters", - ... False) # doctest: +SKIP - >>> df.plot.line(x='ts', y='y') # doctest: +SKIP + >>> pd.set_option( + ... "plotting.matplotlib.register_converters", False + ... ) # doctest: +SKIP + >>> df.plot.line(x="ts", y="y") # doctest: +SKIP Traceback (most recent call last): TypeError: float() argument must be a string or a real number, not 'Period' """ @@ -135,16 +137,17 @@ def deregister() -> None: >>> pd.plotting.register_matplotlib_converters() - >>> df = pd.DataFrame({'ts': pd.period_range('2020', periods=2, freq='M'), - ... 'y': [1, 2] - ... }) - >>> plot = df.plot.line(x='ts', y='y') + >>> df = pd.DataFrame( + ... {"ts": pd.period_range("2020", periods=2, freq="M"), "y": [1, 2]} + ... 
) + >>> plot = df.plot.line(x="ts", y="y") If the converters are manually unregistered, an error will be raised: - >>> pd.set_option("plotting.matplotlib.register_converters", - ... False) # doctest: +SKIP - >>> df.plot.line(x='ts', y='y') # doctest: +SKIP + >>> pd.set_option( + ... "plotting.matplotlib.register_converters", False + ... ) # doctest: +SKIP + >>> df.plot.line(x="ts", y="y") # doctest: +SKIP Traceback (most recent call last): TypeError: float() argument must be a string or a real number, not 'Period' """ @@ -204,7 +207,7 @@ def scatter_matrix( .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=['A', 'B', 'C', 'D']) + >>> df = pd.DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"]) >>> pd.plotting.scatter_matrix(df, alpha=0.2) array([[, , , ], @@ -288,25 +291,25 @@ def radviz( >>> df = pd.DataFrame( ... { - ... 'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6], - ... 'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6], - ... 'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0], - ... 'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2], - ... 'Category': [ - ... 'virginica', - ... 'virginica', - ... 'setosa', - ... 'virginica', - ... 'virginica', - ... 'versicolor', - ... 'versicolor', - ... 'setosa', - ... 'virginica', - ... 'setosa' - ... ] + ... "SepalLength": [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6, 6.7, 4.6], + ... "SepalWidth": [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2, 3.3, 3.6], + ... "PetalLength": [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4, 5.7, 1.0], + ... "PetalWidth": [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2, 2.1, 0.2], + ... "Category": [ + ... "virginica", + ... "virginica", + ... "setosa", + ... "virginica", + ... "virginica", + ... "versicolor", + ... "versicolor", + ... "setosa", + ... "virginica", + ... "setosa", + ... ], ... } ... ) - >>> pd.plotting.radviz(df, 'Category') # doctest: +SKIP + >>> pd.plotting.radviz(df, "Category") # doctest: +SKIP """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.radviz( @@ -371,10 +374,10 @@ def andrews_curves( :context: close-figs >>> df = pd.read_csv( - ... 'https://raw.githubusercontent.com/pandas-dev/' - ... 'pandas/main/pandas/tests/io/data/csv/iris.csv' + ... "https://raw.githubusercontent.com/pandas-dev/" + ... "pandas/main/pandas/tests/io/data/csv/iris.csv" ... ) - >>> pd.plotting.andrews_curves(df, 'Name') # doctest: +SKIP + >>> pd.plotting.andrews_curves(df, "Name") # doctest: +SKIP """ plot_backend = _get_plot_backend("matplotlib") return plot_backend.andrews_curves( @@ -502,11 +505,11 @@ def parallel_coordinates( :context: close-figs >>> df = pd.read_csv( - ... 'https://raw.githubusercontent.com/pandas-dev/' - ... 'pandas/main/pandas/tests/io/data/csv/iris.csv' + ... "https://raw.githubusercontent.com/pandas-dev/" + ... "pandas/main/pandas/tests/io/data/csv/iris.csv" ... ) >>> pd.plotting.parallel_coordinates( - ... df, 'Name', color=('#556270', '#4ECDC4', '#C7F464') + ... df, "Name", color=("#556270", "#4ECDC4", "#C7F464") ... ) # doctest: +SKIP """ plot_backend = _get_plot_backend("matplotlib") @@ -620,10 +623,10 @@ class _Options(dict): :context: close-figs >>> np.random.seed(42) - >>> df = pd.DataFrame({'A': np.random.randn(10), - ... 'B': np.random.randn(10)}, - ... index=pd.date_range("1/1/2000", - ... freq='4MS', periods=10)) + >>> df = pd.DataFrame( + ... {"A": np.random.randn(10), "B": np.random.randn(10)}, + ... index=pd.date_range("1/1/2000", freq="4MS", periods=10), + ... 
) >>> with pd.plotting.plot_params.use("x_compat", True): ... _ = df["A"].plot(color="r") ... _ = df["B"].plot(color="g") diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 85f15795cdfb5..2be6bba475af7 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -260,7 +260,7 @@ def test_join_dtypes_all_nan(any_numeric_ea_dtype): def test_join_index_levels(): # GH#53093 - midx = midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")]) + midx = MultiIndex.from_tuples([("a", "2019-02-01"), ("a", "2019-02-01")]) midx2 = MultiIndex.from_tuples([("a", "2019-01-31")]) result = midx.join(midx2, how="outer") expected = MultiIndex.from_tuples( diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 96a0ccc33808a..e2d4a0bac9559 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -196,7 +196,7 @@ def create_mgr(descr, item_shape=None): * components with same DTYPE_ID are combined into single block * to force multiple blocks with same dtype, use '-SUFFIX':: - 'a:f8-1; b:f8-2; c:f8-foobar' + "a:f8-1; b:f8-2; c:f8-foobar" """ if item_shape is None: diff --git a/pandas/tests/io/xml/conftest.py b/pandas/tests/io/xml/conftest.py index 273b1a3beef3b..40a94f27e98a9 100644 --- a/pandas/tests/io/xml/conftest.py +++ b/pandas/tests/io/xml/conftest.py @@ -11,7 +11,7 @@ def xml_data_path(): Examples -------- >>> def test_read_xml(xml_data_path): - ... read_xml(xml_data_path / 'file.xsl') + ... read_xml(xml_data_path / "file.xsl") """ return Path(__file__).parent.parent / "data" / "xml" diff --git a/pandas/tests/strings/conftest.py b/pandas/tests/strings/conftest.py index 036e4de20ba53..92b7b16da3c1f 100644 --- a/pandas/tests/strings/conftest.py +++ b/pandas/tests/strings/conftest.py @@ -122,7 +122,7 @@ def any_string_method(request): Examples -------- >>> def test_something(any_string_method): - ... s = Series(['a', 'b', np.nan, 'd']) + ... s = Series(["a", "b", np.nan, "d"]) ... ... method_name, args, kwargs = any_string_method ... method = getattr(s.str, method_name) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 4a1a668426b36..92b4bcc17946f 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -111,7 +111,7 @@ def infer_freq( Examples -------- - >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30) + >>> idx = pd.date_range(start="2020/12/01", end="2020/12/30", periods=30) >>> pd.infer_freq(idx) 'D' """ diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 650e77b264d14..50d0d33f0339f 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -200,8 +200,10 @@ class from pandas.tseries.offsets Holiday: July 3rd (month=7, day=3, ) >>> NewYears = pd.tseries.holiday.Holiday( - ... "New Years Day", month=1, day=1, - ... observance=pd.tseries.holiday.nearest_workday + ... "New Years Day", + ... month=1, + ... day=1, + ... observance=pd.tseries.holiday.nearest_workday, ... ) >>> NewYears # doctest: +SKIP Holiday: New Years Day ( @@ -209,8 +211,7 @@ class from pandas.tseries.offsets ) >>> July3rd = pd.tseries.holiday.Holiday( - ... "July 3rd", month=7, day=3, - ... days_of_week=(0, 1, 2, 3) + ... "July 3rd", month=7, day=3, days_of_week=(0, 1, 2, 3) ... 
) >>> July3rd Holiday: July 3rd (month=7, day=3, ) diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 83c9a66cbd2ca..a15e2054205f7 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -122,44 +122,41 @@ def deprecate_kwarg( -------- The following deprecates 'cols', using 'columns' instead - >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name='columns') - ... def f(columns=''): + >>> @deprecate_kwarg(old_arg_name="cols", new_arg_name="columns") + ... def f(columns=""): ... print(columns) - ... - >>> f(columns='should work ok') + >>> f(columns="should work ok") should work ok - >>> f(cols='should raise warning') # doctest: +SKIP + >>> f(cols="should raise warning") # doctest: +SKIP FutureWarning: cols is deprecated, use columns instead warnings.warn(msg, FutureWarning) should raise warning - >>> f(cols='should error', columns="can\'t pass do both") # doctest: +SKIP + >>> f(cols="should error", columns="can't pass both") # doctest: +SKIP TypeError: Can only specify 'cols' or 'columns', not both - >>> @deprecate_kwarg('old', 'new', {'yes': True, 'no': False}) + >>> @deprecate_kwarg("old", "new", {"yes": True, "no": False}) ... def f(new=False): - ... print('yes!' if new else 'no!') - ... - >>> f(old='yes') # doctest: +SKIP + ... print("yes!" if new else "no!") + >>> f(old="yes") # doctest: +SKIP FutureWarning: old='yes' is deprecated, use new=True instead warnings.warn(msg, FutureWarning) yes! To raise a warning that a keyword will be removed entirely in the future - >>> @deprecate_kwarg(old_arg_name='cols', new_arg_name=None) - ... def f(cols='', another_param=''): + >>> @deprecate_kwarg(old_arg_name="cols", new_arg_name=None) + ... def f(cols="", another_param=""): ... print(cols) - ... - >>> f(cols='should raise warning') # doctest: +SKIP + >>> f(cols="should raise warning") # doctest: +SKIP FutureWarning: the 'cols' keyword is deprecated and will be removed in a future version please take steps to stop use of 'cols' should raise warning - >>> f(another_param='should not raise warning') # doctest: +SKIP + >>> f(another_param="should not raise warning") # doctest: +SKIP should not raise warning - >>> f(cols='should raise warning', another_param='') # doctest: +SKIP + >>> f(cols="should raise warning", another_param="") # doctest: +SKIP FutureWarning: the 'cols' keyword is deprecated and will be removed in a future version please take steps to stop use of 'cols' should raise warning diff --git a/pyproject.toml b/pyproject.toml index bd7172ec85132..c0d8c859d0c12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -346,6 +346,9 @@ exclude = [ fixture-parentheses = false mark-parentheses = false +[tool.ruff.format] +docstring-code-format = true + [tool.pylint.messages_control] max-line-length = 88 disable = [ diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index d54592252206e..a4d53d360a12b 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -193,7 +193,7 @@ def validate_pep8(self): "flake8", "--format=%(row)d\t%(col)d\t%(code)s\t%(text)s", "--max-line-length=88", - "--ignore=E203,E3,W503,W504,E402,E731", + "--ignore=E203,E3,W503,W504,E402,E731,E128,E124", file.name, ] response = subprocess.run(cmd, capture_output=True, check=False, text=True)
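For context on the two configuration changes above: `docstring-code-format = true` under `[tool.ruff.format]` is what makes `ruff format` reformat code embedded in docstrings, and it accounts for essentially all of the docstring churn in this diff, while the added `E128,E124` flake8 ignores (continuation line under-indented for visual indent; closing bracket does not match visual indentation) stop the `validate_pep8` check in `validate_docstrings.py` from flagging the hanging-indent style the formatter emits. Taking one rewrite from the `PlotAccessor` docstring above in isolation: visually aligned, single-quoted doctest code such as

    >>> df = pd.DataFrame({'length': [1.5, 0.5, 1.2, 0.9, 3],
    ...                    'width': [0.7, 0.2, 0.15, 0.2, 1.1]},
    ...                   index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])

is rewritten to double quotes and a hanging indent with a trailing comma:

    >>> df = pd.DataFrame(
    ...     {"length": [1.5, 0.5, 1.2, 0.9, 3], "width": [0.7, 0.2, 0.15, 0.2, 1.1]},
    ...     index=["pig", "rabbit", "duck", "chicken", "horse"],
    ... )

Re-running `ruff format` with the option enabled should reproduce the docstring hunks mechanically; the non-docstring changes here (the `_get_layout` cleanup, the duplicated `midx = midx =` assignment, the flake8 ignores) appear to be hand-written.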