From 4e6b9cee050a294bd4edd67e4ab6e454c9ff720a Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sun, 5 May 2024 01:02:58 +0530 Subject: [PATCH 1/3] DOC: Enforce Numpy Docstring Validation for pandas.Interval.mid (#58572) * DOC: add SA01 for pandas.Interval.mid * DOC: remove SA01 for pandas.Interval.mid --- ci/code_checks.sh | 1 - pandas/_libs/interval.pyx | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 936e3664cfe93..955fa5db17adc 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -89,7 +89,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Interval PR02" \ -i "pandas.Interval.closed SA01" \ -i "pandas.Interval.left SA01" \ - -i "pandas.Interval.mid SA01" \ -i "pandas.Interval.right SA01" \ -i "pandas.IntervalIndex.closed SA01" \ -i "pandas.IntervalIndex.contains RT03" \ diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index a37ab45dd57ed..8ad0887885f26 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -167,6 +167,12 @@ cdef class IntervalMixin: """ Return the midpoint of the Interval. + See Also + -------- + Interval.left : Return the left bound for the interval. + Interval.right : Return the right bound for the interval. + Interval.length : Return the length of the interval. 
+ Examples -------- >>> iv = pd.Interval(0, 5) From 006ed9822ed8063ea23f71cb57696d28b8278cf0 Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sun, 5 May 2024 01:03:56 +0530 Subject: [PATCH 2/3] DOC: Enforce Numpy Docstring Validation for pandas.Series.max (#58573) * DOC: add RT03 for pandas.Series.max * DOC: remove RT03 for pandas.Series.max --- ci/code_checks.sh | 1 - pandas/core/series.py | 60 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 955fa5db17adc..b7ba141702261 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -206,7 +206,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.list.flatten SA01" \ -i "pandas.Series.list.len SA01" \ -i "pandas.Series.lt PR07,SA01" \ - -i "pandas.Series.max RT03" \ -i "pandas.Series.mean RT03,SA01" \ -i "pandas.Series.median RT03,SA01" \ -i "pandas.Series.min RT03" \ diff --git a/pandas/core/series.py b/pandas/core/series.py index 8474fd0d74919..8ee32d915420b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6157,7 +6157,6 @@ def min( ) @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="max") - @doc(make_doc("max", ndim=1)) def max( self, axis: Axis | None = 0, @@ -6165,6 +6164,65 @@ def max( numeric_only: bool = False, **kwargs, ): + """ + Return the maximum of the values over the requested axis. + + If you want the *index* of the maximum, use ``idxmax``. + This is the equivalent of the ``numpy.ndarray`` method ``argmax``. + + Parameters + ---------- + axis : {index (0)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. 
+ **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + scalar + The maximum of the values in the Series. + + See Also + -------- + numpy.max : Equivalent numpy function for arrays. + Series.min : Return the minimum. + Series.max : Return the maximum. + Series.idxmin : Return the index of the minimum. + Series.idxmax : Return the index of the maximum. + DataFrame.min : Return the minimum over the requested axis. + DataFrame.max : Return the maximum over the requested axis. + DataFrame.idxmin : Return the index of the minimum over the requested axis. + DataFrame.idxmax : Return the index of the maximum over the requested axis. + + Examples + -------- + >>> idx = pd.MultiIndex.from_arrays( + ... [["warm", "warm", "cold", "cold"], ["dog", "falcon", "fish", "spider"]], + ... names=["blooded", "animal"], + ... ) + >>> s = pd.Series([4, 2, 0, 8], name="legs", index=idx) + >>> s + blooded animal + warm dog 4 + falcon 2 + cold fish 0 + spider 8 + Name: legs, dtype: int64 + + >>> s.max() + 8 + """ return NDFrame.max( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs ) From eadc1294b13bdc661283fcdf3ba54a50f31feeab Mon Sep 17 00:00:00 2001 From: Tuhin Sharma Date: Sun, 5 May 2024 01:04:48 +0530 Subject: [PATCH 3/3] DOC: Enforce Numpy Docstring Validation for pandas.Series.median (#58575) * DOC: add RT03,SA01 for pandas.Series.median * DOC: remove RT03,SA01 for pandas.Series.median --- ci/code_checks.sh | 1 - pandas/core/series.py | 70 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index b7ba141702261..dbb799779c1c4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -207,7 +207,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.list.len SA01" \ -i "pandas.Series.lt PR07,SA01" \ -i "pandas.Series.mean RT03,SA01" \ - -i "pandas.Series.median RT03,SA01" \ -i 
"pandas.Series.min RT03" \ -i "pandas.Series.mod PR07" \ -i "pandas.Series.mode SA01" \ diff --git a/pandas/core/series.py b/pandas/core/series.py index 8ee32d915420b..36b9bd2c59dce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6279,7 +6279,6 @@ def mean( ) @deprecate_nonkeyword_arguments(version="3.0", allowed_args=["self"], name="median") - @doc(make_doc("median", ndim=1)) def median( self, axis: Axis | None = 0, @@ -6287,6 +6286,75 @@ def median( numeric_only: bool = False, **kwargs, ) -> Any: + """ + Return the median of the values over the requested axis. + + Parameters + ---------- + axis : {index (0)} + Axis for the function to be applied on. + For `Series` this parameter is unused and defaults to 0. + + For DataFrames, specifying ``axis=None`` will apply the aggregation + across both axes. + + .. versionadded:: 2.0.0 + + skipna : bool, default True + Exclude NA/null values when computing the result. + numeric_only : bool, default False + Include only float, int, boolean columns. + **kwargs + Additional keyword arguments to be passed to the function. + + Returns + ------- + scalar + Median of the values for the requested axis. + + See Also + -------- + numpy.median : Equivalent numpy function for computing median. + Series.sum : Sum of the values. + Series.median : Median of the values. + Series.std : Standard deviation of the values. + Series.var : Variance of the values. + Series.min : Minimum value. + Series.max : Maximum value. + + Examples + -------- + >>> s = pd.Series([1, 2, 3]) + >>> s.median() + 2.0 + + With a DataFrame + + >>> df = pd.DataFrame({"a": [1, 2], "b": [2, 3]}, index=["tiger", "zebra"]) + >>> df + a b + tiger 1 2 + zebra 2 3 + >>> df.median() + a 1.5 + b 2.5 + dtype: float64 + + Using axis=1 + + >>> df.median(axis=1) + tiger 1.5 + zebra 2.5 + dtype: float64 + + In this case, `numeric_only` should be set to `True` + to avoid getting an error. 
+ + >>> df = pd.DataFrame({"a": [1, 2], "b": ["T", "Z"]}, index=["tiger", "zebra"]) + >>> df.median(numeric_only=True) + a 1.5 + dtype: float64 + """ return NDFrame.median( self, axis=axis, skipna=skipna, numeric_only=numeric_only, **kwargs )