Skip to content

Commit

Permalink
Merge branch 'main' into fix_docstring_pandas_styler
Browse files Browse the repository at this point in the history
  • Loading branch information
hlakams committed Aug 26, 2024
2 parents 175dd36 + bb4ab4f commit dc5fa49
Show file tree
Hide file tree
Showing 7 changed files with 147 additions and 52 deletions.
11 changes: 0 additions & 11 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.sparse.sp_values SA01" \
-i "pandas.Series.sparse.to_coo PR07,RT03,SA01" \
-i "pandas.Series.std PR01,RT03,SA01" \
-i "pandas.Series.str.wrap RT03,SA01" \
-i "pandas.Series.str.zfill RT03" \
-i "pandas.Timedelta.asm8 SA01" \
-i "pandas.Timedelta.ceil SA01" \
-i "pandas.Timedelta.components SA01" \
Expand Down Expand Up @@ -175,14 +173,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Timestamp.tzinfo GL08" \
-i "pandas.Timestamp.value GL08" \
-i "pandas.Timestamp.year GL08" \
-i "pandas.api.extensions.ExtensionArray.duplicated RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.fillna SA01" \
-i "pandas.api.extensions.ExtensionArray.insert PR07,RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.interpolate PR01,SA01" \
-i "pandas.api.extensions.ExtensionArray.isin PR07,RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.tolist RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.unique RT03,SA01" \
-i "pandas.api.extensions.ExtensionArray.view SA01" \
-i "pandas.api.interchange.from_dataframe RT03,SA01" \
-i "pandas.api.types.is_bool PR01,SA01" \
-i "pandas.api.types.is_categorical_dtype SA01" \
Expand Down Expand Up @@ -235,7 +226,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.ohlc SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
-i "pandas.core.groupby.DataFrameGroupBy.prod SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
-i "pandas.core.groupby.DataFrameGroupBy.sum SA01" \
-i "pandas.core.groupby.SeriesGroupBy.__iter__ RT03,SA01" \
Expand All @@ -252,7 +242,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
-i "pandas.core.groupby.SeriesGroupBy.ohlc SA01" \
-i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
-i "pandas.core.groupby.SeriesGroupBy.prod SA01" \
-i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
-i "pandas.core.groupby.SeriesGroupBy.sum SA01" \
-i "pandas.core.resample.Resampler.__iter__ RT03,SA01" \
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Other enhancements
- Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
- :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
- :class:`.errors.DtypeWarning` improved to include column names when mixed data types are detected (:issue:`58174`)
- :class:`Series` now supports the Arrow PyCapsule Interface for export (:issue:`59518`)
- :func:`DataFrame.to_excel` argument ``merge_cells`` now accepts a value of ``"columns"`` to only merge :class:`MultiIndex` column header cells (:issue:`35384`)
- :meth:`DataFrame.corrwith` now accepts ``min_periods`` as optional arguments, as in :meth:`DataFrame.corr` and :meth:`Series.corr` (:issue:`9490`)
- :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
Expand Down
52 changes: 51 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1137,6 +1137,13 @@ def fillna(
ExtensionArray
With NA/NaN filled.
See Also
--------
api.extensions.ExtensionArray.dropna : Return ExtensionArray without
NA values.
api.extensions.ExtensionArray.isna : A 1-D array indicating if
each value is missing.
Examples
--------
>>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
Expand Down Expand Up @@ -1220,6 +1227,15 @@ def duplicated(
Returns
-------
ndarray[bool]
With true in indices where elements are duplicated and false otherwise.
See Also
--------
DataFrame.duplicated : Return boolean Series denoting
duplicate rows.
Series.duplicated : Indicate duplicate Series values.
api.extensions.ExtensionArray.unique : Compute the ExtensionArray
of unique values.
Examples
--------
Expand Down Expand Up @@ -1303,6 +1319,13 @@ def unique(self) -> Self:
Returns
-------
pandas.api.extensions.ExtensionArray
With unique values from the input array.
See Also
--------
Index.unique: Return unique values in the index.
Series.unique: Return unique values of Series object.
unique: Return unique values based on a hash table.
Examples
--------
Expand Down Expand Up @@ -1436,10 +1459,18 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
Parameters
----------
values : np.ndarray or ExtensionArray
Values to compare every element in the array against.
Returns
-------
np.ndarray[bool]
With true at indices where value is in `values`.
See Also
--------
DataFrame.isin: Whether each element in the DataFrame is contained in values.
Index.isin: Return a boolean array where the index values are in values.
Series.isin: Whether elements in Series are contained in values.
Examples
--------
Expand Down Expand Up @@ -1743,6 +1774,12 @@ def view(self, dtype: Dtype | None = None) -> ArrayLike:
ExtensionArray or np.ndarray
A view on the :class:`ExtensionArray`'s data.
See Also
--------
api.extensions.ExtensionArray.ravel: Return a flattened view on input array.
Index.view: Equivalent function for Index.
ndarray.view: New view of array with the same data.
Examples
--------
This gives view on the underlying data of an ``ExtensionArray`` and is not a
Expand Down Expand Up @@ -2201,6 +2238,12 @@ def tolist(self) -> list:
Returns
-------
list
Python list of values in array.
See Also
--------
Index.to_list: Return a list of the values in the Index.
Series.to_list: Return a list of the values in the Series.
Examples
--------
Expand All @@ -2223,11 +2266,18 @@ def insert(self, loc: int, item) -> Self:
Parameters
----------
loc : int
Index where the `item` needs to be inserted.
item : scalar-like
Value to be inserted.
Returns
-------
same type as self
ExtensionArray
With `item` inserted at `loc`.
See Also
--------
Index.insert: Make new Index inserting new item at location.
Notes
-----
Expand Down
77 changes: 37 additions & 40 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,32 +164,6 @@ class providing the base-class of operations.
to each row or column of a DataFrame.
"""

_groupby_agg_method_template = """
Compute {fname} of group values.
Parameters
----------
numeric_only : bool, default {no}
Include only float, int, boolean columns.
.. versionchanged:: 2.0.0
numeric_only no longer accepts ``None``.
min_count : int, default {mc}
The required number of valid values to perform the operation. If fewer
than ``min_count`` non-NA values are present the result will be NA.
Returns
-------
Series or DataFrame
Computed {fname} of values within each group.
Examples
--------
{example}
"""

_groupby_agg_method_engine_template = """
Compute {fname} of group values.
Expand Down Expand Up @@ -3029,16 +3003,38 @@ def sum(
return result

@final
@doc(
_groupby_agg_method_template,
fname="prod",
no=False,
mc=0,
example=dedent(
"""\
def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
"""
Compute prod of group values.
Parameters
----------
numeric_only : bool, default False
Include only float, int, boolean columns.
.. versionchanged:: 2.0.0
numeric_only no longer accepts ``None``.
min_count : int, default 0
The required number of valid values to perform the operation. If fewer
than ``min_count`` non-NA values are present the result will be NA.
Returns
-------
Series or DataFrame
Computed prod of values within each group.
See Also
--------
Series.prod : Return the product of the values over the requested axis.
DataFrame.prod : Return the product of the values over the requested axis.
Examples
--------
For SeriesGroupBy:
>>> lst = ['a', 'a', 'b', 'b']
>>> lst = ["a", "a", "b", "b"]
>>> ser = pd.Series([1, 2, 3, 4], index=lst)
>>> ser
a 1
Expand All @@ -3054,8 +3050,11 @@ def sum(
For DataFrameGroupBy:
>>> data = [[1, 8, 2], [1, 2, 5], [2, 5, 8], [2, 6, 9]]
>>> df = pd.DataFrame(data, columns=["a", "b", "c"],
... index=["tiger", "leopard", "cheetah", "lion"])
>>> df = pd.DataFrame(
... data,
... columns=["a", "b", "c"],
... index=["tiger", "leopard", "cheetah", "lion"],
... )
>>> df
a b c
tiger 1 8 2
Expand All @@ -3066,10 +3065,8 @@ def sum(
b c
a
1 16 10
2 30 72"""
),
)
def prod(self, numeric_only: bool = False, min_count: int = 0) -> NDFrameT:
2 30 72
"""
return self._agg_general(
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
)
Expand Down
27 changes: 27 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
from pandas.compat._optional import import_optional_dependency
from pandas.compat.numpy import function as nv
from pandas.errors import (
ChainedAssignmentError,
Expand Down Expand Up @@ -558,6 +559,32 @@ def _init_dict(

# ----------------------------------------------------------------------

def __arrow_c_stream__(self, requested_schema=None):
"""
Export the pandas Series as an Arrow C stream PyCapsule.
This relies on pyarrow to convert the pandas Series to the Arrow
format (and follows the default behaviour of ``pyarrow.Array.from_pandas``
in its handling of the index, i.e. to ignore it).
This conversion is not necessarily zero-copy.
Parameters
----------
requested_schema : PyCapsule, default None
The schema to which the dataframe should be casted, passed as a
PyCapsule containing a C ArrowSchema representation of the
requested schema.
Returns
-------
PyCapsule
"""
pa = import_optional_dependency("pyarrow", min_version="16.0.0")
ca = pa.chunked_array([pa.Array.from_pandas(self, type=requested_schema)])
return ca.__arrow_c_stream__(requested_schema)

# ----------------------------------------------------------------------

@property
def _constructor(self) -> type[Series]:
return Series
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1853,6 +1853,7 @@ def zfill(self, width: int):
Returns
-------
Series/Index of objects.
A Series or Index where the strings are prepended with '0' characters.
See Also
--------
Expand Down Expand Up @@ -2385,6 +2386,13 @@ def wrap(
Returns
-------
Series or Index
A Series or Index where the strings are wrapped at the specified line width.
See Also
--------
Series.str.strip : Remove leading and trailing characters in Series/Index.
Series.str.lstrip : Remove leading characters in Series/Index.
Series.str.rstrip : Remove trailing characters in Series/Index.
Notes
-----
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/series/test_arrow_interface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import ctypes

import pytest

import pandas as pd

pa = pytest.importorskip("pyarrow", minversion="16.0")


def test_series_arrow_interface():
    # Exporting a Series via __arrow_c_stream__ should produce a valid Arrow
    # C stream PyCapsule, and pyarrow should be able to consume the Series
    # directly as a ChunkedArray with the same values.
    ser = pd.Series([1, 4, 2])

    stream_capsule = ser.__arrow_c_stream__()
    is_valid = ctypes.pythonapi.PyCapsule_IsValid(
        ctypes.py_object(stream_capsule), b"arrow_array_stream"
    )
    assert is_valid == 1

    result = pa.chunked_array(ser)
    assert result.equals(pa.chunked_array([[1, 4, 2]]))

0 comments on commit dc5fa49

Please sign in to comment.