Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Docstring Issues for pandas.Series.sparse.sp_values and pandas.Series.str.match (Issue #59592) #59873

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.sparse.fill_value SA01" \
-i "pandas.Series.sparse.from_coo PR07,SA01" \
-i "pandas.Series.sparse.npoints SA01" \
-i "pandas.Series.sparse.sp_values SA01" \
-i "pandas.Timedelta.max PR02" \
-i "pandas.Timedelta.min PR02" \
-i "pandas.Timedelta.resolution PR02" \
Expand Down
390 changes: 194 additions & 196 deletions doc/source/user_guide/style.ipynb

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions pandas/core/_numba/kernels/min_max_.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,9 @@ def grouped_min_max(
continue

if is_max:
if val > output[lab]:
output[lab] = val
output[lab] = max(val, output[lab])
else:
if val < output[lab]:
output[lab] = val
output[lab] = min(val, output[lab])

# Set labels that don't satisfy min_periods as np.nan
for lab, count in enumerate(nobs):
Expand Down
18 changes: 17 additions & 1 deletion pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,7 +601,22 @@ def sp_index(self) -> SparseIndex:
@property
def sp_values(self) -> np.ndarray:
"""
An ndarray containing the non- ``fill_value`` values.
An ndarray containing the non-``fill_value`` values.

This property returns the non-fill values of a SparseArray. SparseArrays
are designed to efficiently store large arrays of data where most of the
elements are the same (the fill value). This property gives access to
the actual data points that differ from the fill value.

Returns
-------
ndarray
An array containing the non-fill values.

See Also
--------
Series.sparse.fill_value : The fill value for the SparseArray.
arrays.SparseArray : Represents an array with sparse data.

Examples
--------
Expand All @@ -610,6 +625,7 @@ def sp_values(self) -> np.ndarray:
>>> s.sp_values
array([1, 2])
"""

return self._sparse_values

@property
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def _astype_nansafe(
)
raise ValueError(msg)

if copy or arr.dtype == object or dtype == object:
if copy or object in (arr.dtype, dtype):
# Explicit copy, or required since NumPy can't view from / to object.
return arr.astype(dtype, copy=True)

Expand Down
8 changes: 2 additions & 6 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,9 +683,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):

elif dtype.kind == "f":
mst = np.min_scalar_type(fill_value)
if mst > dtype:
# e.g. mst is np.float64 and dtype is np.float32
dtype = mst
dtype = max(mst, dtype)

elif dtype.kind == "c":
mst = np.min_scalar_type(fill_value)
Expand Down Expand Up @@ -718,9 +716,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan):

elif dtype.kind == "c":
mst = np.min_scalar_type(fill_value)
if mst > dtype:
# e.g. mst is np.complex128 and dtype is np.complex64
dtype = mst
dtype = max(mst, dtype)

else:
dtype = np.dtype(np.object_)
Expand Down
7 changes: 2 additions & 5 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,11 +460,8 @@ def newfunc(values: np.ndarray, *, axis: AxisInt | None = None, **kwargs):
axis == 1
and values.ndim == 2
and values.flags["C_CONTIGUOUS"]
# only takes this path for wide arrays (long dataframes), for threshold see
# https://github.com/pandas-dev/pandas/pull/43311#issuecomment-974891737
and (values.shape[1] / 1000) > values.shape[0]
and values.dtype != object
and values.dtype != bool
and values.shape[1] / 1000 > values.shape[0]
and values.dtype not in (object, bool)
):
arrs = list(values)
if kwargs.get("mask") is not None:
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1366,6 +1366,10 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=None):
"""
Determine if each string starts with a match of a regular expression.

This method checks if each string in the Series starts with a substring
that matches the given regular expression pattern. It returns a boolean
Series indicating whether each string meets the condition.

Parameters
----------
pat : str
Expand Down Expand Up @@ -1402,6 +1406,7 @@ def match(self, pat: str, case: bool = True, flags: int = 0, na=None):
2 False
dtype: bool
"""

result = self._data.array._str_match(pat, case=case, flags=flags, na=na)
return self._wrap_result(result, fill_value=na, returns_string=False)

Expand Down
3 changes: 1 addition & 2 deletions pandas/io/excel/_odfreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,7 @@ def get_sheet_data(
empty_cells = 0
table_row.extend([value] * column_repeat)

if max_row_len < len(table_row):
max_row_len = len(table_row)
max_row_len = max(max_row_len, len(table_row))

row_repeat = self._get_row_repeat(sheet_row)
if len(table_row) == 0:
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/arithmetic/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,7 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array
expected = TimedeltaIndex(["3 Days", "36 Hours"])
if isinstance(three_days, np.timedelta64):
dtype = three_days.dtype
if dtype < np.dtype("m8[s]"):
# i.e. resolution is lower -> use lowest supported resolution
dtype = np.dtype("m8[s]")
dtype = max(dtype, np.dtype("m8[s]"))
expected = expected.astype(dtype)
elif type(three_days) is timedelta:
expected = expected.astype("m8[us]")
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,7 +1388,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
def test_expr_with_column_name_with_backtick(self):
# GH 59285
df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
result = df.query("`a``b` < 2") # noqa
result = df.query("`a``b` < 2")
# Note: Formatting checks may wrongly treat the double backticks above as ``inline code`` markup.
expected = df[df["a`b"] < 2]
tm.assert_frame_equal(result, expected)
Expand Down