Skip to content

Commit

Permalink
SNOW-1842839: Add support for Series.str.pad (#2739)
Browse files Browse the repository at this point in the history
<!---
Please answer these questions before creating your pull request. Thanks!
--->

1. Which Jira issue is this PR addressing? Make sure that there is an
accompanying issue to your PR.

   <!---
   In this section, please add a Snowflake Jira issue number.

   Note that if a corresponding GitHub issue exists, you should still include
   the Snowflake Jira issue number. For example, for GitHub issue #1400, you should
   add "SNOW-1335071" here.
    --->

   Fixes SNOW-1842839

2. Fill out the following pre-review checklist:

- [x] I am adding a new automated test(s) to verify correctness of my
new code
- [ ] If this test skips Local Testing mode, I'm requesting review from
@snowflakedb/local-testing
   - [ ] I am adding new logging messages
   - [ ] I am adding a new telemetry message
   - [ ] I am adding new credentials
   - [ ] I am adding a new dependency
- [ ] If this is a new feature/behavior, I'm adding the Local Testing
parity changes.
- [ ] I acknowledge that I have ensured my changes are thread-safe.
Follow the link for more information: [Thread-safe Developer
Guidelines](https://github.com/snowflakedb/snowpark-python/blob/main/CONTRIBUTING.md#thread-safe-development)

3. Please describe how your code solves the related issue.

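   Add support for Series.str.pad by dispatching on `side`: "left" delegates to `str_rjust`, "right" to `str_ljust`, and "both" to `str_center`; any other value raises `ValueError("Invalid side")`.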
  • Loading branch information
sfc-gh-helmeleegy authored Dec 10, 2024
1 parent 99dde61 commit 92ce21f
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 12 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

- Added support for `Series.str.ljust` and `Series.str.rjust`.
- Added support for `Series.str.center`.
- Added support for `Series.str.pad`.


## 1.26.0 (2024-12-05)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/modin/supported/series_str_supported.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ the method in the left column.
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``normalize`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``pad`` | N | |
| ``pad`` | Y | |
+-----------------------------+---------------------------------+----------------------------------------------------+
| ``partition`` | N | |
+-----------------------------+---------------------------------+----------------------------------------------------+
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16260,7 +16260,7 @@ def str_center(self, width: int, fillchar: str = " ") -> "SnowflakeQueryCompiler
)
if not isinstance(fillchar, str):
raise TypeError(
f"fillchar must be of integer type, not {type(fillchar).__name__}"
f"fillchar must be a character, not {type(fillchar).__name__}"
)
if len(fillchar) != 1:
raise TypeError("fillchar must be a character, not str")
Expand Down Expand Up @@ -16443,8 +16443,15 @@ def str_pad(
width: int,
side: Literal["left", "right", "both"] = "left",
fillchar: str = " ",
) -> None:
ErrorMessage.method_not_implemented_error("pad", "Series.str")
) -> "SnowflakeQueryCompiler":
if side == "left":
return self.str_rjust(width, fillchar)
elif side == "right":
return self.str_ljust(width, fillchar)
elif side == "both":
return self.str_center(width, fillchar)
else:
raise ValueError("Invalid side")

def str_partition(self, sep: str = " ", expand: bool = True) -> None:
ErrorMessage.method_not_implemented_error("partition", "Series.str")
Expand Down Expand Up @@ -16506,7 +16513,7 @@ def str_ljust(self, width: int, fillchar: str = " ") -> "SnowflakeQueryCompiler"
)
if not isinstance(fillchar, str):
raise TypeError(
f"fillchar must be of integer type, not {type(fillchar).__name__}"
f"fillchar must be a character, not {type(fillchar).__name__}"
)
if len(fillchar) != 1:
raise TypeError("fillchar must be a character, not str")
Expand Down
51 changes: 50 additions & 1 deletion src/snowflake/snowpark/modin/plugin/docstrings/series_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,56 @@ def replace():
"""

def pad():
    """
    Pad strings in the Series/Index up to width.

    Parameters
    ----------
    width : int
        Minimum width of resulting string; additional characters will be filled with character defined in fillchar.
    side : {'left', 'right', 'both'}, default 'left'
        Side from which to fill resulting string.
    fillchar : str, default ' '
        Additional character for filling, default is whitespace.

    Returns
    -------
    Series or Index of object
        Returns Series or Index with minimum number of char in object.

    See Also
    --------
    Series.str.rjust
        Fills the left side of strings with an arbitrary character. Equivalent to Series.str.pad(side='left').
    Series.str.ljust
        Fills the right side of strings with an arbitrary character. Equivalent to Series.str.pad(side='right').
    Series.str.center
        Fills both sides of strings with an arbitrary character. Equivalent to Series.str.pad(side='both').
    Series.str.zfill
        Pad strings in the Series/Index by prepending '0' character. Equivalent to Series.str.pad(side='left', fillchar='0').

    Examples
    --------
    >>> s = pd.Series(["caribou", "tiger"])
    >>> s
    0    caribou
    1      tiger
    dtype: object

    >>> s.str.pad(width=10)
    0       caribou
    1         tiger
    dtype: object

    >>> s.str.pad(width=10, side='right', fillchar='-')
    0    caribou---
    1    tiger-----
    dtype: object

    >>> s.str.pad(width=10, side='both', fillchar='-')
    0    -caribou--
    1    --tiger---
    dtype: object
    """

def center():
"""
Expand Down
36 changes: 36 additions & 0 deletions tests/integ/modin/series/test_str_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,42 @@ def test_str_center_ljust_rjust_neg(func, width, fillchar):
getattr(snow_ser.str, func)(width=width, fillchar=fillchar)


@pytest.mark.parametrize("width", [-1, 0, 1, 10, 100])
@pytest.mark.parametrize("side", ["left", "right", "both"])
@pytest.mark.parametrize("fillchar", [" ", "#"])
@sql_count_checker(query_count=1)
def test_str_pad(width, side, fillchar):
    """Exercise ``Series.str.pad`` for every width/side/fillchar combination.

    The same padding operation is handed to ``eval_snowpark_pandas_result``
    together with a Snowpark pandas Series and the native pandas Series it was
    built from (presumably that helper asserts both results are equal).
    """

    def apply_pad(ser):
        # Identical call for both the Snowpark and the native series.
        return ser.str.pad(width=width, side=side, fillchar=fillchar)

    reference = native_pd.Series(TEST_DATA)
    candidate = pd.Series(reference)
    eval_snowpark_pandas_result(candidate, reference, apply_pad)


@pytest.mark.parametrize(
    "width, side, fillchar, error",
    [
        (None, "both", " ", TypeError),
        ("ten", "both", " ", TypeError),
        (10, None, " ", ValueError),
        (10, 10, " ", ValueError),
        (10, "invalid", " ", ValueError),
        (10, "both", "", TypeError),
        (10, "both", "ab", TypeError),
        (10, "both", None, TypeError),
        (10, "both", 10, TypeError),
    ],
)
@sql_count_checker(query_count=0)
def test_str_pad_neg(width, side, fillchar, error):
    """Check that invalid ``Series.str.pad`` arguments raise the expected error.

    Each parametrized case supplies one bad ``width``, ``side`` or ``fillchar``
    value along with the exception type the call must raise.
    """
    pad_kwargs = {"width": width, "side": side, "fillchar": fillchar}
    snow_series = pd.Series(native_pd.Series(TEST_DATA))
    with pytest.raises(error):
        snow_series.str.pad(**pad_kwargs)


@pytest.mark.parametrize(
"data",
[
Expand Down
6 changes: 0 additions & 6 deletions tests/unit/modin/test_series_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ def test_str_cat_no_others(mock_str_register, mock_series):
(lambda s: s.str.encode("utf-8"), "encode"),
(lambda s: s.str.rsplit("_", n=1), "rsplit"),
(lambda s: s.str.join("_"), "join"),
(lambda s: s.str.pad(10), "pad"),
(lambda s: s.str.zfill(8), "zfill"),
(lambda s: s.str.wrap(3), "wrap"),
(lambda s: s.str.slice_replace(start=3, stop=5, repl="abc"), "slice_replace"),
Expand Down Expand Up @@ -99,11 +98,6 @@ def test_str_methods_with_dataframe_return(func, func_name, mock_series):
AttributeError,
"'NoneType' object has no attribute 'join'",
),
(
lambda s: s.str.pad(8, fillchar="abc"),
TypeError,
"fillchar must be a character, not str",
),
(lambda s: s.str.wrap(-1), ValueError, r"invalid width -1 \(must be > 0\)"),
(
lambda s: s.str.count(12),
Expand Down

0 comments on commit 92ce21f

Please sign in to comment.