Commit
depr(python): Rename map to map_batches (#10801)
stinodego authored Aug 30, 2023
1 parent 7f89244 commit 036a0bd
Showing 19 changed files with 250 additions and 66 deletions.
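The rename touches three user-facing entry points at once: the module-level pl.map, Expr.map, and LazyFrame.map all gain map_batches counterparts, with the old names kept as deprecated aliases (see the wrapper methods further down). A rough usage sketch of the new names; the frame and lambdas here are illustrative and not taken from the diff:

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

# Expr.map -> Expr.map_batches: the UDF receives each column as a whole Series
df.select(pl.all().map_batches(lambda s: s * 2))

# pl.map -> pl.map_batches: the UDF receives a list of Series, one per input expression
df.select(pl.map_batches(["a", "b"], lambda series: series[0] + series[1]))

# LazyFrame.map -> LazyFrame.map_batches: the UDF receives the materialized DataFrame
df.lazy().map_batches(lambda frame: frame.head(2)).collect()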
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/expressions/functions.rst
@@ -53,6 +53,7 @@ These functions are available from the polars module root and can be used as exp
last
lit
map
map_batches
map_groups
max
max_horizontal
@@ -98,6 +99,7 @@ These functions are available from the polars module root and can be used as exp
Expr.head
Expr.implode
Expr.map
Expr.map_batches
Expr.map_elements
Expr.max
Expr.mean
@@ -11,6 +11,7 @@ Miscellaneous
LazyFrame.fetch
LazyFrame.lazy
LazyFrame.map
LazyFrame.map_batches
LazyFrame.pipe
LazyFrame.profile

2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
@@ -121,6 +121,7 @@
last,
lit,
map,
map_batches,
map_groups,
max,
max_horizontal,
@@ -334,6 +335,7 @@
"last",
"lit",
"map",
"map_batches",
"map_groups",
"mean",
"median",
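Because both names are exported from the package root after this change, existing pl.map call sites keep resolving while new code can move to pl.map_batches. A trivial, illustrative check:

import polars as pl

assert callable(pl.map_batches)  # new export added by this commit
assert callable(pl.map)          # retained as a deprecated alias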
50 changes: 41 additions & 9 deletions py-polars/polars/expr/expr.py
@@ -258,7 +258,7 @@ def function(s: Series) -> Series: # pragma: no cover
args = [inp if not isinstance(inp, Expr) else s for inp in inputs]
return ufunc(*args, **kwargs)

return self.map(function)
return self.map_batches(function)

@classmethod
def from_json(cls, value: str) -> Self:
@@ -3634,7 +3634,7 @@ def where(self, predicate: Expr) -> Self:
"""
return self.filter(predicate)

def map(
def map_batches(
self,
function: Callable[[Series], Series | Any],
return_dtype: PolarsDataType | None = None,
@@ -3678,7 +3678,7 @@ def map(
... "cosine": [1.0, 0.0, -1.0, 0.0],
... }
... )
>>> df.select(pl.all().map(lambda x: x.to_numpy().argmax()))
>>> df.select(pl.all().map_batches(lambda x: x.to_numpy().argmax()))
shape: (1, 2)
┌──────┬────────┐
│ sine ┆ cosine │
"""
if return_dtype is not None:
return_dtype = py_type_to_dtype(return_dtype)
return self._from_pyexpr(self._pyexpr.map(function, return_dtype, agg_list))
return self._from_pyexpr(
self._pyexpr.map_batches(function, return_dtype, agg_list)
)

def map_elements(
self,
@@ -3842,13 +3844,15 @@ def wrap_f(x: Series) -> Series: # pragma: no cover
)

if strategy == "thread_local":
return self.map(wrap_f, agg_list=True, return_dtype=return_dtype)
return self.map_batches(wrap_f, agg_list=True, return_dtype=return_dtype)
elif strategy == "threading":

def wrap_threading(x: Series) -> Series:
def get_lazy_promise(df: DataFrame) -> LazyFrame:
return df.lazy().select(
F.col("x").map(wrap_f, agg_list=True, return_dtype=return_dtype)
F.col("x").map_batches(
wrap_f, agg_list=True, return_dtype=return_dtype
)
)

df = x.to_frame("x")
@@ -3880,7 +3884,9 @@ def get_lazy_promise(df: DataFrame) -> LazyFrame:
out = [df.to_series() for df in F.collect_all(partitions)]
return F.concat(out, rechunk=False)

return self.map(wrap_threading, agg_list=True, return_dtype=return_dtype)
return self.map_batches(
wrap_threading, agg_list=True, return_dtype=return_dtype
)
else:
raise ValueError(f"Strategy {strategy} is not supported.")
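Both strategies above funnel into map_batches: "thread_local" wraps the element-wise UDF once, while "threading" partitions the Series, builds one lazy promise per partition, and collects them in parallel. A minimal usage sketch, assuming map_elements keeps the strategy keyword shown here; the frame and UDF are invented:

import polars as pl

df = pl.DataFrame({"x": list(range(8))})

# default "thread_local": the element-wise UDF runs in the current thread
df.select(pl.col("x").map_elements(lambda v: v * v))

# "threading": the column is split into partitions that are evaluated through
# collect_all, as in the wrap_threading helper above; only worthwhile when the
# UDF is expensive or releases the GIL
df.select(pl.col("x").map_elements(lambda v: v * v, strategy="threading"))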

@@ -5075,7 +5081,7 @@ def inspect(s: Series) -> Series: # pragma: no cover
print(fmt.format(s))
return s

return self.map(inspect, return_dtype=None, agg_list=True)
return self.map_batches(inspect, return_dtype=None, agg_list=True)

def interpolate(self, method: InterpolationMethod = "linear") -> Self:
"""
@@ -9011,7 +9017,33 @@ def inner(s: Series) -> Series:
)

func = inner_with_default if default is not None else inner
return self.map(func)
return self.map_batches(func)

@deprecate_renamed_function("map_batches", version="0.19.0")
def map(
self,
function: Callable[[Series], Series | Any],
return_dtype: PolarsDataType | None = None,
*,
agg_list: bool = False,
) -> Self:
"""
Apply a custom python function to a Series or sequence of Series.

.. deprecated:: 0.19.0
    This method has been renamed to :func:`Expr.map_batches`.

Parameters
----------
function
    Lambda/function to apply.
return_dtype
    Dtype of the output Series.
agg_list
    Aggregate list.
"""
return self.map_batches(function, return_dtype, agg_list=agg_list)

@deprecate_renamed_function("map_elements", version="0.19.0")
def apply(
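The deprecated Expr.map shown above simply forwards to Expr.map_batches. A small sketch of what callers can expect, assuming the deprecate_renamed_function decorator emits a DeprecationWarning (as polars' deprecation helpers typically do); the frame is illustrative:

import warnings

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    old = df.select(pl.col("a").map(lambda s: s * 2))        # deprecated spelling
new = df.select(pl.col("a").map_batches(lambda s: s * 2))    # new spelling

assert old.to_series().to_list() == new.to_series().to_list() == [2, 4, 6]
assert any(issubclass(w.category, DeprecationWarning) for w in caught)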
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
@@ -50,6 +50,7 @@
implode,
last,
map,
map_batches,
map_groups,
mean,
median,
@@ -139,6 +140,7 @@
"last",
"lit",
"map",
"map_batches",
"map_groups",
"mean",
"median",
35 changes: 33 additions & 2 deletions py-polars/polars/functions/lazy.py
@@ -961,7 +961,7 @@ def cov(a: str | Expr, b: str | Expr) -> Expr:
return wrap_expr(plr.cov(a._pyexpr, b._pyexpr))


def map(
def map_batches(
exprs: Sequence[str] | Sequence[Expr],
function: Callable[[Sequence[Series]], Series],
return_dtype: PolarsDataType | None = None,
@@ -999,7 +999,7 @@ def map(
>>>
>>> df.with_columns(
... (
... pl.struct(["a", "b"]).map(
... pl.struct(["a", "b"]).map_batches(
... lambda x: test_func(x.struct.field("a"), x.struct.field("b"), 1)
... )
... ).alias("a+b+c")
│ 3 ┆ 6 ┆ 10 │
│ 4 ┆ 7 ┆ 12 │
└─────┴─────┴───────┘
"""
exprs = parse_as_list_of_expressions(exprs)
return wrap_expr(
)


@deprecate_renamed_function("map_batches", version="0.19.0")
def map(
exprs: Sequence[str] | Sequence[Expr],
function: Callable[[Sequence[Series]], Series],
return_dtype: PolarsDataType | None = None,
) -> Expr:
"""
Map a custom function over multiple columns/expressions.

.. deprecated:: 0.19.0
    This function has been renamed to :func:`map_batches`.

Parameters
----------
exprs
    Input Series to the function.
function
    Function to apply over the input.
return_dtype
    Dtype of the output Series.

Returns
-------
Expr
    Expression with the data type given by ``return_dtype``.
"""
return map_batches(exprs, function, return_dtype)


def map_groups(
exprs: Sequence[str | Expr],
function: Callable[[Sequence[Series]], Series | Any],
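The module-level wrapper behaves the same way: pl.map keeps its positional return_dtype argument and hands everything to pl.map_batches. A hedged sketch of equivalent calls; the column names and the multiply helper are invented for illustration:

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

def multiply(series):
    # `series` is the list of Series produced by the input expressions
    return series[0] * series[1]

new = df.select(pl.map_batches(["a", "b"], multiply, return_dtype=pl.Float64).alias("prod"))
old = df.select(pl.map(["a", "b"], multiply, return_dtype=pl.Float64).alias("prod"))  # warns

assert new.get_column("prod").to_list() == old.get_column("prod").to_list()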
2 changes: 1 addition & 1 deletion py-polars/polars/io/excel/_write_utils.py
@@ -324,7 +324,7 @@ def _map_str(s: Series) -> Series:
return s.__class__(s.name, [str(v) for v in s.to_list()])

cast_cols = [
F.col(col).map(_map_str).alias(col)
F.col(col).map_batches(_map_str).alias(col)
for col, tp in df.schema.items()
if tp in (List, Struct, Object)
]
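The Excel writer change is a good example of a batch-wise UDF in practice: nested and object columns are rendered to strings column by column before writing. A user-level approximation of the same pattern, with a made-up frame and a stringify helper mirroring _map_str:

import polars as pl

df = pl.DataFrame({"id": [1, 2], "tags": [["x"], ["y", "z"]]})

def stringify(s):
    # render every value with str(), mirroring _map_str above
    return pl.Series(s.name, [str(v) for v in s.to_list()])

out = df.with_columns(
    [
        pl.col(name).map_batches(stringify).alias(name)
        for name, dtype in df.schema.items()
        if dtype in (pl.List, pl.Struct, pl.Object)
    ]
)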
76 changes: 69 additions & 7 deletions py-polars/polars/lazyframe/frame.py
@@ -1167,7 +1167,9 @@ def inspect(s: DataFrame) -> DataFrame:
print(fmt.format(s))
return s

return self.map(inspect, predicate_pushdown=True, projection_pushdown=True)
return self.map_batches(
inspect, predicate_pushdown=True, projection_pushdown=True
)

def sort(
self,
@@ -5114,7 +5116,7 @@ def melt(
self._ldf.melt(id_vars, value_vars, value_name, variable_name, streamable)
)

def map(
def map_batches(
self,
function: Callable[[DataFrame], DataFrame],
*,
@@ -5174,7 +5176,7 @@ def melt(
... "b": [3, 4],
... }
... )
>>> lf.map(lambda x: 2 * x).collect()
>>> lf.map_batches(lambda x: 2 * x).collect()
shape: (2, 2)
┌─────┬─────┐
│ a ┆ b │
slice_pushdown = False

return self._from_pyldf(
self._ldf.map(
self._ldf.map_batches(
function,
predicate_pushdown,
projection_pushdown,
@@ -5510,7 +5512,8 @@ def groupby(
"""
Start a group by operation.

Alias for :func:`LazyFrame.group_by`.

.. deprecated:: 0.19.0
    This method has been renamed to :func:`LazyFrame.group_by`.

Parameters
----------
@@ -5542,7 +5545,8 @@ def groupby_rolling(
"""
Create rolling groups based on a time, Int32, or Int64 column.

Alias for :func:`LazyFrame.group_by_rolling`.

.. deprecated:: 0.19.0
    This method has been renamed to :func:`LazyFrame.group_by_rolling`.

Parameters
----------
@@ -5605,7 +5609,8 @@ def groupby_dynamic(
"""
Group based on a time value (or index value of type Int32, Int64).

Alias for :func:`LazyFrame.group_by_dynamic`.

.. deprecated:: 0.19.0
    This method has been renamed to :func:`LazyFrame.group_by_dynamic`.

Parameters
----------
@@ -5674,3 +5679,60 @@ def groupby_dynamic(
start_by=start_by,
check_sorted=check_sorted,
)

@deprecate_renamed_function("map_batches", version="0.19.0")
def map(
self,
function: Callable[[DataFrame], DataFrame],
*,
predicate_pushdown: bool = True,
projection_pushdown: bool = True,
slice_pushdown: bool = True,
no_optimizations: bool = False,
schema: None | SchemaDict = None,
validate_output_schema: bool = True,
streamable: bool = False,
) -> Self:
"""
Apply a custom function.

.. deprecated:: 0.19.0
    This method has been renamed to :func:`LazyFrame.map_batches`.

Parameters
----------
function
Lambda/function to apply.
predicate_pushdown
Allow predicate pushdown optimization to pass this node.
projection_pushdown
Allow projection pushdown optimization to pass this node.
slice_pushdown
Allow slice pushdown optimization to pass this node.
no_optimizations
Turn off all optimizations past this point.
schema
Output schema of the function; if set to ``None``, the schema is assumed to
remain unchanged by the applied function.
validate_output_schema
It is paramount that polars' schema is correct. This flag ensures that the
output schema of this function is checked against the expected schema.
Setting this to ``False`` skips the check, but may lead to hard-to-debug bugs.
streamable
Whether the given function is eligible to run with the streaming engine. That
means the function must produce the same result whether it is executed in
batches or on the full dataset.
"""
return self.map_batches(
function,
predicate_pushdown=predicate_pushdown,
projection_pushdown=projection_pushdown,
slice_pushdown=slice_pushdown,
no_optimizations=no_optimizations,
schema=schema,
validate_output_schema=validate_output_schema,
streamable=streamable,
)
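LazyFrame.map_batches (and the deprecated LazyFrame.map shim above) injects an opaque Python function into the query plan, which is why the pushdown and schema flags exist. A minimal sketch under the assumption that the UDF leaves the schema unchanged, so schema=None is acceptable:

import polars as pl

lf = pl.DataFrame({"a": [1, 2], "b": [3, 4]}).lazy()

out = (
    lf.map_batches(
        lambda frame: frame * 2,   # schema-preserving UDF applied to the whole DataFrame
        predicate_pushdown=True,   # let filters pass through this node
        projection_pushdown=True,  # let column selection pass through as well
        streamable=False,          # the UDF has not been vetted for batched execution
    ).collect()
)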
7 changes: 6 additions & 1 deletion py-polars/src/expr/general.rs
@@ -614,7 +614,12 @@ impl PyExpr {
}

#[pyo3(signature = (lambda, output_type, agg_list))]
fn map(&self, lambda: PyObject, output_type: Option<Wrap<DataType>>, agg_list: bool) -> Self {
fn map_batches(
&self,
lambda: PyObject,
output_type: Option<Wrap<DataType>>,
agg_list: bool,
) -> Self {
map_single(self, lambda, output_type, agg_list)
}

2 changes: 1 addition & 1 deletion py-polars/src/lazyframe.rs
@@ -920,7 +920,7 @@ impl PyLazyFrame {

#[pyo3(signature = (lambda, predicate_pushdown, projection_pushdown, slice_pushdown, streamable, schema, validate_output))]
#[allow(clippy::too_many_arguments)]
fn map(
fn map_batches(
&self,
lambda: PyObject,
predicate_pushdown: bool,