Skip to content

Commit

Permalink
refactor(python,rust): More cleanup around arange (#9769)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Jul 8, 2023
1 parent c752a74 commit a49836e
Show file tree
Hide file tree
Showing 11 changed files with 51 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ body:
```
Replace this line with the output of pl.show_versions(), leave the backticks in place
```
</details>
validations:
required: true
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ pub(super) fn int_ranges(s: &[Series], step: i64) -> PolarsResult<Series> {
polars_bail!(
ComputeError:
"lengths of `start`: {} and `end`: {} arguments `\
cannot be matched in the `arange` expression",
cannot be matched in the `int_ranges` expression",
start.len(), end.len()
);
}
Expand Down
2 changes: 1 addition & 1 deletion polars/polars-lazy/polars-plan/src/dsl/functions/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use super::*;
pub fn arg_sort_by<E: AsRef<[Expr]>>(by: E, descending: &[bool]) -> Expr {
let e = &by.as_ref()[0];
let name = expr_output_name(e).unwrap();
arange(lit(0 as IdxSize), count().cast(IDX_DTYPE), 1)
int_range(lit(0 as IdxSize), count().cast(IDX_DTYPE), 1)
.sort_by(by, descending)
.alias(name.as_ref())
}
Expand Down
6 changes: 3 additions & 3 deletions polars/tests/it/lazy/expressions/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ use super::*;

#[test]
#[cfg(feature = "range")]
fn test_arange_agg() -> PolarsResult<()> {
fn test_int_range_agg() -> PolarsResult<()> {
let df = df![
"x" => [5, 5, 4, 4, 2, 2]
]?;

let out = df
.lazy()
.with_columns([arange(lit(0i32), count(), 1).over([col("x")])])
.with_columns([int_range(lit(0i32), count(), 1).over([col("x")])])
.collect()?;
assert_eq!(
Vec::from_iter(out.column("arange")?.i64()?.into_no_null_iter()),
Vec::from_iter(out.column("int")?.i64()?.into_no_null_iter()),
&[0, 1, 0, 1, 0, 1]
);

Expand Down
2 changes: 1 addition & 1 deletion py-polars/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5144,7 +5144,7 @@ def groupby_dynamic(
>>> df = pl.DataFrame(
... {
... "idx": pl.arange(0, 6, eager=True),
... "idx": pl.int_range(0, 6, eager=True),
... "A": ["A", "A", "B", "B", "B", "C"],
... }
... )
Expand Down Expand Up @@ -6563,7 +6563,7 @@ def unstack(
>>> df = pl.DataFrame(
... {
... "col1": list(ascii_uppercase[0:9]),
... "col2": pl.arange(0, 9, eager=True),
... "col2": pl.int_range(0, 9, eager=True),
... }
... )
>>> df
Expand Down Expand Up @@ -6635,7 +6635,7 @@ def unstack(
if how == "horizontal":
df = (
df.with_columns(
(F.arange(0, n_cols * n_rows, eager=True) % n_cols).alias(
(F.int_range(0, n_cols * n_rows, eager=True) % n_cols).alias(
"__sort_order"
),
)
Expand Down
28 changes: 25 additions & 3 deletions py-polars/polars/functions/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,11 @@ def arange(
"""
Generate a range of integers.
This can be used in a ``select``, ``with_columns`` etc. Be sure that the resulting
range size is equal to the length of the DataFrame you are collecting.
.. deprecated:: 0.18.5
``arange`` has been replaced by two new functions: ``int_range`` for generating
a single range, and ``int_ranges`` for generating a list column with multiple
ranges. ``arange`` will remain available as an alias for ``int_range``, which
means it will lose the functionality to generate multiple ranges.
Parameters
----------
Expand All @@ -99,6 +102,11 @@ def arange(
Evaluate immediately and return a ``Series``. If set to ``False`` (default),
return an expression instead.
See Also
--------
int_range : Generate a range of integers.
int_ranges : Generate a range of integers for each row of the input columns.
Examples
--------
>>> pl.arange(0, 3, eager=True)
Expand Down Expand Up @@ -141,6 +149,7 @@ def int_range(
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsIntegerType = ...,
eager: Literal[False] = ...,
) -> Expr:
...
Expand All @@ -152,6 +161,7 @@ def int_range(
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsIntegerType = ...,
eager: Literal[True],
) -> Series:
...
Expand All @@ -163,6 +173,7 @@ def int_range(
end: int | IntoExpr,
step: int = ...,
*,
dtype: PolarsIntegerType = ...,
eager: bool,
) -> Expr | Series:
...
Expand All @@ -173,6 +184,7 @@ def int_range(
end: int | IntoExpr,
step: int = 1,
*,
dtype: PolarsIntegerType = Int64,
eager: bool = False,
) -> Expr | Series:
"""
Expand All @@ -186,6 +198,8 @@ def int_range(
Upper bound of the range (exclusive).
step
Step size of the range.
dtype
Data type of the range. Defaults to ``Int64``.
eager
Evaluate immediately and return a ``Series``. If set to ``False`` (default),
return an expression instead.
Expand All @@ -194,6 +208,10 @@ def int_range(
-------
Column of data type ``dtype``.
See Also
--------
int_ranges : Generate a range of integers for each row of the input columns.
Examples
--------
>>> pl.int_range(0, 3, eager=True)
Expand All @@ -208,7 +226,7 @@ def int_range(
"""
start = parse_as_expression(start)
end = parse_as_expression(end)
result = wrap_expr(plr.int_range(start, end, step))
result = wrap_expr(plr.int_range(start, end, step, dtype))

if eager:
return F.select(result).to_series()
Expand Down Expand Up @@ -281,6 +299,10 @@ def int_ranges(
-------
Column of data type ``List(dtype)``.
See Also
--------
int_range : Generate a single range of integers.
Examples
--------
>>> df = pl.DataFrame({"start": [1, -1], "end": [3, 2]})
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2607,7 +2607,7 @@ def groupby_dynamic(
>>> lf = pl.LazyFrame(
... {
... "idx": pl.arange(0, 6, eager=True),
... "idx": pl.int_range(0, 6, eager=True),
... "A": ["A", "A", "B", "B", "B", "C"],
... }
... )
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def __init__(
self._s = series_to_pyseries(name, values)

elif isinstance(values, range):
self._s = range_to_series(name, values, dtype=dtype)._s
self._s = range_to_series(name, values, dtype=dtype)._s # type: ignore[arg-type]

elif isinstance(values, Sequence):
self._s = sequence_to_pyseries(
Expand Down
7 changes: 4 additions & 3 deletions py-polars/polars/utils/various.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from pathlib import Path

from polars import DataFrame, Series
from polars.type_aliases import PolarsDataType, SizeUnit
from polars.type_aliases import PolarsDataType, PolarsIntegerType, SizeUnit

if sys.version_info >= (3, 10):
from typing import ParamSpec, TypeGuard
Expand Down Expand Up @@ -96,10 +96,11 @@ def is_str_sequence(


def range_to_series(
name: str, rng: range, dtype: PolarsDataType | None = None
name: str, rng: range, dtype: PolarsIntegerType | None = None
) -> Series:
"""Fast conversion of the given range to a Series."""
return F.arange(
dtype = dtype or Int64
return F.int_range(
start=rng.start,
end=rng.stop,
step=rng.step,
Expand Down
12 changes: 10 additions & 2 deletions py-polars/src/functions/range.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,16 @@ pub fn arange(start: PyExpr, end: PyExpr, step: i64) -> PyExpr {
}

#[pyfunction]
pub fn int_range(start: PyExpr, end: PyExpr, step: i64) -> PyExpr {
dsl::int_range(start.inner, end.inner, step).into()
/// Build an integer range expression for the Python bindings.
///
/// Delegates to `dsl::int_range` with the given `start`, `end` and `step`.
/// When the requested `dtype` is anything other than `DataType::Int64`,
/// the resulting expression is additionally cast to that data type;
/// otherwise the expression is returned unchanged.
pub fn int_range(start: PyExpr, end: PyExpr, step: i64, dtype: Wrap<DataType>) -> PyExpr {
    let target = dtype.0;
    let range = dsl::int_range(start.inner, end.inner, step);

    // Only add a cast node when the caller asked for something other than Int64.
    let expr = if target == DataType::Int64 {
        range
    } else {
        range.cast(target)
    };

    expr.into()
}

#[pyfunction]
Expand Down

0 comments on commit a49836e

Please sign in to comment.