From 0269f64f1031d138396eb26fa995106021d2fe61 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 8 Dec 2024 02:42:47 -0500 Subject: [PATCH] docs: Add more Rust examples to User Guide (#20194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Rodrigo Girão Serrão <5621605+rodrigogiraoserrao@users.noreply.github.com> --- .../user-guide/expressions/missing-data.py | 4 +- docs/source/src/rust/Cargo.toml | 4 +- .../user-guide/expressions/aggregation.rs | 20 ++- .../src/rust/user-guide/expressions/folds.rs | 86 +++++++++- .../user-guide/expressions/missing-data.rs | 74 ++++++++- .../rust/user-guide/expressions/strings.rs | 151 +++++++++++++++++- docs/source/user-guide/expressions/strings.md | 7 +- 7 files changed, 315 insertions(+), 31 deletions(-) diff --git a/docs/source/src/python/user-guide/expressions/missing-data.py b/docs/source/src/python/user-guide/expressions/missing-data.py index f078f5a34aa7..e61af94a79e4 100644 --- a/docs/source/src/python/user-guide/expressions/missing-data.py +++ b/docs/source/src/python/user-guide/expressions/missing-data.py @@ -43,10 +43,10 @@ # --8<-- [end:fill] # --8<-- [start:fillexpr] -fill_median_df = df.with_columns( +fill_expression_df = df.with_columns( pl.col("col2").fill_null((2 * pl.col("col1")).cast(pl.Int64)), ) -print(fill_median_df) +print(fill_expression_df) # --8<-- [end:fillexpr] # --8<-- [start:fillstrategy] diff --git a/docs/source/src/rust/Cargo.toml b/docs/source/src/rust/Cargo.toml index c87e87cffffa..1a08751ad4ad 100644 --- a/docs/source/src/rust/Cargo.toml +++ b/docs/source/src/rust/Cargo.toml @@ -75,7 +75,7 @@ required-features = ["polars/lazy"] [[bin]] name = "expressions-missing-data" path = "user-guide/expressions/missing-data.rs" -required-features = ["polars/lazy"] +required-features = ["polars/lazy", "polars/interpolate"] [[bin]] name = "expressions-operations" path = "user-guide/expressions/operations.rs" @@ -83,7 +83,7 @@ required-features = ["polars/lazy", "polars/approx_unique", "polars/dtype-struct [[bin]] name = "expressions-strings" path = "user-guide/expressions/strings.rs" -required-features = ["polars/lazy"] +required-features = ["polars/lazy", "polars/strings", "polars/regex"] [[bin]] name = "expressions-structs" path = "user-guide/expressions/structs.rs" diff --git a/docs/source/src/rust/user-guide/expressions/aggregation.rs b/docs/source/src/rust/user-guide/expressions/aggregation.rs index 76b6ce22272d..fa2e0be7e33a 100644 --- a/docs/source/src/rust/user-guide/expressions/aggregation.rs +++ b/docs/source/src/rust/user-guide/expressions/aggregation.rs @@ -34,7 +34,7 @@ fn main() -> Result<(), Box> { let dataset = CsvReadOptions::default() .with_has_header(true) - .with_schema(Some(Arc::new(schema))) + .with_schema_overwrite(Some(Arc::new(schema))) .map_parse_options(|parse_options| parse_options.with_try_parse_dates(true)) .into_reader_with_file_handle(Cursor::new(data)) .finish()?; @@ -88,7 +88,7 @@ fn main() -> Result<(), Box> { .clone() .lazy() .group_by(["state", "party"]) - .agg([len().count().alias("count")]) + .agg([len().alias("count")]) .filter( col("party") .eq(lit("Anti-Administration")) @@ -135,7 +135,21 @@ fn main() -> Result<(), Box> { // --8<-- [end:filter] // --8<-- [start:filter-nested] - // Contribute the Rust translation of the Python example by opening a PR. + let df = dataset + .clone() + .lazy() + .group_by(["state", "gender"]) + .agg([compute_age().mean().alias("avg birthday"), len().alias("#")]) + .sort( + ["#"], + SortMultipleOptions::default() + .with_order_descending(true) + .with_nulls_last(true), + ) + .limit(5) + .collect()?; + + println!("{}", df); // --8<-- [end:filter-nested] // --8<-- [start:sort] diff --git a/docs/source/src/rust/user-guide/expressions/folds.rs b/docs/source/src/rust/user-guide/expressions/folds.rs index 5f986233ecda..3ab11e8772c8 100644 --- a/docs/source/src/rust/user-guide/expressions/folds.rs +++ b/docs/source/src/rust/user-guide/expressions/folds.rs @@ -1,23 +1,99 @@ fn main() -> Result<(), Box> { // --8<-- [start:mansum] + use polars::lazy::dsl::sum_horizontal; use polars::prelude::*; - // Contribute the Rust translation of the Python example by opening a PR. + + let df = df!( + "label" => ["foo", "bar", "spam"], + "a" => [1, 2, 3], + "b" => [10, 20, 30], + )?; + + let result = df + .clone() + .lazy() + .select([ + fold_exprs( + lit(0), + |acc, val| (&acc + &val).map(Some), + [col("a"), col("b")], + ) + .alias("sum_fold"), + sum_horizontal([col("a"), col("b")], true)?.alias("sum_horz"), + ]) + .collect()?; + + println!("{:?}", result); // --8<-- [end:mansum] // --8<-- [start:mansum-explicit] - // Contribute the Rust translation of the Python example by opening a PR. + let acc = lit(0); + let f = |acc: Expr, val: Expr| acc + val; + + let result = df + .clone() + .lazy() + .select([ + f(f(acc, col("a")), col("b")), + fold_exprs( + lit(0), + |acc, val| (&acc + &val).map(Some), + [col("a"), col("b")], + ) + .alias("sum_fold"), + ]) + .collect()?; + + println!("{:?}", result); // --8<-- [end:mansum-explicit] // --8<-- [start:manprod] - // Contribute the Rust translation of the Python example by opening a PR. + let result = df + .clone() + .lazy() + .select([fold_exprs( + lit(0), + |acc, val| (&acc * &val).map(Some), + [col("a"), col("b")], + ) + .alias("prod")]) + .collect()?; + + println!("{:?}", result); // --8<-- [end:manprod] // --8<-- [start:manprod-fixed] - // Contribute the Rust translation of the Python example by opening a PR. + let result = df + .clone() + .lazy() + .select([fold_exprs( + lit(1), + |acc, val| (&acc * &val).map(Some), + [col("a"), col("b")], + ) + .alias("prod")]) + .collect()?; + + println!("{:?}", result); // --8<-- [end:manprod-fixed] // --8<-- [start:conditional] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df!( + "a" => [1, 2, 3], + "b" => [0, 1, 2], + )?; + + let result = df + .clone() + .lazy() + .filter(fold_exprs( + lit(true), + |acc, val| (&acc & &val).map(Some), + [col("*").gt(1)], + )) + .collect()?; + + println!("{:?}", result); // --8<-- [end:conditional] // --8<-- [start:string] diff --git a/docs/source/src/rust/user-guide/expressions/missing-data.rs b/docs/source/src/rust/user-guide/expressions/missing-data.rs index 437379dda37e..35534c54523e 100644 --- a/docs/source/src/rust/user-guide/expressions/missing-data.rs +++ b/docs/source/src/rust/user-guide/expressions/missing-data.rs @@ -23,38 +23,96 @@ fn main() -> Result<(), Box> { // --8<-- [end:isnull] // --8<-- [start:dataframe2] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df! ( + "col1" => [0.5, 1.0, 1.5, 2.0, 2.5], + "col2" => [Some(1), None, Some(3), None, Some(5)], + )?; + + println!("{}", df); // --8<-- [end:dataframe2] // --8<-- [start:fill] - // Contribute the Rust translation of the Python example by opening a PR. + let fill_literal_df = df + .clone() + .lazy() + .with_column(col("col2").fill_null(3)) + .collect()?; + + println!("{}", fill_literal_df); // --8<-- [end:fill] // --8<-- [start:fillstrategy] - // Contribute the Rust translation of the Python example by opening a PR. + + let fill_literal_df = df + .clone() + .lazy() + .with_columns([ + col("col2") + .fill_null_with_strategy(FillNullStrategy::Forward(None)) + .alias("forward"), + col("col2") + .fill_null_with_strategy(FillNullStrategy::Backward(None)) + .alias("backward"), + ]) + .collect()?; + + println!("{}", fill_literal_df); // --8<-- [end:fillstrategy] // --8<-- [start:fillexpr] - // Contribute the Rust translation of the Python example by opening a PR. + let fill_expression_df = df + .clone() + .lazy() + .with_column(col("col2").fill_null((lit(2) * col("col1")).cast(DataType::Int64))) + .collect()?; + + println!("{}", fill_expression_df); // --8<-- [end:fillexpr] // --8<-- [start:fillinterpolate] - // Contribute the Rust translation of the Python example by opening a PR. + let fill_interpolation_df = df + .clone() + .lazy() + .with_column(col("col2").interpolate(InterpolationMethod::Linear)) + .collect()?; + + println!("{}", fill_interpolation_df); // --8<-- [end:fillinterpolate] // --8<-- [start:nan] let nan_df = df!( - "value" => [1.0, f64::NAN, f64::NAN, 3.0], + "value" => [1.0, f64::NAN, f64::NAN, 3.0], )?; println!("{}", nan_df); // --8<-- [end:nan] // --8<-- [start:nan-computed] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df!( + "dividend" => [1.0, 0.0, -1.0], + "divisor" => [1.0, 0.0, -1.0], + )?; + + let result = df + .clone() + .lazy() + .select([col("dividend") / col("divisor")]) + .collect()?; + + println!("{}", result); // --8<-- [end:nan-computed] // --8<-- [start:nanfill] - // Contribute the Rust translation of the Python example by opening a PR. + let mean_nan_df = nan_df + .clone() + .lazy() + .with_column(col("value").fill_nan(Null {}.lit()).alias("replaced")) + .select([ + col("*").mean().name().suffix("_mean"), + col("*").sum().name().suffix("_sum"), + ]) + .collect()?; + + println!("{}", mean_nan_df); // --8<-- [end:nanfill] Ok(()) } diff --git a/docs/source/src/rust/user-guide/expressions/strings.rs b/docs/source/src/rust/user-guide/expressions/strings.rs index 60903fa827f5..c77eb35302ef 100644 --- a/docs/source/src/rust/user-guide/expressions/strings.rs +++ b/docs/source/src/rust/user-guide/expressions/strings.rs @@ -1,34 +1,169 @@ fn main() -> Result<(), Box> { // --8<-- [start:df] - // Contribute the Rust translation of the Python example by opening a PR. + use polars::prelude::*; + + let df = df! ( + "language" => ["English", "Dutch", "Portuguese", "Finish"], + "fruit" => ["pear", "peer", "pêra", "päärynä"], + )?; + + let result = df + .clone() + .lazy() + .with_columns([ + col("fruit").str().len_bytes().alias("byte_count"), + col("fruit").str().len_chars().alias("letter_count"), + ]) + .collect()?; + + println!("{}", result); // --8<-- [end:df] // --8<-- [start:existence] - // Contribute the Rust translation of the Python example by opening a PR. + let result = df + .clone() + .lazy() + .select([ + col("fruit"), + col("fruit") + .str() + .starts_with(lit("p")) + .alias("starts_with_p"), + col("fruit").str().contains(lit("p..r"), true).alias("p..r"), + col("fruit").str().contains(lit("e+"), true).alias("e+"), + col("fruit").str().ends_with(lit("r")).alias("ends_with_r"), + ]) + .collect()?; + + println!("{}", result); // --8<-- [end:existence] // --8<-- [start:extract] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df! ( + "urls" => [ + "http://vote.com/ballon_dor?candidate=messi&ref=polars", + "http://vote.com/ballon_dor?candidat=jorginho&ref=polars", + "http://vote.com/ballon_dor?candidate=ronaldo&ref=polars", + ] + )?; + + let result = df + .clone() + .lazy() + .select([col("urls").str().extract(lit(r"candidate=(\w+)"), 1)]) + .collect()?; + + println!("{}", result); // --8<-- [end:extract] // --8<-- [start:extract_all] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df! ( + "text" => ["123 bla 45 asd", "xyz 678 910t"] + )?; + + let result = df + .clone() + .lazy() + .select([col("text") + .str() + .extract_all(lit(r"(\d+)")) + .alias("extracted_nrs")]) + .collect()?; + + println!("{}", result); // --8<-- [end:extract_all] // --8<-- [start:replace] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df! ( + "text" => ["123abc", "abc456"] + )?; + + let result = df + .clone() + .lazy() + .with_columns([ + col("text").str().replace(lit(r"\d"), lit("-"), false), + col("text") + .str() + .replace_all(lit(r"\d"), lit("-"), false) + .alias("text_replace_all"), + ]) + .collect()?; + + println!("{}", result); // --8<-- [end:replace] // --8<-- [start:casing] - // Contribute the Rust translation of the Python example by opening a PR. + let addresses = df! ( + "addresses" => [ + "128 PERF st", + "Rust blVD, 158", + "PoLaRs Av, 12", + "1042 Query sq", + ] + )?; + + let addresses = addresses + .clone() + .lazy() + .select([ + col("addresses").alias("originals"), + col("addresses").str().to_titlecase(), + col("addresses").str().to_lowercase().alias("lower"), + col("addresses").str().to_uppercase().alias("upper"), + ]) + .collect()?; + + println!("{}", addresses); // --8<-- [end:casing] // --8<-- [start:strip] - // Contribute the Rust translation of the Python example by opening a PR. + let addr = col("addresses"); + let chars = lit(", 0123456789"); + let result = addresses + .clone() + .lazy() + .select([ + addr.clone().str().strip_chars(chars.clone()).alias("strip"), + addr.clone() + .str() + .strip_chars_end(chars.clone()) + .alias("end"), + addr.clone() + .str() + .strip_chars_start(chars.clone()) + .alias("start"), + addr.clone().str().strip_prefix(lit("128 ")).alias("prefix"), + addr.clone() + .str() + .strip_suffix(lit(", 158")) + .alias("suffix"), + ]) + .collect()?; + + println!("{}", result); // --8<-- [end:strip] // --8<-- [start:slice] - // Contribute the Rust translation of the Python example by opening a PR. + let df = df! ( + "fruits" => ["pear", "mango", "dragonfruit", "passionfruit"], + "n" => [1, -1, 4, -4], + )?; + + let result = df + .clone() + .lazy() + .with_columns([ + col("fruits") + .str() + .slice(col("n"), lit(NULL)) + .alias("slice"), + col("fruits").str().head(col("n")).alias("head"), + col("fruits").str().tail(col("n")).alias("tail"), + ]) + .collect()?; + + println!("{}", result); // --8<-- [end:slice] Ok(()) diff --git a/docs/source/user-guide/expressions/strings.md b/docs/source/user-guide/expressions/strings.md index e441517cc934..826e49cc7952 100644 --- a/docs/source/user-guide/expressions/strings.md +++ b/docs/source/user-guide/expressions/strings.md @@ -118,9 +118,10 @@ the string: | `strip_prefix` | Removes an exact substring prefix if present. | | `strip_suffix` | Removes an exact substring suffix if present. | -??? info "Similarity to Python string methods" `strip_chars` is similar to Python's string method -`strip` and `strip_prefix`/`strip_suffix` are similar to Python's string methods `removeprefix` and -`strip_suffix`, respectively. +??? info "Similarity to Python string methods" + + `strip_chars` is similar to Python's string method `strip` and `strip_prefix`/`strip_suffix` + are similar to Python's string methods `removeprefix` and `removesuffix`, respectively. It is important to understand that the first three functions interpret their string argument as a set of characters whereas the functions `strip_prefix` and `strip_suffix` do interpret their string