Skip to content

Commit

Permalink
feat(rust, python): add tree formatter for expressions (pola-rs#9684)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored and c-peters committed Jul 14, 2023
1 parent 743c83d commit f16d8ae
Show file tree
Hide file tree
Showing 16 changed files with 666 additions and 19 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ memchr = "2"
smartstring = { version = "1" }
multiversion = "0.7"
either = "1.8"
strum_macros = "0.25"

[workspace.dependencies.arrow]
package = "arrow2"
Expand Down
1 change: 1 addition & 0 deletions polars/polars-lazy/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ rayon.workspace = true
regex = { version = "1.6", optional = true }
serde = { version = "1", features = ["derive", "rc"], optional = true }
smartstring.workspace = true
strum_macros.workspace = true

[features]
# debugging utility
Expand Down
15 changes: 15 additions & 0 deletions polars/polars-lazy/polars-plan/src/dsl/meta.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
use std::fmt::Display;
use std::ops::BitAnd;

use super::*;
use crate::dsl::selector::Selector;
use crate::logical_plan::projection::is_regex_projection;
use crate::logical_plan::tree_format::TreeFmtVisitor;
use crate::logical_plan::visitor::{AexprNode, TreeWalker};

/// Namespace for meta operations on an expression: helpers that inspect or
/// transform the wrapped `Expr` itself (e.g. whether it expands to multiple
/// outputs, or rendering it as a tree) rather than operating on column data.
pub struct MetaNameSpace(pub(crate) Expr);
Expand Down Expand Up @@ -56,6 +59,7 @@ impl MetaNameSpace {
self.0
}

/// Whether this expression expands to multiple expressions.
pub fn has_multiple_outputs(&self) -> bool {
self.0.into_iter().any(|e| match e {
Expr::Selector(_) | Expr::Wildcard | Expr::Columns(_) | Expr::DtypeColumn(_) => true,
Expand All @@ -64,6 +68,7 @@ impl MetaNameSpace {
})
}

/// Whether this expression expands to multiple expressions with regex expansion.
pub fn is_regex_projection(&self) -> bool {
self.0.into_iter().any(|e| match e {
Expr::Column(name) => is_regex_projection(name),
Expand Down Expand Up @@ -117,4 +122,14 @@ impl MetaNameSpace {
Expr::Selector(Selector::new(self.0))
}
}

/// Consume the expression and return a value implementing `Display` that
/// renders the expression as a tree.
///
/// The expression is first converted into its arena-backed form with
/// `to_aexpr`, then walked with a `TreeFmtVisitor`; the visitor itself is the
/// returned `Display` implementor.
///
/// # Errors
/// Returns the error produced by the visit, if any.
pub fn into_tree_formatter(self) -> PolarsResult<impl Display> {
    let mut expr_arena = Default::default();
    let root = to_aexpr(self.0, &mut expr_arena);
    let mut tree_fmt = TreeFmtVisitor::new();
    AexprNode::with_context(root, &mut expr_arena, |ae| ae.visit(&mut tree_fmt))?;
    Ok(tree_fmt)
}
}
93 changes: 77 additions & 16 deletions polars/polars-lazy/polars-plan/src/logical_plan/aexpr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ use polars_core::frame::groupby::GroupByMethod;
use polars_core::prelude::*;
use polars_core::utils::{get_time_units, try_get_supertype};
use polars_utils::arena::{Arena, Node};
use strum_macros::IntoStaticStr;

use crate::dsl::function_expr::FunctionExpr;
use crate::logical_plan::Context;
use crate::prelude::aexpr::NodeInputs::Single;
use crate::prelude::names::COUNT;
use crate::prelude::*;

#[derive(Clone, Debug)]
#[derive(Clone, Debug, IntoStaticStr)]
pub enum AAggExpr {
Min {
input: Node,
Expand Down Expand Up @@ -197,21 +198,59 @@ impl AExpr {
.map(|f| f.data_type().clone())
}

pub(crate) fn replace_input(self, input: Node) -> Self {
pub(crate) fn replace_inputs(mut self, inputs: &[Node]) -> Self {
use AExpr::*;
match self {
Alias(_, name) => Alias(input, name),
Cast {
expr: _,
data_type,
strict,
} => Cast {
expr: input,
data_type,
strict,
},
_ => todo!(),
}
let input = match &mut self {
Column(_) | Literal(_) | Wildcard | Count | Nth(_) => return self,
Alias(input, _) => input,
Cast { expr, .. } => expr,
Explode(input) | Slice { input, .. } | Cache { input, .. } => input,
BinaryExpr { left, right, .. } => {
*left = inputs[0];
*right = inputs[1];
return self;
}
Sort { expr, .. } | Take { expr, .. } => expr,
SortBy { expr, by, .. } => {
*expr = *inputs.last().unwrap();
by.clear();
by.extend_from_slice(&inputs[..inputs.len() - 1]);
return self;
}
Filter { input, .. } => input,
Agg(a) => {
a.set_input(inputs[0]);
return self;
}
Ternary {
truthy,
falsy,
predicate,
} => {
*truthy = inputs[0];
*falsy = inputs[1];
*predicate = inputs[2];
return self;
}
AnonymousFunction { input, .. } | Function { input, .. } => {
input.clear();
input.extend(inputs.iter().rev().copied());
return self;
}
Window {
function,
partition_by,
order_by,
..
} => {
*function = inputs[0];
partition_by.extend_from_slice(&inputs[1..]);
assert!(order_by.is_none());
return self;
}
};
*input = inputs[0];
self
}

pub(crate) fn get_input(&self) -> NodeInputs {
Expand All @@ -238,9 +277,11 @@ impl AExpr {
falsy,
predicate,
} => Many(vec![*truthy, *falsy, *predicate]),
// we iterate in reverse order, so that the lhs is popped first and will be found
// as the root columns/ input columns by `_suffix` and `_keep_name` etc.
AnonymousFunction { input, .. } | Function { input, .. } => match input.len() {
1 => Single(input[0]),
_ => Many(input.clone()),
_ => Many(input.iter().copied().rev().collect()),
},
Window {
function,
Expand Down Expand Up @@ -291,6 +332,26 @@ impl AAggExpr {
AggGroups(input) => Single(*input),
}
}
/// Overwrite the input node of this aggregation with `input`.
///
/// Only the node each variant stores as its input (`expr` for `Quantile`) is
/// replaced; every other field of the variant is left untouched.
pub fn set_input(&mut self, input: Node) {
    use AAggExpr::*;
    let slot = match self {
        Min { input: node, .. } | Max { input: node, .. } => node,
        Quantile { expr: node, .. } => node,
        Std(node, _) | Var(node, _) => node,
        Median(node)
        | NUnique(node)
        | First(node)
        | Last(node)
        | Mean(node)
        | Implode(node)
        | Sum(node)
        | Count(node)
        | AggGroups(node) => node,
    };
    *slot = input;
}
}

pub enum NodeInputs {
Expand Down
2 changes: 2 additions & 0 deletions polars/polars-lazy/polars-plan/src/logical_plan/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use polars_arrow::error::PolarsResult;

use crate::prelude::*;

#[macro_export]
macro_rules! push_expr {
($current_expr:expr, $push:ident, $iter:ident) => {{
use Expr::*;
Expand Down
3 changes: 3 additions & 0 deletions polars/polars-lazy/polars-plan/src/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ pub(crate) mod projection;
#[cfg(feature = "python")]
mod pyarrow;
mod schema;
#[cfg(feature = "meta")]
pub(crate) mod tree_format;
pub mod visitor;

pub use aexpr::*;
pub use alp::*;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ impl OptimizationRule for SlicePushDown {
let m = m.clone();
let input = m.get_input().first();
let new_input = pushdown(input, offset, length, expr_arena);
Some(m.replace_input(new_input))
Some(m.replace_inputs(&[new_input]))
}
Literal(lv) => {
match lv {
Expand Down
Loading

0 comments on commit f16d8ae

Please sign in to comment.