diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 65cdbf9fe62c..89487c4e7a79 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -1529,7 +1529,7 @@ pub fn create_window_expr( // unpack aliased logical expressions, e.g. "sum(col) over () as total" let (name, e) = match e { Expr::Alias(Alias { expr, name, .. }) => (name.clone(), expr.as_ref()), - _ => (e.display_name()?, e), + _ => (e.schema_name().to_string(), e), }; create_window_expr_with_name(e, name, logical_schema, execution_props) } @@ -1620,7 +1620,7 @@ pub fn create_aggregate_expr_and_maybe_filter( // unpack (nested) aliased logical expressions, e.g. "sum(col) as total" let (name, e) = match e { Expr::Alias(Alias { expr, name, .. }) => (Some(name.clone()), expr.as_ref()), - Expr::AggregateFunction(_) => (e.display_name().ok(), e), + Expr::AggregateFunction(_) => (Some(e.schema_name().to_string()), e), _ => (None, e), }; diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index edf45a244e1f..5030a95d3c8a 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -983,10 +983,35 @@ impl PartialOrd for Expr { } impl Expr { - /// Returns the name of this expression as it should appear in a schema. This name - /// will not include any CAST expressions. + #[deprecated(since = "40.0.0", note = "use schema_name instead")] pub fn display_name(&self) -> Result { - create_name(self) + Ok(self.schema_name().to_string()) + } + + /// The name of the column (field) that this `Expr` will produce. + /// + /// For example, for a projection (e.g. `SELECT `) the resulting arrow + /// [`Schema`] will have a field with this name. + /// + /// Note that the resulting string is subtlety different than the `Display` + /// representation for certain `Expr`. Some differences: + /// + /// 1. [`Expr::Alias`], which shows only the alias itself + /// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression + /// + /// # Example + /// ``` + /// # use datafusion_expr::{col, lit}; + /// let expr = col("foo").eq(lit(42)); + /// assert_eq!("foo = Int32(42)", expr.schema_name().to_string()); + /// + /// let expr = col("foo").alias("bar").eq(lit(11)); + /// assert_eq!("bar = Int32(11)", expr.schema_name().to_string()); + /// ``` + /// + /// [`Schema`]: arrow::datatypes::Schema + pub fn schema_name(&self) -> impl Display + '_ { + SchemaDisplay(self) } /// Returns a full and complete string representation of this expression. @@ -1119,7 +1144,7 @@ impl Expr { match self { // call Expr::display_name() on a Expr::Sort will throw an error Expr::Sort(Sort { expr, .. }) => expr.name_for_alias(), - expr => expr.display_name(), + expr => Ok(expr.schema_name().to_string()), } } @@ -1127,7 +1152,6 @@ impl Expr { /// alias if necessary. pub fn alias_if_changed(self, original_name: String) -> Result { let new_name = self.name_for_alias()?; - if new_name == original_name { return Ok(self); } @@ -1749,6 +1773,287 @@ macro_rules! expr_vec_fmt { }}; } +struct SchemaDisplay<'a>(&'a Expr); +impl<'a> Display for SchemaDisplay<'a> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self.0 { + // The same as Display + Expr::Column(_) + | Expr::Literal(_) + | Expr::ScalarVariable(..) + | Expr::Sort(_) + | Expr::OuterReferenceColumn(..) + | Expr::Placeholder(_) + | Expr::Wildcard { .. } => write!(f, "{}", self.0), + + Expr::AggregateFunction(AggregateFunction { + func, + args, + distinct, + filter, + order_by, + null_treatment, + }) => { + write!( + f, + "{}({}{})", + func.name(), + if *distinct { "DISTINCT " } else { "" }, + schema_name_from_exprs_comma_seperated_without_space(args)? + )?; + + if let Some(null_treatment) = null_treatment { + write!(f, " {}", null_treatment)?; + } + + if let Some(filter) = filter { + write!(f, " FILTER (WHERE {filter})")?; + }; + + if let Some(order_by) = order_by { + write!(f, " ORDER BY [{}]", schema_name_from_exprs(order_by)?)?; + }; + + Ok(()) + } + // expr is not shown since it is aliased + Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), + Expr::Between(Between { + expr, + negated, + low, + high, + }) => { + if *negated { + write!( + f, + "{} NOT BETWEEN {} AND {}", + SchemaDisplay(expr), + SchemaDisplay(low), + SchemaDisplay(high), + ) + } else { + write!( + f, + "{} BETWEEN {} AND {}", + SchemaDisplay(expr), + SchemaDisplay(low), + SchemaDisplay(high), + ) + } + } + Expr::BinaryExpr(BinaryExpr { left, op, right }) => { + write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),) + } + Expr::Case(Case { + expr, + when_then_expr, + else_expr, + }) => { + write!(f, "CASE ")?; + + if let Some(e) = expr { + write!(f, "{} ", SchemaDisplay(e))?; + } + + for (when, then) in when_then_expr { + write!( + f, + "WHEN {} THEN {} ", + SchemaDisplay(when), + SchemaDisplay(then), + )?; + } + + if let Some(e) = else_expr { + write!(f, "ELSE {} ", SchemaDisplay(e))?; + } + + write!(f, "END") + } + // cast expr is not shown to be consistant with Postgres and Spark + Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { + write!(f, "{}", SchemaDisplay(expr)) + } + Expr::InList(InList { + expr, + list, + negated, + }) => { + let inlist_name = schema_name_from_exprs(list)?; + + if *negated { + write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name) + } else { + write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name) + } + } + Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"), + Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"), + Expr::GroupingSet(GroupingSet::Cube(exprs)) => { + write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) + } + Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { + write!(f, "GROUPING SETS (")?; + for exprs in lists_of_exprs.iter() { + write!(f, "({})", schema_name_from_exprs(exprs)?)?; + } + write!(f, ")") + } + Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { + write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) + } + Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)), + Expr::IsNotNull(expr) => { + write!(f, "{} IS NOT NULL", SchemaDisplay(expr)) + } + Expr::IsUnknown(expr) => { + write!(f, "{} IS UNKNOWN", SchemaDisplay(expr)) + } + Expr::IsNotUnknown(expr) => { + write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr)) + } + Expr::InSubquery(InSubquery { negated: true, .. }) => { + write!(f, "NOT IN") + } + Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"), + Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)), + Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)), + Expr::IsNotTrue(expr) => { + write!(f, "{} IS NOT TRUE", SchemaDisplay(expr)) + } + Expr::IsNotFalse(expr) => { + write!(f, "{} IS NOT FALSE", SchemaDisplay(expr)) + } + Expr::Like(Like { + negated, + expr, + pattern, + escape_char, + case_insensitive, + }) => { + write!( + f, + "{} {}{} {}", + SchemaDisplay(expr), + if *negated { "NOT " } else { "" }, + if *case_insensitive { "ILIKE" } else { "LIKE" }, + SchemaDisplay(pattern), + )?; + + if let Some(char) = escape_char { + write!(f, " CHAR '{char}'")?; + } + + Ok(()) + } + Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)), + Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)), + Expr::Unnest(Unnest { expr }) => { + write!(f, "UNNEST({})", SchemaDisplay(expr)) + } + Expr::ScalarFunction(ScalarFunction { func, args }) => { + match func.schema_name(args) { + Ok(name) => { + write!(f, "{name}") + } + Err(e) => { + write!(f, "got error from schema_name {}", e) + } + } + } + Expr::ScalarSubquery(Subquery { subquery, .. }) => { + write!(f, "{}", subquery.schema().field(0).name()) + } + Expr::SimilarTo(Like { + negated, + expr, + pattern, + escape_char, + .. + }) => { + write!( + f, + "{} {} {}", + SchemaDisplay(expr), + if *negated { + "NOT SIMILAR TO" + } else { + "SIMILAR TO" + }, + SchemaDisplay(pattern), + )?; + if let Some(char) = escape_char { + write!(f, " CHAR '{char}'")?; + } + + Ok(()) + } + Expr::WindowFunction(WindowFunction { + fun, + args, + partition_by, + order_by, + window_frame, + null_treatment, + }) => { + write!( + f, + "{}({})", + fun, + schema_name_from_exprs_comma_seperated_without_space(args)? + )?; + + if let Some(null_treatment) = null_treatment { + write!(f, " {}", null_treatment)?; + } + + if !partition_by.is_empty() { + write!( + f, + " PARTITION BY [{}]", + schema_name_from_exprs(partition_by)? + )?; + } + + if !order_by.is_empty() { + write!(f, " ORDER BY [{}]", schema_name_from_exprs(order_by)?)?; + }; + + write!(f, " {window_frame}") + } + } + } +} + +/// Get schema_name for Vector of expressions +/// +/// Internal usage. Please call `schema_name_from_exprs` instead +// TODO: Use ", " to standardize the formatting of Vec, +// +pub(crate) fn schema_name_from_exprs_comma_seperated_without_space( + exprs: &[Expr], +) -> Result { + schema_name_from_exprs_inner(exprs, ",") +} + +/// Get schema_name for Vector of expressions +pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result { + schema_name_from_exprs_inner(exprs, ", ") +} + +fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result { + let mut s = String::new(); + for (i, e) in exprs.iter().enumerate() { + if i > 0 { + write!(&mut s, "{sep}")?; + } + write!(&mut s, "{}", SchemaDisplay(e))?; + } + + Ok(s) +} + /// Format expressions for display as part of a logical plan. In many cases, this will produce /// similar output to `Expr.name()` except that column names will be prefixed with '#'. impl fmt::Display for Expr { @@ -1827,6 +2132,10 @@ impl fmt::Display for Expr { Expr::ScalarFunction(fun) => { fmt_function(f, fun.name(), false, &fun.args, true) } + // TODO: use udf's display_name, need to fix the seperator issue, + // Expr::ScalarFunction(ScalarFunction { func, args }) => { + // write!(f, "{}", func.display_name(args).unwrap()) + // } Expr::WindowFunction(WindowFunction { fun, args, @@ -1961,6 +2270,7 @@ impl fmt::Display for Expr { }, Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"), Expr::Unnest(Unnest { expr }) => { + // TODO: use Display instead of Debug, there is non-unique expression name in projection issue. write!(f, "UNNEST({expr:?})") } } @@ -1979,7 +2289,6 @@ fn fmt_function( false => args.iter().map(|arg| format!("{arg:?}")).collect(), }; - // let args: Vec = args.iter().map(|arg| format!("{:?}", arg)).collect(); let distinct_str = match distinct { true => "DISTINCT ", false => "", @@ -1987,297 +2296,6 @@ fn fmt_function( write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) } -fn write_function_name( - w: &mut W, - fun: &str, - distinct: bool, - args: &[Expr], -) -> Result<()> { - write!(w, "{}(", fun)?; - if distinct { - w.write_str("DISTINCT ")?; - } - write_names_join(w, args, ",")?; - w.write_str(")")?; - Ok(()) -} - -/// Returns a readable name of an expression based on the input schema. -/// This function recursively transverses the expression for names such as "CAST(a > 2)". -pub(crate) fn create_name(e: &Expr) -> Result { - let mut s = String::new(); - write_name(&mut s, e)?; - Ok(s) -} - -fn write_name(w: &mut W, e: &Expr) -> Result<()> { - match e { - Expr::Alias(Alias { name, .. }) => write!(w, "{}", name)?, - Expr::Column(c) => write!(w, "{}", c.flat_name())?, - Expr::OuterReferenceColumn(_, c) => write!(w, "outer_ref({})", c.flat_name())?, - Expr::ScalarVariable(_, variable_names) => { - write!(w, "{}", variable_names.join("."))? - } - Expr::Literal(value) => write!(w, "{value:?}")?, - Expr::BinaryExpr(binary_expr) => { - write_name(w, binary_expr.left.as_ref())?; - write!(w, " {} ", binary_expr.op)?; - write_name(w, binary_expr.right.as_ref())?; - } - Expr::Like(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive, - }) => { - write!( - w, - "{} {}{} {}", - expr, - if *negated { "NOT " } else { "" }, - if *case_insensitive { "ILIKE" } else { "LIKE" }, - pattern, - )?; - if let Some(char) = escape_char { - write!(w, " CHAR '{char}'")?; - } - } - Expr::SimilarTo(Like { - negated, - expr, - pattern, - escape_char, - case_insensitive: _, - }) => { - write!( - w, - "{} {} {}", - expr, - if *negated { - "NOT SIMILAR TO" - } else { - "SIMILAR TO" - }, - pattern, - )?; - if let Some(char) = escape_char { - write!(w, " CHAR '{char}'")?; - } - } - Expr::Case(case) => { - write!(w, "CASE ")?; - if let Some(e) = &case.expr { - write_name(w, e)?; - w.write_str(" ")?; - } - for (when, then) in &case.when_then_expr { - w.write_str("WHEN ")?; - write_name(w, when)?; - w.write_str(" THEN ")?; - write_name(w, then)?; - w.write_str(" ")?; - } - if let Some(e) = &case.else_expr { - w.write_str("ELSE ")?; - write_name(w, e)?; - w.write_str(" ")?; - } - w.write_str("END")?; - } - Expr::Cast(Cast { expr, .. }) => { - // CAST does not change the expression name - write_name(w, expr)?; - } - Expr::TryCast(TryCast { expr, .. }) => { - // CAST does not change the expression name - write_name(w, expr)?; - } - Expr::Not(expr) => { - w.write_str("NOT ")?; - write_name(w, expr)?; - } - Expr::Negative(expr) => { - w.write_str("(- ")?; - write_name(w, expr)?; - w.write_str(")")?; - } - Expr::IsNull(expr) => { - write_name(w, expr)?; - w.write_str(" IS NULL")?; - } - Expr::IsNotNull(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT NULL")?; - } - Expr::IsTrue(expr) => { - write_name(w, expr)?; - w.write_str(" IS TRUE")?; - } - Expr::IsFalse(expr) => { - write_name(w, expr)?; - w.write_str(" IS FALSE")?; - } - Expr::IsUnknown(expr) => { - write_name(w, expr)?; - w.write_str(" IS UNKNOWN")?; - } - Expr::IsNotTrue(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT TRUE")?; - } - Expr::IsNotFalse(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT FALSE")?; - } - Expr::IsNotUnknown(expr) => { - write_name(w, expr)?; - w.write_str(" IS NOT UNKNOWN")?; - } - Expr::Exists(Exists { negated: true, .. }) => w.write_str("NOT EXISTS")?, - Expr::Exists(Exists { negated: false, .. }) => w.write_str("EXISTS")?, - Expr::InSubquery(InSubquery { negated: true, .. }) => w.write_str("NOT IN")?, - Expr::InSubquery(InSubquery { negated: false, .. }) => w.write_str("IN")?, - Expr::ScalarSubquery(subquery) => { - w.write_str(subquery.subquery.schema().field(0).name().as_str())?; - } - Expr::Unnest(Unnest { expr }) => { - w.write_str("unnest(")?; - write_name(w, expr)?; - w.write_str(")")?; - } - Expr::ScalarFunction(fun) => { - w.write_str(fun.func.display_name(&fun.args)?.as_str())?; - } - Expr::WindowFunction(WindowFunction { - fun, - args, - window_frame, - partition_by, - order_by, - null_treatment, - }) => { - write_function_name(w, &fun.to_string(), false, args)?; - - if let Some(nt) = null_treatment { - w.write_str(" ")?; - write!(w, "{}", nt)?; - } - if !partition_by.is_empty() { - w.write_str(" ")?; - write!(w, "PARTITION BY [{}]", expr_vec_fmt!(partition_by))?; - } - if !order_by.is_empty() { - w.write_str(" ")?; - write!(w, "ORDER BY [{}]", expr_vec_fmt!(order_by))?; - } - w.write_str(" ")?; - write!(w, "{window_frame}")?; - } - Expr::AggregateFunction(AggregateFunction { - func, - distinct, - args, - filter, - order_by, - null_treatment, - }) => { - write_function_name(w, func.name(), *distinct, args)?; - if let Some(fe) = filter { - write!(w, " FILTER (WHERE {fe})")?; - }; - if let Some(order_by) = order_by { - write!(w, " ORDER BY [{}]", expr_vec_fmt!(order_by))?; - }; - if let Some(nt) = null_treatment { - write!(w, " {}", nt)?; - } - } - Expr::GroupingSet(grouping_set) => match grouping_set { - GroupingSet::Rollup(exprs) => { - write!(w, "ROLLUP (")?; - write_names(w, exprs.as_slice())?; - write!(w, ")")?; - } - GroupingSet::Cube(exprs) => { - write!(w, "CUBE (")?; - write_names(w, exprs.as_slice())?; - write!(w, ")")?; - } - GroupingSet::GroupingSets(lists_of_exprs) => { - write!(w, "GROUPING SETS (")?; - for (i, exprs) in lists_of_exprs.iter().enumerate() { - if i != 0 { - write!(w, ", ")?; - } - write!(w, "(")?; - write_names(w, exprs.as_slice())?; - write!(w, ")")?; - } - write!(w, ")")?; - } - }, - Expr::InList(InList { - expr, - list, - negated, - }) => { - write_name(w, expr)?; - let list = list.iter().map(create_name); - if *negated { - write!(w, " NOT IN ({list:?})")?; - } else { - write!(w, " IN ({list:?})")?; - } - } - Expr::Between(Between { - expr, - negated, - low, - high, - }) => { - write_name(w, expr)?; - if *negated { - write!(w, " NOT BETWEEN ")?; - } else { - write!(w, " BETWEEN ")?; - } - write_name(w, low)?; - write!(w, " AND ")?; - write_name(w, high)?; - } - Expr::Sort { .. } => { - return internal_err!("Create name does not support sort expression") - } - Expr::Wildcard { qualifier } => match qualifier { - Some(qualifier) => { - return internal_err!( - "Create name does not support qualified wildcard, got {qualifier}" - ) - } - None => write!(w, "*")?, - }, - Expr::Placeholder(Placeholder { id, .. }) => write!(w, "{}", id)?, - }; - Ok(()) -} - -fn write_names(w: &mut W, exprs: &[Expr]) -> Result<()> { - exprs.iter().try_for_each(|e| write_name(w, e)) -} - -fn write_names_join(w: &mut W, exprs: &[Expr], sep: &str) -> Result<()> { - let mut iter = exprs.iter(); - if let Some(first_arg) = iter.next() { - write_name(w, first_arg)?; - } - for a in iter { - w.write_str(sep)?; - write_name(w, a)?; - } - Ok(()) -} - pub fn create_function_physical_name( fun: &str, distinct: bool, @@ -2394,7 +2412,7 @@ fn create_physical_name(e: &Expr, is_first_expr: bool) -> Result { let expr = create_physical_name(expr, false)?; Ok(format!("{expr} IS NOT UNKNOWN")) } - Expr::ScalarFunction(fun) => fun.func.display_name(&fun.args), + Expr::ScalarFunction(fun) => fun.func.schema_name(&fun.args), Expr::WindowFunction(WindowFunction { fun, args, @@ -2552,7 +2570,6 @@ mod test { let expected = "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END"; assert_eq!(expected, expr.canonical_name()); assert_eq!(expected, format!("{expr}")); - assert_eq!(expected, expr.display_name()?); Ok(()) } @@ -2567,7 +2584,7 @@ mod test { assert_eq!(expected_canonical, format!("{expr}")); // note that CAST intentionally has a name that is different from its `Display` // representation. CAST does not change the name of expressions. - assert_eq!("Float32(1.23)", expr.display_name()?); + assert_eq!("Float32(1.23)", expr.schema_name().to_string()); Ok(()) } diff --git a/datafusion/expr/src/expr_rewriter/mod.rs b/datafusion/expr/src/expr_rewriter/mod.rs index bf2bfe2c3932..0dc41d4a9ac1 100644 --- a/datafusion/expr/src/expr_rewriter/mod.rs +++ b/datafusion/expr/src/expr_rewriter/mod.rs @@ -173,7 +173,7 @@ pub fn create_col_from_scalar_expr( name, )), _ => { - let scalar_column = scalar_expr.display_name()?; + let scalar_column = scalar_expr.schema_name().to_string(); Ok(Column::new( Some::(subqry_alias.into()), scalar_column, @@ -475,16 +475,14 @@ mod test { let expr = rewrite_preserving_name(expr_from.clone(), &mut rewriter).unwrap(); let original_name = match &expr_from { - Expr::Sort(Sort { expr, .. }) => expr.display_name(), - expr => expr.display_name(), - } - .unwrap(); + Expr::Sort(Sort { expr, .. }) => expr.schema_name().to_string(), + expr => expr.schema_name().to_string(), + }; let new_name = match &expr { - Expr::Sort(Sort { expr, .. }) => expr.display_name(), - expr => expr.display_name(), - } - .unwrap(); + Expr::Sort(Sort { expr, .. }) => expr.schema_name().to_string(), + expr => expr.schema_name().to_string(), + }; assert_eq!( original_name, new_name, diff --git a/datafusion/expr/src/expr_rewriter/order_by.rs b/datafusion/expr/src/expr_rewriter/order_by.rs index 3d79caa21fde..bbb855801c3e 100644 --- a/datafusion/expr/src/expr_rewriter/order_by.rs +++ b/datafusion/expr/src/expr_rewriter/order_by.rs @@ -108,8 +108,8 @@ fn rewrite_in_terms_of_projection( }; // expr is an actual expr like min(t.c2), but we are looking - // for a column with the same "min(C2)", so translate there - let name = normalized_expr.display_name()?; + // for a column with the same "MIN(C2)", so translate there + let name = normalized_expr.schema_name().to_string(); let search_col = Expr::Column(Column { relation: None, diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 676903d59a07..1c645086c534 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -473,7 +473,7 @@ impl ExprSchemable for Expr { let (data_type, nullable) = self.data_type_and_nullable(input_schema)?; Ok(( None, - Field::new(self.display_name()?, data_type, nullable) + Field::new(self.schema_name().to_string(), data_type, nullable) .with_metadata(self.metadata(input_schema)?) .into(), )) diff --git a/datafusion/expr/src/logical_plan/builder.rs b/datafusion/expr/src/logical_plan/builder.rs index aa2ea4ae1c26..4ef346656ff4 100644 --- a/datafusion/expr/src/logical_plan/builder.rs +++ b/datafusion/expr/src/logical_plan/builder.rs @@ -1298,15 +1298,15 @@ fn add_group_by_exprs_from_dependencies( // c1 + 1` produces an output field named `"c1 + 1"` let mut group_by_field_names = group_expr .iter() - .map(|e| e.display_name()) - .collect::>>()?; + .map(|e| e.schema_name().to_string()) + .collect::>(); if let Some(target_indices) = get_target_functional_dependencies(schema, &group_by_field_names) { for idx in target_indices { let expr = Expr::Column(Column::from(schema.qualified_field(idx))); - let expr_name = expr.display_name()?; + let expr_name = expr.schema_name().to_string(); if !group_by_field_names.contains(&expr_name) { group_by_field_names.push(expr_name); group_expr.push(expr); @@ -1323,7 +1323,7 @@ pub(crate) fn validate_unique_names<'a>( let mut unique_names = HashMap::new(); expressions.into_iter().enumerate().try_for_each(|(position, expr)| { - let name = expr.display_name()?; + let name = expr.schema_name().to_string(); match unique_names.get(&name) { None => { unique_names.insert(name, (position, expr)); @@ -1557,7 +1557,7 @@ pub fn wrap_projection_for_join_if_necessary( if let Some(col) = key.try_as_col() { Ok(col.clone()) } else { - let name = key.display_name()?; + let name = key.schema_name().to_string(); Ok(Column::from_name(name)) } }) diff --git a/datafusion/expr/src/logical_plan/plan.rs b/datafusion/expr/src/logical_plan/plan.rs index 02176a506a25..c5538d8880a7 100644 --- a/datafusion/expr/src/logical_plan/plan.rs +++ b/datafusion/expr/src/logical_plan/plan.rs @@ -2740,8 +2740,8 @@ fn calc_func_dependencies_for_aggregate( if !contains_grouping_set(group_expr) { let group_by_expr_names = group_expr .iter() - .map(|item| item.display_name()) - .collect::>>()?; + .map(|item| item.schema_name().to_string()) + .collect::>(); let aggregate_func_dependencies = aggregate_functional_dependencies( input.schema(), &group_by_expr_names, diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 5ba6e3007ead..34b5909f0a5a 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -26,7 +26,7 @@ use arrow::datatypes::DataType; use datafusion_common::{not_impl_err, ExprSchema, Result}; -use crate::expr::create_name; +use crate::expr::schema_name_from_exprs_comma_seperated_without_space; use crate::interval_arithmetic::Interval; use crate::simplify::{ExprSimplifyResult, SimplifyInfo}; use crate::sort_properties::{ExprProperties, SortProperties}; @@ -154,6 +154,13 @@ impl ScalarUDF { self.inner.display_name(args) } + /// Returns this function's schema_name. + /// + /// See [`ScalarUDFImpl::schema_name`] for more details + pub fn schema_name(&self, args: &[Expr]) -> Result { + self.inner.schema_name(args) + } + /// Returns the aliases for this function. /// /// See [`ScalarUDF::with_aliases`] for more details @@ -345,12 +352,23 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { fn name(&self) -> &str; /// Returns the user-defined display name of the UDF given the arguments - /// fn display_name(&self, args: &[Expr]) -> Result { - let names: Vec = args.iter().map(create_name).collect::>()?; + let names: Vec = args.iter().map(ToString::to_string).collect(); + // TODO: join with ", " to standardize the formatting of Vec, Ok(format!("{}({})", self.name(), names.join(","))) } + /// Returns the name of the column this expression would create + /// + /// See [`Expr::schema_name`] for details + fn schema_name(&self, args: &[Expr]) -> Result { + Ok(format!( + "{}({})", + self.name(), + schema_name_from_exprs_comma_seperated_without_space(args)? + )) + } + /// Returns the function's [`Signature`] for information about what input /// types are accepted and the function's Volatility. fn signature(&self) -> &Signature; diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 65a70b673266..c3e4505ed19c 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -798,7 +798,9 @@ pub fn expr_as_column_expr(expr: &Expr, plan: &LogicalPlan) -> Result { let (qualifier, field) = plan.schema().qualified_field_from_column(col)?; Ok(Expr::from(Column::from((qualifier, field)))) } - _ => Ok(Expr::Column(Column::from_name(expr.display_name()?))), + _ => Ok(Expr::Column(Column::from_name( + expr.schema_name().to_string(), + ))), } } diff --git a/datafusion/functions-nested/src/expr_ext.rs b/datafusion/functions-nested/src/expr_ext.rs index 3524d62d0bc4..4da4a3f583b7 100644 --- a/datafusion/functions-nested/src/expr_ext.rs +++ b/datafusion/functions-nested/src/expr_ext.rs @@ -38,7 +38,7 @@ use crate::extract::{array_element, array_slice}; /// # use datafusion_functions_nested::expr_ext::IndexAccessor; /// let expr = col("c1") /// .index(lit(3)); -/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(3)]"); +/// assert_eq!(expr.schema_name().to_string(), "c1[Int32(3)]"); /// ``` pub trait IndexAccessor { fn index(self, key: Expr) -> Expr; @@ -68,7 +68,7 @@ impl IndexAccessor for Expr { /// # use datafusion_functions_nested::expr_ext::SliceAccessor; /// let expr = col("c1") /// .range(lit(2), lit(4)); -/// assert_eq!(expr.display_name().unwrap(), "c1[Int32(2):Int32(4)]"); +/// assert_eq!(expr.schema_name().to_string(), "c1[Int32(2):Int32(4)]"); /// ``` pub trait SliceAccessor { fn range(self, start: Expr, stop: Expr) -> Expr; diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index af4e36926b68..b9e82f371369 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -40,7 +40,7 @@ use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; use std::sync::Arc; -use crate::utils::{get_arg_name, make_scalar_function}; +use crate::utils::make_scalar_function; // Create static instances of ScalarUDFs for each function make_udf_expr_and_func!( @@ -97,11 +97,24 @@ impl ScalarUDFImpl for ArrayElement { } fn display_name(&self, args: &[Expr]) -> Result { - Ok(format!( - "{}[{}]", - get_arg_name(args, 0), - get_arg_name(args, 1) - )) + let args_name = args.iter().map(ToString::to_string).collect::>(); + if args_name.len() != 2 { + return exec_err!("expect 2 args, got {}", args_name.len()); + } + + Ok(format!("{}[{}]", args_name[0], args_name[1])) + } + + fn schema_name(&self, args: &[Expr]) -> Result { + let args_name = args + .iter() + .map(|e| e.schema_name().to_string()) + .collect::>(); + if args_name.len() != 2 { + return exec_err!("expect 2 args, got {}", args_name.len()); + } + + Ok(format!("{}[{}]", args_name[0], args_name[1])) } fn signature(&self) -> &Signature { @@ -254,14 +267,24 @@ impl ScalarUDFImpl for ArraySlice { } fn display_name(&self, args: &[Expr]) -> Result { - Ok(format!( - "{}[{}]", - get_arg_name(args, 0), - (1..args.len()) - .map(|i| get_arg_name(args, i)) - .collect::>() - .join(":") - )) + let args_name = args.iter().map(ToString::to_string).collect::>(); + if let Some((arr, indexes)) = args_name.split_first() { + Ok(format!("{arr}[{}]", indexes.join(":"))) + } else { + exec_err!("no argument") + } + } + + fn schema_name(&self, args: &[Expr]) -> Result { + let args_name = args + .iter() + .map(|e| e.schema_name().to_string()) + .collect::>(); + if let Some((arr, indexes)) = args_name.split_first() { + Ok(format!("{arr}[{}]", indexes.join(":"))) + } else { + exec_err!("no argument") + } } fn name(&self) -> &str { diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index f396c3b22581..688e1633e5cf 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -32,7 +32,7 @@ use datafusion_common::{exec_err, plan_err, Result, ScalarValue}; use core::any::type_name; use datafusion_common::DataFusionError; -use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionImplementation}; +use datafusion_expr::{ColumnarValue, ScalarFunctionImplementation}; macro_rules! downcast_arg { ($ARG:expr, $ARRAY_TYPE:ident) => {{ @@ -253,11 +253,6 @@ pub(crate) fn compute_array_dims( } } -/// Returns the name of the argument at index `i`, or an empty string if the index is out of bounds. -pub(super) fn get_arg_name(args: &[Expr], i: usize) -> String { - args.get(i).map(ToString::to_string).unwrap_or_default() -} - #[cfg(test)] mod tests { use super::*; diff --git a/datafusion/functions/src/core/expr_ext.rs b/datafusion/functions/src/core/expr_ext.rs index d80df0f334ab..af05f447f1c1 100644 --- a/datafusion/functions/src/core/expr_ext.rs +++ b/datafusion/functions/src/core/expr_ext.rs @@ -41,7 +41,7 @@ use super::expr_fn::get_field; /// # use datafusion_functions::core::expr_ext::FieldAccessor; /// let expr = col("c1") /// .field("my_field"); -/// assert_eq!(expr.display_name().unwrap(), "c1[my_field]"); +/// assert_eq!(expr.schema_name().to_string(), "c1[my_field]"); /// ``` pub trait FieldAccessor { fn field(self, name: impl Literal) -> Expr; diff --git a/datafusion/functions/src/core/getfield.rs b/datafusion/functions/src/core/getfield.rs index 2c2e36b91b13..a51f895c5084 100644 --- a/datafusion/functions/src/core/getfield.rs +++ b/datafusion/functions/src/core/getfield.rs @@ -74,7 +74,27 @@ impl ScalarUDFImpl for GetFieldFunc { } }; - Ok(format!("{}[{}]", args[0].display_name()?, name)) + Ok(format!("{}[{}]", args[0], name)) + } + + fn schema_name(&self, args: &[Expr]) -> Result { + if args.len() != 2 { + return exec_err!( + "get_field function requires 2 arguments, got {}", + args.len() + ); + } + + let name = match &args[1] { + Expr::Literal(name) => name, + _ => { + return exec_err!( + "get_field function requires the argument field_name to be a string" + ); + } + }; + + Ok(format!("{}[{}]", args[0].schema_name(), name)) } fn signature(&self) -> &Signature { diff --git a/datafusion/optimizer/src/analyzer/type_coercion.rs b/datafusion/optimizer/src/analyzer/type_coercion.rs index 2823b0fca2d1..2bb859d84ad7 100644 --- a/datafusion/optimizer/src/analyzer/type_coercion.rs +++ b/datafusion/optimizer/src/analyzer/type_coercion.rs @@ -1040,9 +1040,7 @@ mod test { let expr = col("a").in_list(vec![lit(1_i32), lit(4_i8), lit(8_i64)], false); let empty = empty_with_type(DataType::Int64); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - let expected = - "Projection: a IN ([CAST(Int32(1) AS Int64), CAST(Int8(4) AS Int64), Int64(8)]) AS a IN (Map { iter: Iter([Literal(Int32(1)), Literal(Int8(4)), Literal(Int64(8))]) })\ - \n EmptyRelation"; + let expected = "Projection: a IN ([CAST(Int32(1) AS Int64), CAST(Int8(4) AS Int64), Int64(8)])\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; // a in (1,4,8), a is decimal @@ -1055,9 +1053,7 @@ mod test { )?), })); let plan = LogicalPlan::Projection(Projection::try_new(vec![expr], empty)?); - let expected = - "Projection: CAST(a AS Decimal128(24, 4)) IN ([CAST(Int32(1) AS Decimal128(24, 4)), CAST(Int8(4) AS Decimal128(24, 4)), CAST(Int64(8) AS Decimal128(24, 4))]) AS a IN (Map { iter: Iter([Literal(Int32(1)), Literal(Int8(4)), Literal(Int64(8))]) })\ - \n EmptyRelation"; + let expected = "Projection: CAST(a AS Decimal128(24, 4)) IN ([CAST(Int32(1) AS Decimal128(24, 4)), CAST(Int8(4) AS Decimal128(24, 4)), CAST(Int64(8) AS Decimal128(24, 4))])\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected) } @@ -1150,8 +1146,7 @@ mod test { let like_expr = Expr::Like(Like::new(false, expr, pattern, None, false)); let empty = empty_with_type(DataType::Utf8); let plan = LogicalPlan::Projection(Projection::try_new(vec![like_expr], empty)?); - let expected = "Projection: a LIKE CAST(NULL AS Utf8) AS a LIKE NULL\ - \n EmptyRelation"; + let expected = "Projection: a LIKE CAST(NULL AS Utf8)\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; let expr = Box::new(col("a")); @@ -1179,8 +1174,7 @@ mod test { let ilike_expr = Expr::Like(Like::new(false, expr, pattern, None, true)); let empty = empty_with_type(DataType::Utf8); let plan = LogicalPlan::Projection(Projection::try_new(vec![ilike_expr], empty)?); - let expected = "Projection: a ILIKE CAST(NULL AS Utf8) AS a ILIKE NULL\ - \n EmptyRelation"; + let expected = "Projection: a ILIKE CAST(NULL AS Utf8)\n EmptyRelation"; assert_analyzed_plan_eq(Arc::new(TypeCoercion::new()), plan, expected)?; let expr = Box::new(col("a")); diff --git a/datafusion/optimizer/src/common_subexpr_eliminate.rs b/datafusion/optimizer/src/common_subexpr_eliminate.rs index 9cd9e4dece26..45e5409ae9ac 100644 --- a/datafusion/optimizer/src/common_subexpr_eliminate.rs +++ b/datafusion/optimizer/src/common_subexpr_eliminate.rs @@ -1108,7 +1108,7 @@ impl TreeNodeRewriter for CommonSubexprRewriter<'_, '_> { self.down_index += 1; } - let expr_name = expr.display_name()?; + let expr_name = expr.schema_name().to_string(); let (_, expr_alias) = self.common_exprs.entry(expr_id).or_insert_with(|| { let expr_alias = self.alias_generator.next(CSE_PREFIX); diff --git a/datafusion/optimizer/src/decorrelate.rs b/datafusion/optimizer/src/decorrelate.rs index fdd9ef8a8b0b..16b4e43abcd5 100644 --- a/datafusion/optimizer/src/decorrelate.rs +++ b/datafusion/optimizer/src/decorrelate.rs @@ -452,7 +452,8 @@ fn agg_exprs_evaluation_result_on_empty_batch( let simplifier = ExprSimplifier::new(info); let result_expr = simplifier.simplify(result_expr)?; if matches!(result_expr, Expr::Literal(ScalarValue::Int64(_))) { - expr_result_map_for_count_bug.insert(e.display_name()?, result_expr); + expr_result_map_for_count_bug + .insert(e.schema_name().to_string(), result_expr); } } Ok(()) @@ -490,7 +491,7 @@ fn proj_exprs_evaluation_result_on_empty_batch( let expr_name = match expr { Expr::Alias(Alias { name, .. }) => name.to_string(), Expr::Column(Column { relation: _, name }) => name.to_string(), - _ => expr.display_name()?, + _ => expr.schema_name().to_string(), }; expr_result_map_for_count_bug.insert(expr_name, result_expr); } @@ -546,8 +547,8 @@ fn filter_exprs_evaluation_result_on_empty_batch( )], else_expr: Some(Box::new(Expr::Literal(ScalarValue::Null))), }); - expr_result_map_for_count_bug - .insert(new_expr.display_name()?, new_expr); + let expr_key = new_expr.schema_name().to_string(); + expr_result_map_for_count_bug.insert(expr_key, new_expr); } None } diff --git a/datafusion/optimizer/src/optimize_projections/mod.rs b/datafusion/optimizer/src/optimize_projections/mod.rs index d014b9149aab..ac4ed87a4a1a 100644 --- a/datafusion/optimizer/src/optimize_projections/mod.rs +++ b/datafusion/optimizer/src/optimize_projections/mod.rs @@ -135,8 +135,8 @@ fn optimize_projections( let group_by_expr_existing = aggregate .group_expr .iter() - .map(|group_by_expr| group_by_expr.display_name()) - .collect::>>()?; + .map(|group_by_expr| group_by_expr.schema_name().to_string()) + .collect::>(); let new_group_bys = if let Some(simplest_groupby_indices) = get_required_group_by_exprs_indices( @@ -1928,8 +1928,8 @@ mod tests { WindowFunctionDefinition::AggregateUDF(max_udaf()), vec![col("test.b")], )); - let col1 = col(max1.display_name()?); - let col2 = col(max2.display_name()?); + let col1 = col(max1.schema_name().to_string()); + let col2 = col(max2.schema_name().to_string()); let plan = LogicalPlanBuilder::from(table_scan) .window(vec![max1])? diff --git a/datafusion/optimizer/src/push_down_filter.rs b/datafusion/optimizer/src/push_down_filter.rs index 4254d3464662..8455919c35a8 100644 --- a/datafusion/optimizer/src/push_down_filter.rs +++ b/datafusion/optimizer/src/push_down_filter.rs @@ -817,7 +817,7 @@ impl OptimizerRule for PushDownFilter { let group_expr_columns = agg .group_expr .iter() - .map(|e| Ok(Column::from_qualified_name(e.display_name()?))) + .map(|e| Ok(Column::from_qualified_name(e.schema_name().to_string()))) .collect::>>()?; let predicates = split_conjunction_owned(filter.predicate.clone()); @@ -838,7 +838,7 @@ impl OptimizerRule for PushDownFilter { // So we need create a replace_map, add {`a+b` --> Expr(Column(a)+Column(b))} let mut replace_map = HashMap::new(); for expr in &agg.group_expr { - replace_map.insert(expr.display_name()?, expr.clone()); + replace_map.insert(expr.schema_name().to_string(), expr.clone()); } let replaced_push_predicates = push_predicates .into_iter() diff --git a/datafusion/optimizer/src/scalar_subquery_to_join.rs b/datafusion/optimizer/src/scalar_subquery_to_join.rs index 3c66da21aff6..c79180b79256 100644 --- a/datafusion/optimizer/src/scalar_subquery_to_join.rs +++ b/datafusion/optimizer/src/scalar_subquery_to_join.rs @@ -188,9 +188,9 @@ impl OptimizerRule for ScalarSubqueryToJoin { let mut proj_exprs = vec![]; for expr in projection.expr.iter() { - let old_expr_name = expr.display_name()?; + let old_expr_name = expr.schema_name().to_string(); let new_expr = expr_to_rewrite_expr_map.get(expr).unwrap(); - let new_expr_name = new_expr.display_name()?; + let new_expr_name = new_expr.schema_name().to_string(); if new_expr_name != old_expr_name { proj_exprs.push(new_expr.clone().alias(old_expr_name)) } else { diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs index 5b4395792447..30cae17eaf9f 100644 --- a/datafusion/optimizer/src/single_distinct_to_groupby.rs +++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs @@ -194,7 +194,7 @@ impl OptimizerRule for SingleDistinctToGroupBy { } let arg = args.swap_remove(0); - if group_fields_set.insert(arg.display_name()?) { + if group_fields_set.insert(arg.schema_name().to_string()) { inner_group_exprs .push(arg.alias(SINGLE_DISTINCT_ALIAS)); } diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index b96398ef217f..e5c226418441 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -592,7 +592,9 @@ async fn roundtrip_logical_plan_copy_to_parquet() -> Result<()> { // Set specific Parquet format options let mut key_value_metadata = HashMap::new(); key_value_metadata.insert("test".to_string(), Some("test".to_string())); - parquet_format.key_value_metadata = key_value_metadata.clone(); + parquet_format + .key_value_metadata + .clone_from(&key_value_metadata); parquet_format.global.allow_single_file_parallelism = false; parquet_format.global.created_by = "test".to_string(); diff --git a/datafusion/proto/tests/cases/serialize.rs b/datafusion/proto/tests/cases/serialize.rs index cc683e778ebc..f28098d83b97 100644 --- a/datafusion/proto/tests/cases/serialize.rs +++ b/datafusion/proto/tests/cases/serialize.rs @@ -276,7 +276,7 @@ fn test_expression_serialization_roundtrip() { /// Extracts the first part of a function name /// 'foo(bar)' -> 'foo' fn extract_function_name(expr: &Expr) -> String { - let name = expr.display_name().unwrap(); + let name = expr.schema_name().to_string(); name.split('(').next().unwrap().to_string() } } diff --git a/datafusion/sql/src/unparser/utils.rs b/datafusion/sql/src/unparser/utils.rs index 71f64f1cf459..c1b3fe18f7e7 100644 --- a/datafusion/sql/src/unparser/utils.rs +++ b/datafusion/sql/src/unparser/utils.rs @@ -115,7 +115,7 @@ pub(crate) fn unproject_window_exprs(expr: &Expr, windows: &[&Window]) -> Result if let Some(unproj) = windows .iter() .flat_map(|w| w.window_expr.iter()) - .find(|window_expr| window_expr.display_name().unwrap() == c.name) + .find(|window_expr| window_expr.schema_name().to_string() == c.name) { Ok(Transformed::yes(unproj.clone())) } else { diff --git a/datafusion/sql/src/utils.rs b/datafusion/sql/src/utils.rs index 3b044646e6cb..5cdc546e0267 100644 --- a/datafusion/sql/src/utils.rs +++ b/datafusion/sql/src/utils.rs @@ -329,7 +329,7 @@ pub(crate) fn transform_bottom_unnest( // Full context, we are trying to plan the execution as InnerProjection->Unnest->OuterProjection // inside unnest execution, each column inside the inner projection // will be transformed into new columns. Thus we need to keep track of these placeholding column names - let placeholder_name = unnest_expr.display_name()?; + let placeholder_name = unnest_expr.schema_name().to_string(); unnest_placeholder_columns.push(placeholder_name.clone()); // Add alias for the argument expression, to avoid naming conflicts @@ -402,7 +402,7 @@ pub(crate) fn transform_bottom_unnest( } else { // We need to evaluate the expr in the inner projection, // outer projection just select its name - let column_name = transformed_expr.display_name()?; + let column_name = transformed_expr.schema_name().to_string(); inner_projection_exprs.push(transformed_expr); Ok(vec![Expr::Column(Column::from_name(column_name))]) } @@ -469,16 +469,16 @@ mod tests { assert_eq!( transformed_exprs, vec![ - col("unnest(struct_col).field1"), - col("unnest(struct_col).field2"), + col("UNNEST(struct_col).field1"), + col("UNNEST(struct_col).field2"), ] ); - assert_eq!(unnest_placeholder_columns, vec!["unnest(struct_col)"]); + assert_eq!(unnest_placeholder_columns, vec!["UNNEST(struct_col)"]); // still reference struct_col in original schema but with alias, // to avoid colliding with the projection on the column itself if any assert_eq!( inner_projection_exprs, - vec![col("struct_col").alias("unnest(struct_col)"),] + vec![col("struct_col").alias("UNNEST(struct_col)"),] ); // unnest(array_col) + 1 @@ -491,12 +491,12 @@ mod tests { )?; assert_eq!( unnest_placeholder_columns, - vec!["unnest(struct_col)", "unnest(array_col)"] + vec!["UNNEST(struct_col)", "UNNEST(array_col)"] ); // only transform the unnest children assert_eq!( transformed_exprs, - vec![col("unnest(array_col)").add(lit(1i64))] + vec![col("UNNEST(array_col)").add(lit(1i64))] ); // keep appending to the current vector @@ -505,8 +505,8 @@ mod tests { assert_eq!( inner_projection_exprs, vec![ - col("struct_col").alias("unnest(struct_col)"), - col("array_col").alias("unnest(array_col)") + col("struct_col").alias("UNNEST(struct_col)"), + col("array_col").alias("UNNEST(array_col)") ] ); @@ -553,17 +553,17 @@ mod tests { // Only the inner most/ bottom most unnest is transformed assert_eq!( transformed_exprs, - vec![unnest(col("unnest(struct_col[matrix])"))] + vec![unnest(col("UNNEST(struct_col[matrix])"))] ); assert_eq!( unnest_placeholder_columns, - vec!["unnest(struct_col[matrix])"] + vec!["UNNEST(struct_col[matrix])"] ); assert_eq!( inner_projection_exprs, vec![col("struct_col") .field("matrix") - .alias("unnest(struct_col[matrix])"),] + .alias("UNNEST(struct_col[matrix])"),] ); Ok(()) diff --git a/datafusion/sql/tests/cases/plan_to_sql.rs b/datafusion/sql/tests/cases/plan_to_sql.rs index 8f9f1dd78f93..59e0151c15a1 100644 --- a/datafusion/sql/tests/cases/plan_to_sql.rs +++ b/datafusion/sql/tests/cases/plan_to_sql.rs @@ -415,10 +415,10 @@ fn test_unnest_logical_plan() -> Result<()> { let sql_to_rel = SqlToRel::new(&context); let plan = sql_to_rel.sql_statement_to_plan(statement).unwrap(); - let expected = "Projection: unnest(unnest_table.struct_col).field1, unnest(unnest_table.struct_col).field2, unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ - \n Unnest: lists[unnest(unnest_table.array_col)] structs[unnest(unnest_table.struct_col)]\ - \n Projection: unnest_table.struct_col AS unnest(unnest_table.struct_col), unnest_table.array_col AS unnest(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ - \n TableScan: unnest_table"; + let expected = "Projection: UNNEST(unnest_table.struct_col).field1, UNNEST(unnest_table.struct_col).field2, UNNEST(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ + \n Unnest: lists[UNNEST(unnest_table.array_col)] structs[UNNEST(unnest_table.struct_col)]\ + \n Projection: unnest_table.struct_col AS UNNEST(unnest_table.struct_col), unnest_table.array_col AS UNNEST(unnest_table.array_col), unnest_table.struct_col, unnest_table.array_col\ + \n TableScan: unnest_table"; assert_eq!(format!("{plan}"), expected); diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt index f2972e4c14c2..d0f9ecf54204 100644 --- a/datafusion/sqllogictest/test_files/array.slt +++ b/datafusion/sqllogictest/test_files/array.slt @@ -1137,7 +1137,7 @@ from arrays_values_without_nulls; ## array_element (aliases: array_extract, list_extract, list_element) # Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: Error during planning: array_element does not support zero arguments. +query error DataFusion error: Error during planning: Error during planning: array_element does not support zero arguments select array_element(); # array_element error @@ -1979,7 +1979,7 @@ select array_slice(a, -1, 2, 1), array_slice(a, -1, 2), [6.0] [6.0] [] [] # Testing with empty arguments should result in an error -query error DataFusion error: Error during planning: Error during planning: array_slice does not support zero arguments. +query error DataFusion error: Error during planning: Error during planning: array_slice does not support zero arguments select array_slice(); diff --git a/datafusion/sqllogictest/test_files/push_down_filter.slt b/datafusion/sqllogictest/test_files/push_down_filter.slt index 3ca187ddee84..2d74c1fc6994 100644 --- a/datafusion/sqllogictest/test_files/push_down_filter.slt +++ b/datafusion/sqllogictest/test_files/push_down_filter.slt @@ -36,9 +36,9 @@ query TT explain select uc2 from (select unnest(column2) as uc2, column1 from v) where column1 = 2; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2 -02)--Unnest: lists[unnest(v.column2)] structs[] -03)----Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2 +02)--Unnest: lists[UNNEST(v.column2)] structs[] +03)----Projection: v.column2 AS UNNEST(v.column2), v.column1 04)------Filter: v.column1 = Int64(2) 05)--------TableScan: v projection=[column1, column2] @@ -53,11 +53,11 @@ query TT explain select uc2 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2 -02)--Filter: unnest(v.column2) > Int64(3) -03)----Projection: unnest(v.column2) -04)------Unnest: lists[unnest(v.column2)] structs[] -05)--------Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2 +02)--Filter: UNNEST(v.column2) > Int64(3) +03)----Projection: UNNEST(v.column2) +04)------Unnest: lists[UNNEST(v.column2)] structs[] +05)--------Projection: v.column2 AS UNNEST(v.column2), v.column1 06)----------TableScan: v projection=[column1, column2] query II @@ -71,10 +71,10 @@ query TT explain select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 AND column1 = 2; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2, v.column1 -02)--Filter: unnest(v.column2) > Int64(3) -03)----Unnest: lists[unnest(v.column2)] structs[] -04)------Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2, v.column1 +02)--Filter: UNNEST(v.column2) > Int64(3) +03)----Unnest: lists[UNNEST(v.column2)] structs[] +04)------Projection: v.column2 AS UNNEST(v.column2), v.column1 05)--------Filter: v.column1 = Int64(2) 06)----------TableScan: v projection=[column1, column2] @@ -90,10 +90,10 @@ query TT explain select uc2, column1 from (select unnest(column2) as uc2, column1 from v) where uc2 > 3 OR column1 = 2; ---- logical_plan -01)Projection: unnest(v.column2) AS uc2, v.column1 -02)--Filter: unnest(v.column2) > Int64(3) OR v.column1 = Int64(2) -03)----Unnest: lists[unnest(v.column2)] structs[] -04)------Projection: v.column2 AS unnest(v.column2), v.column1 +01)Projection: UNNEST(v.column2) AS uc2, v.column1 +02)--Filter: UNNEST(v.column2) > Int64(3) OR v.column1 = Int64(2) +03)----Unnest: lists[UNNEST(v.column2)] structs[] +04)------Projection: v.column2 AS UNNEST(v.column2), v.column1 05)--------TableScan: v projection=[column1, column2] statement ok @@ -112,10 +112,10 @@ query TT explain select * from (select column1, unnest(column2) as o from d) where o['a'] = 1; ---- logical_plan -01)Projection: d.column1, unnest(d.column2) AS o -02)--Filter: get_field(unnest(d.column2), Utf8("a")) = Int64(1) -03)----Unnest: lists[unnest(d.column2)] structs[] -04)------Projection: d.column1, d.column2 AS unnest(d.column2) +01)Projection: d.column1, UNNEST(d.column2) AS o +02)--Filter: get_field(UNNEST(d.column2), Utf8("a")) = Int64(1) +03)----Unnest: lists[UNNEST(d.column2)] structs[] +04)------Projection: d.column1, d.column2 AS UNNEST(d.column2) 05)--------TableScan: d projection=[column1, column2] diff --git a/datafusion/sqllogictest/test_files/unnest.slt b/datafusion/sqllogictest/test_files/unnest.slt index d818c0e92795..4957011b8ba2 100644 --- a/datafusion/sqllogictest/test_files/unnest.slt +++ b/datafusion/sqllogictest/test_files/unnest.slt @@ -539,21 +539,21 @@ query TT explain select unnest(unnest(unnest(column3)['c1'])), column3 from recursive_unnest_table; ---- logical_plan -01)Unnest: lists[unnest(unnest(unnest(recursive_unnest_table.column3)[c1]))] structs[] -02)--Projection: unnest(unnest(recursive_unnest_table.column3)[c1]) AS unnest(unnest(unnest(recursive_unnest_table.column3)[c1])), recursive_unnest_table.column3 -03)----Unnest: lists[unnest(unnest(recursive_unnest_table.column3)[c1])] structs[] -04)------Projection: get_field(unnest(recursive_unnest_table.column3), Utf8("c1")) AS unnest(unnest(recursive_unnest_table.column3)[c1]), recursive_unnest_table.column3 -05)--------Unnest: lists[unnest(recursive_unnest_table.column3)] structs[] -06)----------Projection: recursive_unnest_table.column3 AS unnest(recursive_unnest_table.column3), recursive_unnest_table.column3 +01)Unnest: lists[UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1]))] structs[] +02)--Projection: UNNEST(UNNEST(recursive_unnest_table.column3)[c1]) AS UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), recursive_unnest_table.column3 +03)----Unnest: lists[UNNEST(UNNEST(recursive_unnest_table.column3)[c1])] structs[] +04)------Projection: get_field(UNNEST(recursive_unnest_table.column3), Utf8("c1")) AS UNNEST(UNNEST(recursive_unnest_table.column3)[c1]), recursive_unnest_table.column3 +05)--------Unnest: lists[UNNEST(recursive_unnest_table.column3)] structs[] +06)----------Projection: recursive_unnest_table.column3 AS UNNEST(recursive_unnest_table.column3), recursive_unnest_table.column3 07)------------TableScan: recursive_unnest_table projection=[column3] physical_plan 01)UnnestExec -02)--ProjectionExec: expr=[unnest(unnest(recursive_unnest_table.column3)[c1])@0 as unnest(unnest(unnest(recursive_unnest_table.column3)[c1])), column3@1 as column3] +02)--ProjectionExec: expr=[UNNEST(UNNEST(recursive_unnest_table.column3)[c1])@0 as UNNEST(UNNEST(UNNEST(recursive_unnest_table.column3)[c1])), column3@1 as column3] 03)----UnnestExec -04)------ProjectionExec: expr=[get_field(unnest(recursive_unnest_table.column3)@0, c1) as unnest(unnest(recursive_unnest_table.column3)[c1]), column3@1 as column3] +04)------ProjectionExec: expr=[get_field(UNNEST(recursive_unnest_table.column3)@0, c1) as UNNEST(UNNEST(recursive_unnest_table.column3)[c1]), column3@1 as column3] 05)--------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 06)----------UnnestExec -07)------------ProjectionExec: expr=[column3@0 as unnest(recursive_unnest_table.column3), column3@0 as column3] +07)------------ProjectionExec: expr=[column3@0 as UNNEST(recursive_unnest_table.column3), column3@0 as column3] 08)--------------MemoryExec: partitions=1, partition_sizes=[1] ## group by unnest diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index 89f2efec66aa..f2756bb06d1e 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -417,7 +417,7 @@ pub async fn from_substrait_rel( } // Ensure the expression has a unique display name, so that project's // validate_unique_names doesn't fail - let name = x.display_name()?; + let name = x.schema_name().to_string(); let mut new_name = name.clone(); let mut i = 0; while names.contains(&new_name) { diff --git a/datafusion/substrait/tests/cases/consumer_integration.rs b/datafusion/substrait/tests/cases/consumer_integration.rs index 360377c231a3..0a86d27e013c 100644 --- a/datafusion/substrait/tests/cases/consumer_integration.rs +++ b/datafusion/substrait/tests/cases/consumer_integration.rs @@ -358,8 +358,8 @@ mod tests { let plan = from_substrait_plan(&ctx, &proto).await?; let plan_str = format!("{}", plan); - assert_eq!(plan_str, "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END) / sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS PROMO_REVENUE\ - \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END), sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount)]]\ + assert_eq!(plan_str, "Projection: Decimal128(Some(10000),5,2) * sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE Utf8(\"PROMO%\") THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END) / sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount) AS PROMO_REVENUE\ + \n Aggregate: groupBy=[[]], aggr=[[sum(CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE Utf8(\"PROMO%\") THEN FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount ELSE Decimal128(Some(0),19,0) END), sum(FILENAME_PLACEHOLDER_0.l_extendedprice * Int32(1) - FILENAME_PLACEHOLDER_0.l_discount)]]\ \n Projection: CASE WHEN FILENAME_PLACEHOLDER_1.p_type LIKE CAST(Utf8(\"PROMO%\") AS Utf8) THEN FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount) ELSE Decimal128(Some(0),19,0) END, FILENAME_PLACEHOLDER_0.l_extendedprice * (CAST(Int32(1) AS Decimal128(19, 0)) - FILENAME_PLACEHOLDER_0.l_discount)\ \n Filter: FILENAME_PLACEHOLDER_0.l_partkey = FILENAME_PLACEHOLDER_1.p_partkey AND FILENAME_PLACEHOLDER_0.l_shipdate >= Date32(\"1995-09-01\") AND FILENAME_PLACEHOLDER_0.l_shipdate < CAST(Utf8(\"1995-10-01\") AS Date32)\ \n Inner Join: Filter: Boolean(true)\