From d7e4077624a003306bf628c3a416d911d62ee252 Mon Sep 17 00:00:00 2001 From: Yongting You <2010youy01@gmail.com> Date: Tue, 14 Nov 2023 15:17:02 -0800 Subject: [PATCH] Demonstrate change of unify expr --- datafusion/expr/src/expr.rs | 50 +++++++++++++++++++++----- datafusion/expr/src/expr_schema.rs | 56 +++++++++++++++++++----------- 2 files changed, 77 insertions(+), 29 deletions(-) diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 97e4fcc327c3..93d2ec8638a7 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -338,11 +338,22 @@ impl Between { } } +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ScalarFunctionDefinition { + /// Resolved to a built in scalar function + /// (will be removed long term) + BuiltIn(built_in_function::BuiltinScalarFunction), + /// Resolved to a user defined function + UDF(crate::ScalarUDF), + /// A scalar function that will be called by name + Name(Arc), +} + /// ScalarFunction expression #[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct ScalarFunction { /// The function - pub fun: built_in_function::BuiltinScalarFunction, + pub func_def: ScalarFunctionDefinition, /// List of expressions to feed to the functions as arguments pub args: Vec, } @@ -350,7 +361,10 @@ pub struct ScalarFunction { impl ScalarFunction { /// Create a new ScalarFunction expression pub fn new(fun: built_in_function::BuiltinScalarFunction, args: Vec) -> Self { - Self { fun, args } + Self { + func_def: ScalarFunctionDefinition::BuiltIn(fun), + args, + } } } @@ -1196,9 +1210,21 @@ impl fmt::Display for Expr { write!(f, " NULLS LAST") } } - Expr::ScalarFunction(func) => { - fmt_function(f, &func.fun.to_string(), false, &func.args, true) - } + Expr::ScalarFunction(func_expr) => match &func_expr.func_def { + ScalarFunctionDefinition::BuiltIn(builtin_func) => fmt_function( + f, + &builtin_func.to_string(), + false, + &func_expr.args, + true, + ), + ScalarFunctionDefinition::UDF(udf) => { + fmt_function(f, &udf.name, false, &func_expr.args, true) + } + ScalarFunctionDefinition::Name(func_name) => { + fmt_function(f, &func_name, false, &func_expr.args, true) + } + }, Expr::ScalarUDF(ScalarUDF { fun, args }) => { fmt_function(f, &fun.name, false, args, true) } @@ -1532,9 +1558,17 @@ fn create_name(e: &Expr) -> Result { } } } - Expr::ScalarFunction(func) => { - create_function_name(&func.fun.to_string(), false, &func.args) - } + Expr::ScalarFunction(func_expr) => match &func_expr.func_def { + ScalarFunctionDefinition::BuiltIn(builtin_func) => { + create_function_name(&builtin_func.to_string(), false, &func_expr.args) + } + ScalarFunctionDefinition::UDF(udf) => { + create_function_name(&udf.name, false, &func_expr.args) + } + ScalarFunctionDefinition::Name(name) => { + create_function_name(&name, false, &func_expr.args) + } + }, Expr::ScalarUDF(ScalarUDF { fun, args }) => { create_function_name(&fun.name, false, args) } diff --git a/datafusion/expr/src/expr_schema.rs b/datafusion/expr/src/expr_schema.rs index 5881feece1fc..c46c94f77eb6 100644 --- a/datafusion/expr/src/expr_schema.rs +++ b/datafusion/expr/src/expr_schema.rs @@ -18,8 +18,8 @@ use super::{Between, Expr, Like}; use crate::expr::{ AggregateFunction, AggregateUDF, Alias, BinaryExpr, Cast, GetFieldAccess, - GetIndexedField, InList, InSubquery, Placeholder, ScalarFunction, ScalarUDF, Sort, - TryCast, WindowFunction, + GetIndexedField, InList, InSubquery, Placeholder, ScalarFunction, + ScalarFunctionDefinition, ScalarUDF, Sort, TryCast, WindowFunction, }; use crate::field_util::GetFieldAccessSchema; use crate::type_coercion::binary::get_result_type; @@ -89,25 +89,39 @@ impl ExprSchemable for Expr { .collect::>>()?; Ok((fun.return_type)(&data_types)?.as_ref().clone()) } - Expr::ScalarFunction(ScalarFunction { fun, args }) => { - let arg_data_types = args - .iter() - .map(|e| e.get_type(schema)) - .collect::>>()?; - - // verify that input data types is consistent with function's `TypeSignature` - data_types(&arg_data_types, &fun.signature()).map_err(|_| { - plan_datafusion_err!( - "{}", - utils::generate_signature_error_msg( - &format!("{fun}"), - fun.signature(), - &arg_data_types, - ) - ) - })?; - - fun.return_type(&arg_data_types) + Expr::ScalarFunction(ScalarFunction { func_def, args }) => { + match func_def { + ScalarFunctionDefinition::BuiltIn(fun) => { + let arg_data_types = args + .iter() + .map(|e| e.get_type(schema)) + .collect::>>()?; + + // verify that input data types is consistent with function's `TypeSignature` + data_types(&arg_data_types, &fun.signature()).map_err(|_| { + plan_datafusion_err!( + "{}", + utils::generate_signature_error_msg( + &format!("{fun}"), + fun.signature(), + &arg_data_types, + ) + ) + })?; + + fun.return_type(&arg_data_types) + } + ScalarFunctionDefinition::UDF(fun) => { + let data_types = args + .iter() + .map(|e| e.get_type(schema)) + .collect::>>()?; + Ok((fun.return_type)(&data_types)?.as_ref().clone()) + } + ScalarFunctionDefinition::Name(_) => { + return internal_err!("Function Expr in string representation should be resolved during planning") + } + } } Expr::WindowFunction(WindowFunction { fun, args, .. }) => { let data_types = args