-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Create built-in scalar functions programmatically #1734
Changes from 4 commits
4e1e9f3
78db02b
9cb1146
330dd8c
2b1cc01
75539c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,17 +37,17 @@ pub use dfschema::{DFField, DFSchema, DFSchemaRef, ToDFSchema}; | |
pub use display::display_schema; | ||
pub use expr::{ | ||
abs, acos, and, approx_distinct, approx_percentile_cont, array, ascii, asin, atan, | ||
avg, binary_expr, bit_length, btrim, case, ceil, character_length, chr, col, | ||
columnize_expr, combine_filters, concat, concat_ws, cos, count, count_distinct, | ||
create_udaf, create_udf, date_part, date_trunc, digest, exp, exprlist_to_fields, | ||
floor, in_list, initcap, left, length, lit, lit_timestamp_nano, ln, log10, log2, | ||
lower, lpad, ltrim, max, md5, min, normalize_col, normalize_cols, now, octet_length, | ||
or, random, regexp_match, regexp_replace, repeat, replace, replace_col, reverse, | ||
rewrite_sort_cols_by_aggs, right, round, rpad, rtrim, sha224, sha256, sha384, sha512, | ||
signum, sin, split_part, sqrt, starts_with, strpos, substr, sum, tan, to_hex, | ||
translate, trim, trunc, unalias, unnormalize_col, unnormalize_cols, upper, when, | ||
Column, Expr, ExprRewriter, ExpressionVisitor, Literal, Recursion, RewriteRecursion, | ||
SimplifyInfo, | ||
avg, binary_expr, bit_length, btrim, call_builtin_scalar_fn, case, ceil, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Seeing all these other names, what would you think of changing the name from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you, I will change it |
||
character_length, chr, col, columnize_expr, combine_filters, concat, concat_ws, cos, | ||
count, count_distinct, create_udaf, create_udf, date_part, date_trunc, digest, exp, | ||
exprlist_to_fields, floor, in_list, initcap, left, length, lit, lit_timestamp_nano, | ||
ln, log10, log2, lower, lpad, ltrim, max, md5, min, normalize_col, normalize_cols, | ||
now, octet_length, or, random, regexp_match, regexp_replace, repeat, replace, | ||
replace_col, reverse, rewrite_sort_cols_by_aggs, right, round, rpad, rtrim, sha224, | ||
sha256, sha384, sha512, signum, sin, split_part, sqrt, starts_with, strpos, substr, | ||
sum, tan, to_hex, translate, trim, trunc, unalias, unnormalize_col, unnormalize_cols, | ||
upper, when, Column, Expr, ExprRewriter, ExpressionVisitor, Literal, Recursion, | ||
RewriteRecursion, SimplifyInfo, | ||
}; | ||
pub use extension::UserDefinedLogicalNode; | ||
pub use operators::Operator; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -735,8 +735,8 @@ mod tests { | |
use super::*; | ||
use crate::assert_contains; | ||
use crate::logical_plan::{ | ||
and, binary_expr, col, create_udf, lit, lit_timestamp_nano, DFField, Expr, | ||
LogicalPlanBuilder, | ||
and, binary_expr, call_builtin_scalar_fn, col, create_udf, lit, | ||
lit_timestamp_nano, DFField, Expr, LogicalPlanBuilder, | ||
}; | ||
use crate::physical_plan::functions::{make_scalar_function, BuiltinScalarFunction}; | ||
use crate::physical_plan::udf::ScalarUDF; | ||
|
@@ -1010,46 +1010,34 @@ mod tests { | |
#[test] | ||
fn test_const_evaluator_scalar_functions() { | ||
// concat("foo", "bar") --> "foobar" | ||
let expr = Expr::ScalarFunction { | ||
args: vec![lit("foo"), lit("bar")], | ||
fun: BuiltinScalarFunction::Concat, | ||
}; | ||
let expr = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well that is certainly nicer looking 👍 |
||
call_builtin_scalar_fn("concat", vec![lit("foo"), lit("bar")]).unwrap(); | ||
test_evaluate(expr, lit("foobar")); | ||
|
||
// ensure arguments are also constant folded | ||
// concat("foo", concat("bar", "baz")) --> "foobarbaz" | ||
let concat1 = Expr::ScalarFunction { | ||
args: vec![lit("bar"), lit("baz")], | ||
fun: BuiltinScalarFunction::Concat, | ||
}; | ||
let expr = Expr::ScalarFunction { | ||
args: vec![lit("foo"), concat1], | ||
fun: BuiltinScalarFunction::Concat, | ||
}; | ||
let concat1 = | ||
call_builtin_scalar_fn("concat", vec![lit("bar"), lit("baz")]).unwrap(); | ||
let expr = call_builtin_scalar_fn("concat", vec![lit("foo"), concat1]).unwrap(); | ||
test_evaluate(expr, lit("foobarbaz")); | ||
|
||
// Check non string arguments | ||
// to_timestamp("2020-09-08T12:00:00+00:00") --> timestamp(1599566400000000000i64) | ||
let expr = Expr::ScalarFunction { | ||
args: vec![lit("2020-09-08T12:00:00+00:00")], | ||
fun: BuiltinScalarFunction::ToTimestamp, | ||
}; | ||
let expr = call_builtin_scalar_fn( | ||
"to_timestamp", | ||
vec![lit("2020-09-08T12:00:00+00:00")], | ||
) | ||
.unwrap(); | ||
test_evaluate(expr, lit_timestamp_nano(1599566400000000000i64)); | ||
|
||
// check that non foldable arguments are folded | ||
// to_timestamp(a) --> to_timestamp(a) [no rewrite possible] | ||
let expr = Expr::ScalarFunction { | ||
args: vec![col("a")], | ||
fun: BuiltinScalarFunction::ToTimestamp, | ||
}; | ||
let expr = call_builtin_scalar_fn("to_timestamp", vec![col("a")]).unwrap(); | ||
test_evaluate(expr.clone(), expr); | ||
|
||
// check that non foldable arguments are folded | ||
// to_timestamp(a) --> to_timestamp(a) [no rewrite possible] | ||
let expr = Expr::ScalarFunction { | ||
args: vec![col("a")], | ||
fun: BuiltinScalarFunction::ToTimestamp, | ||
}; | ||
let expr = call_builtin_scalar_fn("to_timestamp", vec![col("a")]).unwrap(); | ||
test_evaluate(expr.clone(), expr); | ||
|
||
// volatile / stable functions should not be evaluated | ||
|
@@ -1090,10 +1078,7 @@ mod tests { | |
} | ||
|
||
fn now_expr() -> Expr { | ||
Expr::ScalarFunction { | ||
args: vec![], | ||
fun: BuiltinScalarFunction::Now, | ||
} | ||
call_builtin_scalar_fn("now", vec![]).unwrap() | ||
} | ||
|
||
fn cast_to_int64_expr(expr: Expr) -> Expr { | ||
|
@@ -1104,10 +1089,7 @@ mod tests { | |
} | ||
|
||
fn to_timestamp_expr(arg: impl Into<String>) -> Expr { | ||
Expr::ScalarFunction { | ||
args: vec![lit(arg.into())], | ||
fun: BuiltinScalarFunction::ToTimestamp, | ||
} | ||
call_builtin_scalar_fn("to_timestamp", vec![lit(arg.into())]).unwrap() | ||
} | ||
|
||
#[test] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One way to write this more idiomatically is
(not required, I am just pointing it out because it took me a while to get my head around working with Option and Results, and we are all learning together)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i wonder if it makes sense to make it a macro rather than a function call so that nonexisteng built-in functions will be caught during compile time not runtime
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree,
call_builtin_scalar_fn!(ToTimestamp, vec![lit("2020-09-08T12:00:00+00:00")])
is as readable.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you @jimexist !
And alamb 's opinion of the trade-off between macro and function call is here:
#1718 (comment)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it needed to implement both?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No I don't think so. If we want a macro we can always add that as a follow on PR.
Thanks @HaoYang670 !