From d4c2bd1f5f9be96e0c7f4105bff8f342fa4de5fe Mon Sep 17 00:00:00 2001 From: Chojan Shang Date: Sat, 10 Aug 2024 08:28:29 +0800 Subject: [PATCH] Implement native support StringView for Ends With Signed-off-by: Chojan Shang --- datafusion/functions/src/string/ends_with.rs | 33 +++++++++---------- .../sqllogictest/test_files/string_view.slt | 6 ++-- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/datafusion/functions/src/string/ends_with.rs b/datafusion/functions/src/string/ends_with.rs index b72cf0f66fa6..03a1795954d0 100644 --- a/datafusion/functions/src/string/ends_with.rs +++ b/datafusion/functions/src/string/ends_with.rs @@ -18,12 +18,10 @@ use std::any::Any; use std::sync::Arc; -use arrow::array::{ArrayRef, OffsetSizeTrait}; +use arrow::array::ArrayRef; use arrow::datatypes::DataType; -use arrow::datatypes::DataType::Boolean; -use datafusion_common::cast::as_generic_string_array; -use datafusion_common::{exec_err, Result}; +use datafusion_common::{internal_err, Result}; use datafusion_expr::TypeSignature::*; use datafusion_expr::{ColumnarValue, Volatility}; use datafusion_expr::{ScalarUDFImpl, Signature}; @@ -43,14 +41,15 @@ impl Default for EndsWithFunc { impl EndsWithFunc { pub fn new() -> Self { - use DataType::*; Self { signature: Signature::one_of( vec![ - Exact(vec![Utf8, Utf8]), - Exact(vec![Utf8, LargeUtf8]), - Exact(vec![LargeUtf8, Utf8]), - Exact(vec![LargeUtf8, LargeUtf8]), + // Planner attempts coercion to the target type starting with the most preferred candidate. + // For example, given input `(Utf8View, Utf8)`, it first tries coercing to `(Utf8View, Utf8View)`. + // If that fails, it proceeds to `(Utf8, Utf8)`. 
+ Exact(vec![DataType::Utf8View, DataType::Utf8View]), + Exact(vec![DataType::Utf8, DataType::Utf8]), + Exact(vec![DataType::LargeUtf8, DataType::LargeUtf8]), ], Volatility::Immutable, ), @@ -72,15 +71,16 @@ impl ScalarUDFImpl for EndsWithFunc { } fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> { - Ok(Boolean) + Ok(DataType::Boolean) } fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> { match args[0].data_type() { - DataType::Utf8 => make_scalar_function(ends_with::<i32>, vec![])(args), - DataType::LargeUtf8 => make_scalar_function(ends_with::<i64>, vec![])(args), + DataType::Utf8View | DataType::Utf8 | DataType::LargeUtf8 => { + make_scalar_function(ends_with, vec![])(args) + } other => { - exec_err!("Unsupported data type {other:?} for function ends_with") + internal_err!("Unsupported data type {other:?} for function ends_with. Expected Utf8, LargeUtf8 or Utf8View")? } } } @@ -88,11 +88,8 @@ impl ScalarUDFImpl for EndsWithFunc { /// Returns true if string ends with suffix. /// ends_with('alphabet', 'abet') = 't' -pub fn ends_with<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> { - let left = as_generic_string_array::<T>(&args[0])?; - let right = as_generic_string_array::<T>(&args[1])?; - - let result = arrow::compute::kernels::comparison::ends_with(left, right)?; +pub fn ends_with(args: &[ArrayRef]) -> Result<ArrayRef> { + let result = arrow::compute::kernels::comparison::ends_with(&args[0], &args[1])?; Ok(Arc::new(result) as ArrayRef) } diff --git a/datafusion/sqllogictest/test_files/string_view.slt b/datafusion/sqllogictest/test_files/string_view.slt index e7166690580f..d0822eaa8e8c 100644 --- a/datafusion/sqllogictest/test_files/string_view.slt +++ b/datafusion/sqllogictest/test_files/string_view.slt @@ -574,7 +574,6 @@ logical_plan 03)----TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for ENDS_WITH -## TODO https://github.com/apache/datafusion/issues/11852 query TT EXPLAIN SELECT ENDS_WITH(column1_utf8view, 'foo') as c1, @@ -582,9 +581,8 @@ EXPLAIN SELECT 
FROM test; ---- logical_plan -01)Projection: ends_with(CAST(test.column1_utf8view AS Utf8), Utf8("foo")) AS c1, ends_with(__common_expr_1, __common_expr_1) AS c2 -02)--Projection: CAST(test.column2_utf8view AS Utf8) AS __common_expr_1, test.column1_utf8view -03)----TableScan: test projection=[column1_utf8view, column2_utf8view] +01)Projection: ends_with(test.column1_utf8view, Utf8View("foo")) AS c1, ends_with(test.column2_utf8view, test.column2_utf8view) AS c2 +02)--TableScan: test projection=[column1_utf8view, column2_utf8view] ## Ensure no casts for INITCAP