diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 665c45651863..0987cfe20b23 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -270,7 +270,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.2.3", + "indexmap 2.2.4", "lexical-core", "num", "serde", @@ -384,7 +384,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -874,7 +874,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-targets 0.52.3", + "windows-targets 0.52.4", ] [[package]] @@ -1073,7 +1073,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" dependencies = [ "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -1126,7 +1126,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.3", - "indexmap 2.2.3", + "indexmap 2.2.4", "itertools", "log", "num-traits", @@ -1300,7 +1300,7 @@ dependencies = [ "half", "hashbrown 0.14.3", "hex", - "indexmap 2.2.3", + "indexmap 2.2.4", "itertools", "log", "md-5", @@ -1332,7 +1332,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.3", - "indexmap 2.2.3", + "indexmap 2.2.4", "itertools", "log", "once_cell", @@ -1622,7 +1622,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -1706,7 +1706,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 2.2.3", + "indexmap 2.2.4", "slab", "tokio", "tokio-util", @@ -1766,9 +1766,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379dada1584ad501b383485dd706b8afb7a70fcbc7f4da7d780638a5a6124a60" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -1923,9 +1923,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.3" +version = "2.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" +checksum = "967d6dd42f16dbf0eb8040cb9e477933562684d3918f7d253f2ff9087fb3e7a3" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -2124,9 +2124,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lz4_flex" @@ -2313,7 +2313,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.8", + "hermit-abi 0.3.9", "libc", ] @@ -2475,7 +2475,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.2.3", + "indexmap 2.2.4", ] [[package]] @@ -2533,7 +2533,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3074,7 +3074,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3209,7 +3209,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3255,7 +3255,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3268,7 +3268,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3290,9 +3290,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.51" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ab617d94515e94ae53b8406c628598680aa0c9587474ecbe58188f7b345d66c" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -3376,7 +3376,7 @@ checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3471,7 +3471,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3568,7 +3568,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3613,7 +3613,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3767,7 +3767,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", "wasm-bindgen-shared", ] @@ -3801,7 +3801,7 @@ checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3882,7 +3882,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.3", + "windows-targets 0.52.4", ] [[package]] @@ -3900,7 +3900,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.3", + "windows-targets 0.52.4", ] [[package]] @@ -3920,17 +3920,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d380ba1dc7187569a8a9e91ed34b8ccfc33123bbacb8c0aed2d1ad7f3ef2dc5f" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.3", - "windows_aarch64_msvc 0.52.3", - "windows_i686_gnu 0.52.3", - "windows_i686_msvc 0.52.3", - "windows_x86_64_gnu 0.52.3", - "windows_x86_64_gnullvm 0.52.3", - "windows_x86_64_msvc 0.52.3", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -3941,9 +3941,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68e5dcfb9413f53afd9c8f86e56a7b4d86d9a2fa26090ea2dc9e40fba56c6ec6" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -3953,9 +3953,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dab469ebbc45798319e69eebf92308e541ce46760b49b18c6b3fe5e8965b30f" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -3965,9 +3965,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a4e9b6a7cac734a8b4138a4e1044eac3404d8326b6c0f939276560687a033fb" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -3977,9 +3977,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b0ec9c422ca95ff34a78755cfa6ad4a51371da2a5ace67500cf7ca5f232c58" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -3989,9 +3989,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "704131571ba93e89d7cd43482277d6632589b18ecf4468f591fbae0a8b101614" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" @@ -4001,9 +4001,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42079295511643151e98d61c38c0acc444e52dd42ab456f7ccfd5152e8ecf21c" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -4013,9 +4013,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0770833d60a970638e989b3fa9fd2bb1aaadcf88963d1659fd7d9990196ed2d6" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winreg" @@ -4059,7 +4059,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index ff553a48888b..8bb23e96e0e2 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -282,26 +282,6 @@ async fn test_fn_initcap() -> Result<()> { Ok(()) } -#[tokio::test] -async fn test_fn_instr() -> Result<()> { - let expr = instr(col("a"), lit("b")); - - let expected = [ - "+-------------------------+", - "| instr(test.a,Utf8(\"b\")) |", - "+-------------------------+", - "| 2 |", - "| 2 |", - "| 0 |", - "| 5 |", - "+-------------------------+", - ]; - - assert_fn_batches!(expr, expected); - - Ok(()) -} - #[tokio::test] #[cfg(feature = "unicode_expressions")] async fn test_fn_left() -> Result<()> { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index 51e2ee79bc21..9e8764cd204d 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -130,8 +130,6 @@ pub enum BuiltinScalarFunction { ArrayPopFront, /// array_pop_back ArrayPopBack, - /// array_dims - ArrayDims, /// array_distinct ArrayDistinct, /// array_element @@ -140,8 +138,6 @@ pub enum BuiltinScalarFunction { ArrayEmpty, /// array_length ArrayLength, - /// array_ndims - ArrayNdims, /// array_position ArrayPosition, /// array_positions @@ -172,8 +168,6 @@ pub enum BuiltinScalarFunction { ArrayUnion, /// array_except ArrayExcept, - /// cardinality - Cardinality, /// array_resize ArrayResize, /// construct an array from columns @@ -204,8 +198,6 @@ pub enum BuiltinScalarFunction { EndsWith, /// initcap InitCap, - /// InStr - InStr, /// left Left, /// lpad @@ -379,12 +371,10 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayHasAll => Volatility::Immutable, BuiltinScalarFunction::ArrayHasAny => Volatility::Immutable, BuiltinScalarFunction::ArrayHas => Volatility::Immutable, - BuiltinScalarFunction::ArrayDims => Volatility::Immutable, BuiltinScalarFunction::ArrayDistinct => Volatility::Immutable, BuiltinScalarFunction::ArrayElement => Volatility::Immutable, BuiltinScalarFunction::ArrayExcept => Volatility::Immutable, BuiltinScalarFunction::ArrayLength => Volatility::Immutable, - BuiltinScalarFunction::ArrayNdims => Volatility::Immutable, BuiltinScalarFunction::ArrayPopFront => Volatility::Immutable, BuiltinScalarFunction::ArrayPopBack => Volatility::Immutable, BuiltinScalarFunction::ArrayPosition => Volatility::Immutable, @@ -403,7 +393,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Volatility::Immutable, BuiltinScalarFunction::ArrayUnion => Volatility::Immutable, BuiltinScalarFunction::ArrayResize => Volatility::Immutable, - BuiltinScalarFunction::Cardinality => Volatility::Immutable, BuiltinScalarFunction::MakeArray => Volatility::Immutable, BuiltinScalarFunction::Ascii => Volatility::Immutable, BuiltinScalarFunction::BitLength => Volatility::Immutable, @@ -414,7 +403,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ConcatWithSeparator => Volatility::Immutable, BuiltinScalarFunction::EndsWith => Volatility::Immutable, BuiltinScalarFunction::InitCap => Volatility::Immutable, - BuiltinScalarFunction::InStr => Volatility::Immutable, BuiltinScalarFunction::Left => Volatility::Immutable, BuiltinScalarFunction::Lpad => Volatility::Immutable, BuiltinScalarFunction::Lower => Volatility::Immutable, @@ -552,9 +540,6 @@ impl BuiltinScalarFunction { | BuiltinScalarFunction::ArrayHasAny | BuiltinScalarFunction::ArrayHas | BuiltinScalarFunction::ArrayEmpty => Ok(Boolean), - BuiltinScalarFunction::ArrayDims => { - Ok(List(Arc::new(Field::new("item", UInt64, true)))) - } BuiltinScalarFunction::ArrayDistinct => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayElement => match &input_expr_types[0] { List(field) @@ -565,7 +550,6 @@ impl BuiltinScalarFunction { ), }, BuiltinScalarFunction::ArrayLength => Ok(UInt64), - BuiltinScalarFunction::ArrayNdims => Ok(UInt64), BuiltinScalarFunction::ArrayPopFront => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPopBack => Ok(input_expr_types[0].clone()), BuiltinScalarFunction::ArrayPosition => Ok(UInt64), @@ -613,7 +597,6 @@ impl BuiltinScalarFunction { (dt, _) => Ok(dt), } } - BuiltinScalarFunction::Cardinality => Ok(UInt64), BuiltinScalarFunction::MakeArray => match input_expr_types.len() { 0 => Ok(List(Arc::new(Field::new("item", Null, true)))), _ => { @@ -649,9 +632,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::InitCap => { utf8_to_str_type(&input_expr_types[0], "initcap") } - BuiltinScalarFunction::InStr => { - utf8_to_int_type(&input_expr_types[0], "instr/position") - } BuiltinScalarFunction::Left => utf8_to_str_type(&input_expr_types[0], "left"), BuiltinScalarFunction::Lower => { utf8_to_str_type(&input_expr_types[0], "lower") @@ -712,7 +692,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::StartsWith => Ok(Boolean), BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { - utf8_to_int_type(&input_expr_types[0], "strpos") + utf8_to_int_type(&input_expr_types[0], "strpos/instr/position") } BuiltinScalarFunction::Substr => { utf8_to_str_type(&input_expr_types[0], "substr") @@ -853,7 +833,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayConcat => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayDims => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayEmpty => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayElement => { Signature::array_and_index(self.volatility()) @@ -869,7 +848,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayLength => { Signature::variadic_any(self.volatility()) } - BuiltinScalarFunction::ArrayNdims => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayDistinct => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayPosition => { Signature::array_and_element_and_optional_index(self.volatility()) @@ -900,7 +878,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayIntersect => Signature::any(2, self.volatility()), BuiltinScalarFunction::ArrayUnion => Signature::any(2, self.volatility()), - BuiltinScalarFunction::Cardinality => Signature::array(self.volatility()), BuiltinScalarFunction::ArrayResize => { Signature::variadic_any(self.volatility()) } @@ -1027,7 +1004,6 @@ impl BuiltinScalarFunction { ), BuiltinScalarFunction::EndsWith - | BuiltinScalarFunction::InStr | BuiltinScalarFunction::Strpos | BuiltinScalarFunction::StartsWith => Signature::one_of( vec![ @@ -1278,7 +1254,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Chr => &["chr"], BuiltinScalarFunction::EndsWith => &["ends_with"], BuiltinScalarFunction::InitCap => &["initcap"], - BuiltinScalarFunction::InStr => &["instr", "position"], BuiltinScalarFunction::Left => &["left"], BuiltinScalarFunction::Lower => &["lower"], BuiltinScalarFunction::Lpad => &["lpad"], @@ -1295,7 +1270,7 @@ impl BuiltinScalarFunction { &["string_to_array", "string_to_list"] } BuiltinScalarFunction::StartsWith => &["starts_with"], - BuiltinScalarFunction::Strpos => &["strpos"], + BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], BuiltinScalarFunction::ToHex => &["to_hex"], BuiltinScalarFunction::Translate => &["translate"], @@ -1340,7 +1315,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayConcat => { &["array_concat", "array_cat", "list_concat", "list_cat"] } - BuiltinScalarFunction::ArrayDims => &["array_dims", "list_dims"], BuiltinScalarFunction::ArrayDistinct => &["array_distinct", "list_distinct"], BuiltinScalarFunction::ArrayEmpty => &["empty"], BuiltinScalarFunction::ArrayElement => &[ @@ -1357,7 +1331,6 @@ impl BuiltinScalarFunction { &["array_has", "list_has", "array_contains", "list_contains"] } BuiltinScalarFunction::ArrayLength => &["array_length", "list_length"], - BuiltinScalarFunction::ArrayNdims => &["array_ndims", "list_ndims"], BuiltinScalarFunction::ArrayPopFront => { &["array_pop_front", "list_pop_front"] } @@ -1393,7 +1366,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::ArrayReverse => &["array_reverse", "list_reverse"], BuiltinScalarFunction::ArraySlice => &["array_slice", "list_slice"], BuiltinScalarFunction::ArrayUnion => &["array_union", "list_union"], - BuiltinScalarFunction::Cardinality => &["cardinality"], BuiltinScalarFunction::ArrayResize => &["array_resize", "list_resize"], BuiltinScalarFunction::MakeArray => &["make_array", "make_list"], BuiltinScalarFunction::ArrayIntersect => { diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 585679d50706..f22944acd45c 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -628,12 +628,6 @@ scalar_expr!( array, "flattens an array of arrays into a single array." ); -scalar_expr!( - ArrayDims, - array_dims, - array, - "returns an array of the array's dimensions." -); scalar_expr!( ArrayElement, array_element, @@ -652,12 +646,6 @@ scalar_expr!( array dimension, "returns the length of the array dimension." ); -scalar_expr!( - ArrayNdims, - array_ndims, - array, - "returns the number of dimensions of the array." -); scalar_expr!( ArrayDistinct, array_distinct, @@ -738,13 +726,6 @@ scalar_expr!( ); scalar_expr!(ArrayUnion, array_union, array1 array2, "returns an array of the elements in the union of array1 and array2 without duplicates."); -scalar_expr!( - Cardinality, - cardinality, - array, - "returns the total number of elements in the array." -); - scalar_expr!( ArrayResize, array_resize, @@ -786,7 +767,6 @@ scalar_expr!( ); scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input`, using the `algorithm`"); scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase"); -scalar_expr!(InStr, instr, string substring, "returns the position of the first occurrence of `substring` in `string`"); scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`"); scalar_expr!(Lower, lower, string, "convert the string to lower case"); scalar_expr!( @@ -1329,7 +1309,6 @@ mod test { test_scalar_expr!(Gcd, gcd, arg_1, arg_2); test_scalar_expr!(Lcm, lcm, arg_1, arg_2); test_scalar_expr!(InitCap, initcap, string); - test_scalar_expr!(InStr, instr, string, substring); test_scalar_expr!(Left, left, string, count); test_scalar_expr!(Lower, lower, string); test_nary_scalar_expr!(Lpad, lpad, string, count); @@ -1383,9 +1362,7 @@ mod test { test_scalar_expr!(ArraySort, array_sort, array, desc, null_first); test_scalar_expr!(ArrayPopFront, array_pop_front, array); test_scalar_expr!(ArrayPopBack, array_pop_back, array); - test_unary_scalar_expr!(ArrayDims, array_dims); test_scalar_expr!(ArrayLength, array_length, array, dimension); - test_unary_scalar_expr!(ArrayNdims, array_ndims); test_scalar_expr!(ArrayPosition, array_position, array, element, index); test_scalar_expr!(ArrayPositions, array_positions, array, element); test_scalar_expr!(ArrayPrepend, array_prepend, array, element); @@ -1396,7 +1373,6 @@ mod test { test_scalar_expr!(ArrayReplace, array_replace, array, from, to); test_scalar_expr!(ArrayReplaceN, array_replace_n, array, from, to, max); test_scalar_expr!(ArrayReplaceAll, array_replace_all, array, from, to); - test_unary_scalar_expr!(Cardinality, cardinality); test_nary_scalar_expr!(MakeArray, array, input); test_unary_scalar_expr!(ArrowTypeof, arrow_typeof); diff --git a/datafusion/functions-array/src/kernels.rs b/datafusion/functions-array/src/kernels.rs index b9a68b466605..8631e42a0f80 100644 --- a/datafusion/functions-array/src/kernels.rs +++ b/datafusion/functions-array/src/kernels.rs @@ -17,16 +17,19 @@ //! implementation kernels for array functions +use arrow::array::ListArray; use arrow::array::{ Array, ArrayRef, BooleanArray, Float32Array, Float64Array, GenericListArray, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, OffsetSizeTrait, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; -use arrow::datatypes::DataType; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::Field; +use arrow::datatypes::{DataType, UInt64Type}; use datafusion_common::cast::{ as_int64_array, as_large_list_array, as_list_array, as_string_array, }; -use datafusion_common::{exec_err, DataFusionError}; +use datafusion_common::{exec_err, DataFusionError, Result}; use std::any::type_name; use std::sync::Arc; macro_rules! downcast_arg { @@ -102,7 +105,7 @@ macro_rules! call_array_function { } /// Array_to_string SQL function -pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result { +pub(super) fn array_to_string(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { return exec_err!("array_to_string expects two or three arguments"); } @@ -254,9 +257,6 @@ pub(super) fn array_to_string(args: &[ArrayRef]) -> datafusion_common::Result [0, 1, 2] /// gen_range(1, 4) => [1, 2, 3] /// gen_range(1, 7, 2) => [1, 3, 5] -pub fn gen_range( - args: &[ArrayRef], - include_upper: i64, -) -> datafusion_common::Result { +pub fn gen_range(args: &[ArrayRef], include_upper: i64) -> Result { let (start_array, stop_array, step_array) = match args.len() { 1 => (None, as_int64_array(&args[0])?, None), 2 => ( @@ -319,3 +316,125 @@ pub fn gen_range( )?); Ok(arr) } + +/// Returns the length of each array dimension +fn compute_array_dims(arr: Option) -> Result>>> { + let mut value = match arr { + Some(arr) => arr, + None => return Ok(None), + }; + if value.is_empty() { + return Ok(None); + } + let mut res = vec![Some(value.len() as u64)]; + + loop { + match value.data_type() { + DataType::List(..) => { + value = downcast_arg!(value, ListArray).value(0); + res.push(Some(value.len() as u64)); + } + _ => return Ok(Some(res)), + } + } +} + +fn generic_list_cardinality( + array: &GenericListArray, +) -> Result { + let result = array + .iter() + .map(|arr| match compute_array_dims(arr)? { + Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::())), + None => Ok(None), + }) + .collect::>()?; + Ok(Arc::new(result) as ArrayRef) +} + +/// Cardinality SQL function +pub fn cardinality(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("cardinality expects one argument"); + } + + match &args[0].data_type() { + DataType::List(_) => { + let list_array = as_list_array(&args[0])?; + generic_list_cardinality::(list_array) + } + DataType::LargeList(_) => { + let list_array = as_large_list_array(&args[0])?; + generic_list_cardinality::(list_array) + } + other => { + exec_err!("cardinality does not support type '{:?}'", other) + } + } +} + +/// Array_dims SQL function +pub fn array_dims(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("array_dims needs one argument"); + } + + let data = match args[0].data_type() { + DataType::List(_) => { + let array = as_list_array(&args[0])?; + array + .iter() + .map(compute_array_dims) + .collect::>>()? + } + DataType::LargeList(_) => { + let array = as_large_list_array(&args[0])?; + array + .iter() + .map(compute_array_dims) + .collect::>>()? + } + array_type => { + return exec_err!("array_dims does not support type '{array_type:?}'"); + } + }; + + let result = ListArray::from_iter_primitive::(data); + + Ok(Arc::new(result) as ArrayRef) +} + +/// Array_ndims SQL function +pub fn array_ndims(args: &[ArrayRef]) -> Result { + if args.len() != 1 { + return exec_err!("array_ndims needs one argument"); + } + + fn general_list_ndims( + array: &GenericListArray, + ) -> Result { + let mut data = Vec::new(); + let ndims = datafusion_common::utils::list_ndims(array.data_type()); + + for arr in array.iter() { + if arr.is_some() { + data.push(Some(ndims)) + } else { + data.push(None) + } + } + + Ok(Arc::new(UInt64Array::from(data)) as ArrayRef) + } + match args[0].data_type() { + DataType::List(_) => { + let array = as_list_array(&args[0])?; + general_list_ndims::(array) + } + DataType::LargeList(_) => { + let array = as_large_list_array(&args[0])?; + general_list_ndims::(array) + } + array_type => exec_err!("array_ndims does not support type {array_type:?}"), + } +} diff --git a/datafusion/functions-array/src/lib.rs b/datafusion/functions-array/src/lib.rs index e3515ccf9f72..e4cdf69aa93a 100644 --- a/datafusion/functions-array/src/lib.rs +++ b/datafusion/functions-array/src/lib.rs @@ -39,7 +39,10 @@ use std::sync::Arc; /// Fluent-style API for creating `Expr`s pub mod expr_fn { + pub use super::udf::array_dims; + pub use super::udf::array_ndims; pub use super::udf::array_to_string; + pub use super::udf::cardinality; pub use super::udf::gen_series; pub use super::udf::range; } @@ -50,6 +53,9 @@ pub fn register_all(registry: &mut dyn FunctionRegistry) -> Result<()> { udf::array_to_string_udf(), udf::range_udf(), udf::gen_series_udf(), + udf::array_dims_udf(), + udf::cardinality_udf(), + udf::array_ndims_udf(), ]; functions.into_iter().try_for_each(|udf| { let existing_udf = registry.register_udf(udf)?; diff --git a/datafusion/functions-array/src/udf.rs b/datafusion/functions-array/src/udf.rs index 17769419c0b2..a35b45464795 100644 --- a/datafusion/functions-array/src/udf.rs +++ b/datafusion/functions-array/src/udf.rs @@ -202,3 +202,168 @@ impl ScalarUDFImpl for GenSeries { &self.aliases } } + +make_udf_function!( + ArrayDims, + array_dims, + array, + "returns an array of the array's dimensions.", + array_dims_udf +); + +#[derive(Debug)] +pub(super) struct ArrayDims { + signature: Signature, + aliases: Vec, +} + +impl ArrayDims { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec!["array_dims".to_string(), "list_dims".to_string()], + } + } +} + +impl ScalarUDFImpl for ArrayDims { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_dims" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => { + List(Arc::new(Field::new("item", UInt64, true))) + } + _ => { + return plan_err!("The array_dims function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_dims(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + Cardinality, + cardinality, + array, + "returns the total number of elements in the array.", + cardinality_udf +); + +impl Cardinality { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec![String::from("cardinality")], + } + } +} + +#[derive(Debug)] +pub(super) struct Cardinality { + signature: Signature, + aliases: Vec, +} +impl ScalarUDFImpl for Cardinality { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "cardinality" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, + _ => { + return plan_err!("The cardinality function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::cardinality(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} + +make_udf_function!( + ArrayNdims, + array_ndims, + array, + "returns the number of dimensions of the array.", + array_ndims_udf +); + +#[derive(Debug)] +pub(super) struct ArrayNdims { + signature: Signature, + aliases: Vec, +} +impl ArrayNdims { + pub fn new() -> Self { + Self { + signature: Signature::array(Volatility::Immutable), + aliases: vec![String::from("array_ndims"), String::from("list_ndims")], + } + } +} + +impl ScalarUDFImpl for ArrayNdims { + fn as_any(&self) -> &dyn Any { + self + } + fn name(&self) -> &str { + "array_ndims" + } + + fn signature(&self) -> &Signature { + &self.signature + } + + fn return_type(&self, arg_types: &[DataType]) -> datafusion_common::Result { + use DataType::*; + Ok(match arg_types[0] { + List(_) | LargeList(_) | FixedSizeList(_, _) => UInt64, + _ => { + return plan_err!("The array_ndims function can only accept List/LargeList/FixedSizeList."); + } + }) + } + + fn invoke(&self, args: &[ColumnarValue]) -> datafusion_common::Result { + let args = ColumnarValue::values_to_arrays(args)?; + crate::kernels::array_ndims(&args).map(ColumnarValue::Array) + } + + fn aliases(&self) -> &[String] { + &self.aliases + } +} diff --git a/datafusion/functions/src/datetime/to_timestamp.rs b/datafusion/functions/src/datetime/to_timestamp.rs index adba84af72ae..cd8593337c7a 100644 --- a/datafusion/functions/src/datetime/to_timestamp.rs +++ b/datafusion/functions/src/datetime/to_timestamp.rs @@ -561,6 +561,31 @@ mod tests { Ok(()) } + #[test] + fn to_timestamp_with_invalid_tz() -> Result<()> { + let mut date_string_builder = StringBuilder::with_capacity(2, 1024); + + date_string_builder.append_null(); + + date_string_builder.append_value("2020-09-08T13:42:29ZZ"); + + let string_array = + ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef); + + let expected_err = + "Arrow error: Parser error: Invalid timezone \"ZZ\": 'ZZ' is not a valid timezone"; + match to_timestamp(&[string_array]) { + Ok(_) => panic!("Expected error but got success"), + Err(e) => { + assert!( + e.to_string().contains(expected_err), + "Can not find expected error '{expected_err}'. Actual error '{e}'" + ); + } + } + Ok(()) + } + #[test] fn to_timestamp_with_no_matching_formats() -> Result<()> { let mut date_string_builder = StringBuilder::with_capacity(2, 1024); diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 01b2ae13c8d4..c10f5df54027 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -193,28 +193,6 @@ fn compute_array_length( } } -/// Returns the length of each array dimension -fn compute_array_dims(arr: Option) -> Result>>> { - let mut value = match arr { - Some(arr) => arr, - None => return Ok(None), - }; - if value.is_empty() { - return Ok(None); - } - let mut res = vec![Some(value.len() as u64)]; - - loop { - match value.data_type() { - DataType::List(..) => { - value = downcast_arg!(value, ListArray).value(0); - res.push(Some(value.len() as u64)); - } - _ => return Ok(Some(res)), - } - } -} - fn check_datatypes(name: &str, args: &[&ArrayRef]) -> Result<()> { let data_type = args[0].data_type(); if !args.iter().all(|arg| { @@ -1938,40 +1916,6 @@ pub fn array_intersect(args: &[ArrayRef]) -> Result { general_set_op(array1, array2, SetOp::Intersect) } -/// Cardinality SQL function -pub fn cardinality(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("cardinality expects one argument"); - } - - match &args[0].data_type() { - DataType::List(_) => { - let list_array = as_list_array(&args[0])?; - generic_list_cardinality::(list_array) - } - DataType::LargeList(_) => { - let list_array = as_large_list_array(&args[0])?; - generic_list_cardinality::(list_array) - } - other => { - exec_err!("cardinality does not support type '{:?}'", other) - } - } -} - -fn generic_list_cardinality( - array: &GenericListArray, -) -> Result { - let result = array - .iter() - .map(|arr| match compute_array_dims(arr)? { - Some(vector) => Ok(Some(vector.iter().map(|x| x.unwrap()).product::())), - None => Ok(None), - }) - .collect::>()?; - Ok(Arc::new(result) as ArrayRef) -} - // Create new offsets that are euqiavlent to `flatten` the array. fn get_offsets_for_flatten( offsets: OffsetBuffer, @@ -2074,72 +2018,6 @@ pub fn array_length(args: &[ArrayRef]) -> Result { } } -/// Array_dims SQL function -pub fn array_dims(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("array_dims needs one argument"); - } - - let data = match args[0].data_type() { - DataType::List(_) => { - let array = as_list_array(&args[0])?; - array - .iter() - .map(compute_array_dims) - .collect::>>()? - } - DataType::LargeList(_) => { - let array = as_large_list_array(&args[0])?; - array - .iter() - .map(compute_array_dims) - .collect::>>()? - } - array_type => { - return exec_err!("array_dims does not support type '{array_type:?}'"); - } - }; - - let result = ListArray::from_iter_primitive::(data); - - Ok(Arc::new(result) as ArrayRef) -} - -/// Array_ndims SQL function -pub fn array_ndims(args: &[ArrayRef]) -> Result { - if args.len() != 1 { - return exec_err!("array_ndims needs one argument"); - } - - fn general_list_ndims( - array: &GenericListArray, - ) -> Result { - let mut data = Vec::new(); - let ndims = datafusion_common::utils::list_ndims(array.data_type()); - - for arr in array.iter() { - if arr.is_some() { - data.push(Some(ndims)) - } else { - data.push(None) - } - } - - Ok(Arc::new(UInt64Array::from(data)) as ArrayRef) - } - match args[0].data_type() { - DataType::List(_) => { - let array = as_list_array(&args[0])?; - general_list_ndims::(array) - } - DataType::LargeList(_) => { - let array = as_large_list_array(&args[0])?; - general_list_ndims::(array) - } - array_type => exec_err!("array_ndims does not support type {array_type:?}"), - } -} - /// Represents the type of comparison for array_has. #[derive(Debug, PartialEq)] enum ComparisonType { diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index f3f37b20e3d2..96ebb4b3bbcb 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -339,9 +339,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayHas => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_has)(args) }), - BuiltinScalarFunction::ArrayDims => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_dims)(args) - }), BuiltinScalarFunction::ArrayDistinct => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_distinct)(args) }), @@ -357,9 +354,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::Flatten => { Arc::new(|args| make_scalar_function_inner(array_expressions::flatten)(args)) } - BuiltinScalarFunction::ArrayNdims => Arc::new(|args| { - make_scalar_function_inner(array_expressions::array_ndims)(args) - }), BuiltinScalarFunction::ArrayPopFront => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_pop_front)(args) }), @@ -405,9 +399,6 @@ pub fn create_physical_fun( BuiltinScalarFunction::ArrayIntersect => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_intersect)(args) }), - BuiltinScalarFunction::Cardinality => Arc::new(|args| { - make_scalar_function_inner(array_expressions::cardinality)(args) - }), BuiltinScalarFunction::ArrayResize => Arc::new(|args| { make_scalar_function_inner(array_expressions::array_resize)(args) }), @@ -516,15 +507,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function initcap") } }), - BuiltinScalarFunction::InStr => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::instr::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::instr::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function instr"), - }), BuiltinScalarFunction::Left => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(left, i32, "left"); @@ -1361,95 +1343,6 @@ mod tests { Utf8, StringArray ); - test_function!( - InStr, - &[lit("abc"), lit("b")], - Ok(Some(2)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("abc"), lit("c")], - Ok(Some(3)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("abc"), lit("d")], - Ok(Some(0)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("abc"), lit("")], - Ok(Some(1)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("Helloworld"), lit("world")], - Ok(Some(6)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("Helloworld"), lit(ScalarValue::Utf8(None))], - Ok(None), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit(ScalarValue::Utf8(None)), lit("Hello")], - Ok(None), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[ - lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), - lit(ScalarValue::LargeUtf8(Some("world".to_string()))) - ], - Ok(Some(6)), - i64, - Int64, - Int64Array - ); - test_function!( - InStr, - &[ - lit(ScalarValue::LargeUtf8(None)), - lit(ScalarValue::LargeUtf8(Some("world".to_string()))) - ], - Ok(None), - i64, - Int64, - Int64Array - ); - test_function!( - InStr, - &[ - lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), - lit(ScalarValue::LargeUtf8(None)) - ], - Ok(None), - i64, - Int64, - Int64Array - ); #[cfg(feature = "unicode_expressions")] test_function!( Left, @@ -2610,6 +2503,87 @@ mod tests { Int32Array ); #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("abc"), lit("d")], + Ok(Some(0)), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("abc"), lit("")], + Ok(Some(1)), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("Helloworld"), lit("world")], + Ok(Some(6)), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("Helloworld"), lit(ScalarValue::Utf8(None))], + Ok(None), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit(ScalarValue::Utf8(None)), lit("Hello")], + Ok(None), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[ + lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), + lit(ScalarValue::LargeUtf8(Some("world".to_string()))) + ], + Ok(Some(6)), + i64, + Int64, + Int64Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[ + lit(ScalarValue::LargeUtf8(None)), + lit(ScalarValue::LargeUtf8(Some("world".to_string()))) + ], + Ok(None), + i64, + Int64, + Int64Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[ + lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), + lit(ScalarValue::LargeUtf8(None)) + ], + Ok(None), + i64, + Int64, + Int64Array + ); + #[cfg(feature = "unicode_expressions")] test_function!( Strpos, &[lit("josé"), lit("é"),], diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index f17e39d02f06..9acbe029d438 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -636,17 +636,17 @@ enum ScalarFunction { Gcd = 85; ArrayAppend = 86; ArrayConcat = 87; - ArrayDims = 88; + // 88 was ArrayDims ArrayRepeat = 89; ArrayLength = 90; - ArrayNdims = 91; + // 91 was ArrayNdims ArrayPosition = 92; ArrayPositions = 93; ArrayPrepend = 94; ArrayRemove = 95; ArrayReplace = 96; // 97 was ArrayToString - Cardinality = 98; + // 98 was Cardinality ArrayElement = 99; ArraySlice = 100; Cot = 103; @@ -678,7 +678,7 @@ enum ScalarFunction { ArrayDistinct = 129; ArrayResize = 130; EndsWith = 131; - InStr = 132; + /// 132 was InStr MakeDate = 133; ArrayReverse = 134; RegexpLike = 135; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index 83b19013c77d..779b9a13133f 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22398,16 +22398,13 @@ impl serde::Serialize for ScalarFunction { Self::Gcd => "Gcd", Self::ArrayAppend => "ArrayAppend", Self::ArrayConcat => "ArrayConcat", - Self::ArrayDims => "ArrayDims", Self::ArrayRepeat => "ArrayRepeat", Self::ArrayLength => "ArrayLength", - Self::ArrayNdims => "ArrayNdims", Self::ArrayPosition => "ArrayPosition", Self::ArrayPositions => "ArrayPositions", Self::ArrayPrepend => "ArrayPrepend", Self::ArrayRemove => "ArrayRemove", Self::ArrayReplace => "ArrayReplace", - Self::Cardinality => "Cardinality", Self::ArrayElement => "ArrayElement", Self::ArraySlice => "ArraySlice", Self::Cot => "Cot", @@ -22436,7 +22433,6 @@ impl serde::Serialize for ScalarFunction { Self::ArrayDistinct => "ArrayDistinct", Self::ArrayResize => "ArrayResize", Self::EndsWith => "EndsWith", - Self::InStr => "InStr", Self::MakeDate => "MakeDate", Self::ArrayReverse => "ArrayReverse", Self::RegexpLike => "RegexpLike", @@ -22529,16 +22525,13 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Gcd", "ArrayAppend", "ArrayConcat", - "ArrayDims", "ArrayRepeat", "ArrayLength", - "ArrayNdims", "ArrayPosition", "ArrayPositions", "ArrayPrepend", "ArrayRemove", "ArrayReplace", - "Cardinality", "ArrayElement", "ArraySlice", "Cot", @@ -22567,7 +22560,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayDistinct", "ArrayResize", "EndsWith", - "InStr", "MakeDate", "ArrayReverse", "RegexpLike", @@ -22689,16 +22681,13 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "Gcd" => Ok(ScalarFunction::Gcd), "ArrayAppend" => Ok(ScalarFunction::ArrayAppend), "ArrayConcat" => Ok(ScalarFunction::ArrayConcat), - "ArrayDims" => Ok(ScalarFunction::ArrayDims), "ArrayRepeat" => Ok(ScalarFunction::ArrayRepeat), "ArrayLength" => Ok(ScalarFunction::ArrayLength), - "ArrayNdims" => Ok(ScalarFunction::ArrayNdims), "ArrayPosition" => Ok(ScalarFunction::ArrayPosition), "ArrayPositions" => Ok(ScalarFunction::ArrayPositions), "ArrayPrepend" => Ok(ScalarFunction::ArrayPrepend), "ArrayRemove" => Ok(ScalarFunction::ArrayRemove), "ArrayReplace" => Ok(ScalarFunction::ArrayReplace), - "Cardinality" => Ok(ScalarFunction::Cardinality), "ArrayElement" => Ok(ScalarFunction::ArrayElement), "ArraySlice" => Ok(ScalarFunction::ArraySlice), "Cot" => Ok(ScalarFunction::Cot), @@ -22727,7 +22716,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayDistinct" => Ok(ScalarFunction::ArrayDistinct), "ArrayResize" => Ok(ScalarFunction::ArrayResize), "EndsWith" => Ok(ScalarFunction::EndsWith), - "InStr" => Ok(ScalarFunction::InStr), "MakeDate" => Ok(ScalarFunction::MakeDate), "ArrayReverse" => Ok(ScalarFunction::ArrayReverse), "RegexpLike" => Ok(ScalarFunction::RegexpLike), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index 2eeee3d10e8c..16efb9da016c 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2725,17 +2725,17 @@ pub enum ScalarFunction { Gcd = 85, ArrayAppend = 86, ArrayConcat = 87, - ArrayDims = 88, + /// 88 was ArrayDims ArrayRepeat = 89, ArrayLength = 90, - ArrayNdims = 91, + /// 91 was ArrayNdims ArrayPosition = 92, ArrayPositions = 93, ArrayPrepend = 94, ArrayRemove = 95, ArrayReplace = 96, /// 97 was ArrayToString - Cardinality = 98, + /// 98 was Cardinality ArrayElement = 99, ArraySlice = 100, Cot = 103, @@ -2767,7 +2767,7 @@ pub enum ScalarFunction { ArrayDistinct = 129, ArrayResize = 130, EndsWith = 131, - InStr = 132, + /// / 132 was InStr MakeDate = 133, ArrayReverse = 134, RegexpLike = 135, @@ -2858,16 +2858,13 @@ impl ScalarFunction { ScalarFunction::Gcd => "Gcd", ScalarFunction::ArrayAppend => "ArrayAppend", ScalarFunction::ArrayConcat => "ArrayConcat", - ScalarFunction::ArrayDims => "ArrayDims", ScalarFunction::ArrayRepeat => "ArrayRepeat", ScalarFunction::ArrayLength => "ArrayLength", - ScalarFunction::ArrayNdims => "ArrayNdims", ScalarFunction::ArrayPosition => "ArrayPosition", ScalarFunction::ArrayPositions => "ArrayPositions", ScalarFunction::ArrayPrepend => "ArrayPrepend", ScalarFunction::ArrayRemove => "ArrayRemove", ScalarFunction::ArrayReplace => "ArrayReplace", - ScalarFunction::Cardinality => "Cardinality", ScalarFunction::ArrayElement => "ArrayElement", ScalarFunction::ArraySlice => "ArraySlice", ScalarFunction::Cot => "Cot", @@ -2896,7 +2893,6 @@ impl ScalarFunction { ScalarFunction::ArrayDistinct => "ArrayDistinct", ScalarFunction::ArrayResize => "ArrayResize", ScalarFunction::EndsWith => "EndsWith", - ScalarFunction::InStr => "InStr", ScalarFunction::MakeDate => "MakeDate", ScalarFunction::ArrayReverse => "ArrayReverse", ScalarFunction::RegexpLike => "RegexpLike", @@ -2983,16 +2979,13 @@ impl ScalarFunction { "Gcd" => Some(Self::Gcd), "ArrayAppend" => Some(Self::ArrayAppend), "ArrayConcat" => Some(Self::ArrayConcat), - "ArrayDims" => Some(Self::ArrayDims), "ArrayRepeat" => Some(Self::ArrayRepeat), "ArrayLength" => Some(Self::ArrayLength), - "ArrayNdims" => Some(Self::ArrayNdims), "ArrayPosition" => Some(Self::ArrayPosition), "ArrayPositions" => Some(Self::ArrayPositions), "ArrayPrepend" => Some(Self::ArrayPrepend), "ArrayRemove" => Some(Self::ArrayRemove), "ArrayReplace" => Some(Self::ArrayReplace), - "Cardinality" => Some(Self::Cardinality), "ArrayElement" => Some(Self::ArrayElement), "ArraySlice" => Some(Self::ArraySlice), "Cot" => Some(Self::Cot), @@ -3021,7 +3014,6 @@ impl ScalarFunction { "ArrayDistinct" => Some(Self::ArrayDistinct), "ArrayResize" => Some(Self::ArrayResize), "EndsWith" => Some(Self::EndsWith), - "InStr" => Some(Self::InStr), "MakeDate" => Some(Self::MakeDate), "ArrayReverse" => Some(Self::ArrayReverse), "RegexpLike" => Some(Self::RegexpLike), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index cb27b63b53cc..e21a5717aa7f 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -47,18 +47,18 @@ use datafusion_common::{ use datafusion_expr::expr::Unnest; use datafusion_expr::window_frame::{check_window_frame, regularize_window_order_by}; use datafusion_expr::{ - acosh, array, array_append, array_concat, array_dims, array_distinct, array_element, - array_empty, array_except, array_has, array_has_all, array_has_any, array_intersect, - array_length, array_ndims, array_pop_back, array_pop_front, array_position, - array_positions, array_prepend, array_remove, array_remove_all, array_remove_n, - array_repeat, array_replace, array_replace_all, array_replace_n, array_resize, - array_slice, array_sort, array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, - bit_length, btrim, cardinality, cbrt, ceil, character_length, chr, coalesce, - concat_expr, concat_ws_expr, cos, cosh, cot, current_date, current_time, degrees, + acosh, array, array_append, array_concat, array_distinct, array_element, array_empty, + array_except, array_has, array_has_all, array_has_any, array_intersect, array_length, + array_pop_back, array_pop_front, array_position, array_positions, array_prepend, + array_remove, array_remove_all, array_remove_n, array_repeat, array_replace, + array_replace_all, array_replace_n, array_resize, array_slice, array_sort, + array_union, arrow_typeof, ascii, asinh, atan, atan2, atanh, bit_length, btrim, cbrt, + ceil, character_length, chr, coalesce, concat_expr, concat_ws_expr, cos, cosh, cot, + current_date, current_time, degrees, digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, - factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, instr, iszero, - lcm, left, levenshtein, ln, log, log10, log2, + factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, iszero, lcm, + left, levenshtein, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power, radians, random, regexp_like, regexp_replace, repeat, replace, reverse, right, round, rpad, @@ -486,12 +486,10 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayHasAll => Self::ArrayHasAll, ScalarFunction::ArrayHasAny => Self::ArrayHasAny, ScalarFunction::ArrayHas => Self::ArrayHas, - ScalarFunction::ArrayDims => Self::ArrayDims, ScalarFunction::ArrayDistinct => Self::ArrayDistinct, ScalarFunction::ArrayElement => Self::ArrayElement, ScalarFunction::Flatten => Self::Flatten, ScalarFunction::ArrayLength => Self::ArrayLength, - ScalarFunction::ArrayNdims => Self::ArrayNdims, ScalarFunction::ArrayPopFront => Self::ArrayPopFront, ScalarFunction::ArrayPopBack => Self::ArrayPopBack, ScalarFunction::ArrayPosition => Self::ArrayPosition, @@ -509,7 +507,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ArrayIntersect => Self::ArrayIntersect, ScalarFunction::ArrayUnion => Self::ArrayUnion, ScalarFunction::ArrayResize => Self::ArrayResize, - ScalarFunction::Cardinality => Self::Cardinality, ScalarFunction::Array => Self::MakeArray, ScalarFunction::Md5 => Self::MD5, ScalarFunction::Sha224 => Self::SHA224, @@ -527,7 +524,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, ScalarFunction::EndsWith => Self::EndsWith, ScalarFunction::InitCap => Self::InitCap, - ScalarFunction::InStr => Self::InStr, ScalarFunction::Left => Self::Left, ScalarFunction::Lpad => Self::Lpad, ScalarFunction::Random => Self::Random, @@ -1523,16 +1519,10 @@ pub fn parse_expr( parse_expr(&args[2], registry, codec)?, parse_expr(&args[3], registry, codec)?, )), - ScalarFunction::Cardinality => { - Ok(cardinality(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayLength => Ok(array_length( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, )), - ScalarFunction::ArrayDims => { - Ok(array_dims(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayDistinct => { Ok(array_distinct(parse_expr(&args[0], registry, codec)?)) } @@ -1543,9 +1533,6 @@ pub fn parse_expr( ScalarFunction::ArrayEmpty => { Ok(array_empty(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::ArrayNdims => { - Ok(array_ndims(parse_expr(&args[0], registry, codec)?)) - } ScalarFunction::ArrayUnion => Ok(array_union( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, @@ -1647,10 +1634,6 @@ pub fn parse_expr( ScalarFunction::InitCap => { Ok(initcap(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::InStr => Ok(instr( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::Gcd => Ok(gcd( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index d875848a284c..8257540792fa 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1463,12 +1463,10 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArrayHasAll => Self::ArrayHasAll, BuiltinScalarFunction::ArrayHasAny => Self::ArrayHasAny, BuiltinScalarFunction::ArrayHas => Self::ArrayHas, - BuiltinScalarFunction::ArrayDims => Self::ArrayDims, BuiltinScalarFunction::ArrayDistinct => Self::ArrayDistinct, BuiltinScalarFunction::ArrayElement => Self::ArrayElement, BuiltinScalarFunction::Flatten => Self::Flatten, BuiltinScalarFunction::ArrayLength => Self::ArrayLength, - BuiltinScalarFunction::ArrayNdims => Self::ArrayNdims, BuiltinScalarFunction::ArrayPopFront => Self::ArrayPopFront, BuiltinScalarFunction::ArrayPopBack => Self::ArrayPopBack, BuiltinScalarFunction::ArrayPosition => Self::ArrayPosition, @@ -1486,7 +1484,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ArraySlice => Self::ArraySlice, BuiltinScalarFunction::ArrayIntersect => Self::ArrayIntersect, BuiltinScalarFunction::ArrayUnion => Self::ArrayUnion, - BuiltinScalarFunction::Cardinality => Self::Cardinality, BuiltinScalarFunction::MakeArray => Self::Array, BuiltinScalarFunction::MD5 => Self::Md5, BuiltinScalarFunction::SHA224 => Self::Sha224, @@ -1504,7 +1501,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, BuiltinScalarFunction::EndsWith => Self::EndsWith, BuiltinScalarFunction::InitCap => Self::InitCap, - BuiltinScalarFunction::InStr => Self::InStr, BuiltinScalarFunction::Left => Self::Left, BuiltinScalarFunction::Lpad => Self::Lpad, BuiltinScalarFunction::Random => Self::Random, diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 0ec44190ef7a..702ae99babd8 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -583,6 +583,11 @@ async fn roundtrip_expr_api() -> Result<()> { encode(col("a").cast_to(&DataType::Utf8, &schema)?, lit("hex")), decode(lit("1234"), lit("hex")), array_to_string(array(vec![lit(1), lit(2), lit(3)]), lit(",")), + array_dims(array(vec![lit(1), lit(2), lit(3)])), + array_ndims(array(vec![lit(1), lit(2), lit(3)])), + cardinality(array(vec![lit(1), lit(2), lit(3)])), + range(lit(1), lit(10), lit(2)), + gen_series(lit(1), lit(10), lit(2)), ]; // ensure expressions created with the expr api can be round tripped diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index 3e7a62e398c3..3a24a5418783 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -751,7 +751,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let fun = BuiltinScalarFunction::InStr; + let fun = BuiltinScalarFunction::Strpos; let substr = self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?; let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?; diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 48b5a0af7253..5ff253c1a34a 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -2042,5 +2042,5 @@ select position('' in '') 1 -query error DataFusion error: Error during planning: The INSTR/POSITION function can only accept strings, but got Int64. +query error DataFusion error: Error during planning: The STRPOS/INSTR/POSITION function can only accept strings, but got Int64. select position(1 in 1) diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt index d7085631777c..f0483aec8946 100644 --- a/datafusion/sqllogictest/test_files/timestamps.slt +++ b/datafusion/sqllogictest/test_files/timestamps.slt @@ -465,6 +465,10 @@ SELECT TIME 'not a time' as time; query error Cannot cast string '24:01:02' to value of Time64\(Nanosecond\) type SELECT TIME '24:01:02' as time; +# invalid timezone +query error Arrow error: Parser error: Invalid timezone "ZZ": 'ZZ' is not a valid timezone +SELECT TIMESTAMP '2023-12-05T21:58:10.45ZZ'; + statement ok set datafusion.optimizer.skip_failed_rules = true diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index cd1fbdabea1c..41cf81fe1ed4 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -832,12 +832,7 @@ initcap(str) ### `instr` -Returns the location where substr first appeared in str (counting from 1). -If substr does not appear in str, return 0. - -``` -instr(str, substr) -``` +_Alias of [strpos](#strpos)._ #### Arguments @@ -1108,6 +1103,10 @@ strpos(str, substr) - **substr**: Substring expression to search for. Can be a constant, column, or function, and any combination of string operators. +#### Aliases + +- instr + ### `substr` Extracts a substring of a specified number of characters from a specific @@ -1440,7 +1439,8 @@ Additional examples can be found [here](https://github.com/apache/arrow-datafusi ### `position` -Returns the position of substr in orig_str +Returns the position of `substr` in `origstr` (counting from 1). If `substr` does +not appear in `origstr`, return 0. ``` position(substr in origstr) @@ -1448,7 +1448,7 @@ position(substr in origstr) #### Arguments -- **substr**: he pattern string. +- **substr**: The pattern string. - **origstr**: The model string. ## Time and Date Functions