From 6c55b029f9199e64c4fe69d9b561f790af3f1586 Mon Sep 17 00:00:00 2001
From: Bruce Guenter
Date: Mon, 2 Dec 2024 14:18:44 -0600
Subject: [PATCH] feat(stdlib): Add `zip` function (#1158)

* feat(stdlib): Add `zip` function

* Change parameter name to `array`

* Fix output type def
---
 benches/stdlib.rs           |  12 +++-
 changelog.d/1158.feature.md |   2 +
 src/stdlib/mod.rs           |   3 +
 src/stdlib/zip.rs           | 144 ++++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+), 1 deletion(-)
 create mode 100644 changelog.d/1158.feature.md
 create mode 100644 src/stdlib/zip.rs

diff --git a/benches/stdlib.rs b/benches/stdlib.rs
index 441bf29c25..25108eebe7 100644
--- a/benches/stdlib.rs
+++ b/benches/stdlib.rs
@@ -21,6 +21,7 @@ criterion_group!(
     camelcase,
     ceil,
     chunks,
+    community_id,
     compact,
     contains,
     decode_base16,
@@ -169,7 +170,7 @@ criterion_group!(
     //uuidv4,
     upcase,
     values,
-    community_id,
+    zip,
 );
 
 criterion_main!(benches);
@@ -2933,3 +2934,12 @@ bench_function! {
         want: Ok("INPUT_STRING"),
     }
 }
+
+bench_function! {
+    zip => vrl::stdlib::Zip;
+
+    default {
+        args: func_args![array: value!([["one", "two", "three", "four"], ["one", 2, null, true]])],
+        want: Ok(value!([["one","one"], ["two",2], ["three",null], ["four",true]])),
+    }
+}
diff --git a/changelog.d/1158.feature.md b/changelog.d/1158.feature.md
new file mode 100644
index 0000000000..78a7fd819c
--- /dev/null
+++ b/changelog.d/1158.feature.md
@@ -0,0 +1,2 @@
+Added new `zip` function to iterate over an array of arrays and produce a new
+array of arrays, each containing one item from every input array.
diff --git a/src/stdlib/mod.rs b/src/stdlib/mod.rs
index acc5f96538..74f1c66bfc 100644
--- a/src/stdlib/mod.rs
+++ b/src/stdlib/mod.rs
@@ -212,6 +212,7 @@ cfg_if::cfg_if! {
         mod uuid_v4;
         mod uuid_v7;
         mod values;
+        mod zip;
 
         // -----------------------------------------------------------------------------
 
@@ -390,6 +391,7 @@ cfg_if::cfg_if! {
         pub use uuid_v4::UuidV4;
         pub use uuid_v7::UuidV7;
         pub use values::Values;
+        pub use zip::Zip;
         pub use self::array::Array;
         pub use self::md5::Md5;
         pub use self::seahash::Seahash;
@@ -579,5 +581,6 @@ pub fn all() -> Vec<Box<dyn Function>> {
         Box::new(UuidV4),
         Box::new(UuidV7),
         Box::new(Values),
+        Box::new(Zip),
     ]
 }
diff --git a/src/stdlib/zip.rs b/src/stdlib/zip.rs
new file mode 100644
index 0000000000..48f377aef9
--- /dev/null
+++ b/src/stdlib/zip.rs
@@ -0,0 +1,144 @@
+use crate::compiler::prelude::*;
+
+/// Zips an array of arrays into an array of tuples.
+///
+/// The `n`th output array holds the `n`th item of every input array, so the
+/// output is as long as the shortest input array. Fails if `value` or any of
+/// its elements is not an array.
+fn zip(value: Value) -> Resolved {
+    Ok(MultiZip(
+        value
+            .try_array()?
+            .into_iter()
+            .map(|value| value.try_array().map(Vec::into_iter))
+            .collect::<Result<_, _>>()?,
+    )
+    .collect::<Vec<_>>()
+    .into())
+}
+
+/// Advances a set of array iterators in lockstep, yielding one item from each.
+struct MultiZip(Vec<std::vec::IntoIter<Value>>);
+
+impl Iterator for MultiZip {
+    type Item = Vec<Value>;
+    fn next(&mut self) -> Option<Self::Item> {
+        // Collecting zero `Option`s yields `Some(vec![])`, so without this guard
+        // an empty outer array would yield empty tuples forever.
+        if self.0.is_empty() {
+            return None;
+        }
+        // Collecting into `Option` stops at the first exhausted iterator.
+        self.0.iter_mut().map(Iterator::next).collect()
+    }
+}
+
+/// The `zip` function: merges an array of arrays into an array of tuples.
+#[derive(Clone, Copy, Debug)]
+pub struct Zip;
+
+impl Function for Zip {
+    fn identifier(&self) -> &'static str {
+        "zip"
+    }
+
+    fn parameters(&self) -> &'static [Parameter] {
+        &[Parameter {
+            keyword: "array",
+            kind: kind::ARRAY,
+            required: true,
+        }]
+    }
+
+    fn examples(&self) -> &'static [Example] {
+        &[Example {
+            title: "merge three arrays into an array of 3-tuples",
+            source: r#"zip([["a", "b", "c"], [1, null, true], [4, 5, 6]])"#,
+            result: Ok(r#"[["a", 1, 4], ["b", null, 5], ["c", true, 6]]"#),
+        }]
+    }
+
+    fn compile(
+        &self,
+        _state: &TypeState,
+        _ctx: &mut FunctionCompileContext,
+        arguments: ArgumentList,
+    ) -> Compiled {
+        let array = arguments.required("array");
+        Ok(ZipFn { array }.as_expr())
+    }
+}
+
+#[derive(Debug, Clone)]
+struct ZipFn {
+    array: Box<dyn Expression>,
+}
+
+impl FunctionExpression for ZipFn {
+    fn resolve(&self, ctx: &mut Context) -> Resolved {
+        zip(self.array.resolve(ctx)?)
+    }
+
+    fn type_def(&self, _state: &TypeState) -> TypeDef {
+        TypeDef::array(Collection::any())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::value;
+
+    use super::*;
+
+    test_function![
+        zip => Zip;
+
+        zips_two_arrays {
+            args: func_args![array: value!([[1, 2, 3], [4, 5, 6]])],
+            want: Ok(value!([[1, 4], [2, 5], [3, 6]])),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        zips_three_arrays {
+            args: func_args![array: value!([[1, 2, 3], [4, 5, 6], [7, 8, 9]])],
+            want: Ok(value!([[1, 4, 7], [2, 5, 8], [3, 6, 9]])),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        uses_shortest_length1 {
+            args: func_args![array: value!([[1, 2, 3], [4, 5]])],
+            want: Ok(value!([[1, 4], [2, 5]])),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        uses_shortest_length2 {
+            args: func_args![array: value!([[1, 2], [4, 5, 6]])],
+            want: Ok(value!([[1, 4], [2, 5]])),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        requires_outer_array {
+            args: func_args![array: 1],
+            want: Err("expected array, got integer"),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        requires_inner_arrays1 {
+            args: func_args![array: value!([true, []])],
+            want: Err("expected array, got boolean"),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        requires_inner_arrays2 {
+            args: func_args![array: value!([[], null])],
+            want: Err("expected array, got null"),
+            tdef: TypeDef::array(Collection::any()),
+        }
+
+        empty_outer_array_yields_empty_array {
+            args: func_args![array: value!([])],
+            want: Ok(value!([])),
+            tdef: TypeDef::array(Collection::any()),
+        }
+    ];
+}