From 38d9bdde69381b4a7516346f862310700b6eb96e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Sun, 18 Aug 2024 13:20:40 -0400 Subject: [PATCH] Adding benchmark for string_arrays --- datafusion/functions/Cargo.toml | 4 + datafusion/functions/benches/string_arrays.rs | 155 ++++++++++++++++++ datafusion/functions/src/utils.rs | 2 + 3 files changed, 161 insertions(+) create mode 100644 datafusion/functions/benches/string_arrays.rs diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 2b3f80fc930b..ef07983e384b 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -156,3 +156,7 @@ required-features = ["unicode_expressions"] harness = false name = "repeat" required-features = ["string_expressions"] + +[[bench]] +harness = false +name = "string_arrays" \ No newline at end of file diff --git a/datafusion/functions/benches/string_arrays.rs b/datafusion/functions/benches/string_arrays.rs new file mode 100644 index 000000000000..46ceea6ab645 --- /dev/null +++ b/datafusion/functions/benches/string_arrays.rs @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+extern crate criterion;
+
+use arrow::array::{Array, ArrayAccessor, GenericStringArray, OffsetSizeTrait, StringViewArray};
+use arrow::util::bench_util::{
+    create_string_array_with_len, create_string_view_array_with_len,
+};
+use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
+use datafusion_functions::utils::{Iter, StringArrayType, StringArrays};
+
+/// Benchmarks visiting every non-null value of a `StringViewArray` and a
+/// `GenericStringArray` via `StringArrays`, directly on the arrow arrays, and
+/// through a generic `StringArrayType` bound, using both `iter()` and an
+/// index loop for each array size / string length combination.
+///
+/// Every visited value is passed to `black_box` so the reads being timed
+/// cannot be optimized away.
+fn criterion_benchmark(c: &mut Criterion) {
+    // One group for all sizes: the size is already part of each benchmark id.
+    let mut group = c.benchmark_group("string_arrays benchmark");
+
+    for size in [4096, 16384] {
+        for str_len in [4, 32] {
+            let string_view_array = create_string_view_array_with_len(size, 0.2, str_len, true);
+            // NOTE(review): `i32` offsets assumed here and in the turbofish calls below - confirm.
+            let string_array = create_string_array_with_len::<i32>(size, 0.2, str_len);
+
+            group.bench_function(BenchmarkId::new(format!("StringArrays-iter-{size}"), str_len), |b| {
+                b.iter(|| {
+                    let array = StringArrays::try_from(&string_view_array).unwrap();
+                    array.iter().for_each(|v| {
+                        if let Some(v) = v {
+                            black_box(v);
+                        }
+                    });
+
+                    let array = StringArrays::try_from(&string_array).unwrap();
+                    array.iter().for_each(|v| {
+                        if let Some(v) = v {
+                            black_box(v);
+                        }
+                    });
+                })
+            });
+
+            group.bench_function(BenchmarkId::new(format!("StringArrays-using_loop-{size}"), str_len), |b| {
+                b.iter(|| {
+                    let array = StringArrays::try_from(&string_view_array).unwrap();
+                    for i in 0..size {
+                        if !array.is_null(i) {
+                            black_box(array.value(i));
+                        }
+                    }
+
+                    let array = StringArrays::try_from(&string_array).unwrap();
+                    for i in 0..size {
+                        if !array.is_null(i) {
+                            black_box(array.value(i));
+                        }
+                    }
+                })
+            });
+
+            group.bench_function(BenchmarkId::new(format!("direct-iter-{size}"), str_len), |b| {
+                b.iter(|| {
+                    string_view_array.iter().for_each(|v| {
+                        if let Some(v) = v {
+                            black_box(v);
+                        }
+                    });
+                    string_array.iter().for_each(|v| {
+                        if let Some(v) = v {
+                            black_box(v);
+                        }
+                    });
+                })
+            });
+
+            group.bench_function(BenchmarkId::new(format!("direct-using_loop-{size}"), str_len), |b| {
+                b.iter(|| {
+                    for i in 0..size {
+                        if !string_view_array.is_null(i) {
+                            black_box(string_view_array.value(i));
+                        }
+                    }
+
+                    for i in 0..size {
+                        if !string_array.is_null(i) {
+                            black_box(string_array.value(i));
+                        }
+                    }
+                })
+            });
+
+            group.bench_function(BenchmarkId::new(format!("StringArrayType-iter-{size}"), str_len), |b| {
+                fn test<'a, T, S>(string_array: S)
+                where
+                    T: OffsetSizeTrait,
+                    S: StringArrayType<'a>,
+                {
+                    string_array.iter().for_each(|v| {
+                        if let Some(v) = v {
+                            black_box(v);
+                        }
+                    });
+                }
+
+                b.iter(|| {
+                    // Pass borrows: cloning the arrays here would be timed as well.
+                    test::<i32, &StringViewArray>(&string_view_array);
+                    test::<i32, &GenericStringArray<i32>>(&string_array);
+                })
+            });
+
+            group.bench_function(BenchmarkId::new(format!("StringArrayType-using_loop-{size}"), str_len), |b| {
+                fn test<'a, T, S>(string_array: S, size: usize)
+                where
+                    T: OffsetSizeTrait,
+                    S: StringArrayType<'a>,
+                {
+                    for i in 0..size {
+                        if !string_array.is_null(i) {
+                            black_box(StringArrayType::value(&string_array, i));
+                        }
+                    }
+                }
+
+                b.iter(|| {
+                    test::<i32, &StringViewArray>(&string_view_array, size);
+                    test::<i32, &GenericStringArray<i32>>(&string_array, size);
+                })
+            });
+        }
+    }
+    group.finish();
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/utils.rs b/datafusion/functions/src/utils.rs
index c3937b1f8e1b..cb2584133865 100644
--- a/datafusion/functions/src/utils.rs
+++ b/datafusion/functions/src/utils.rs
@@ -352,6 +352,7 @@ impl<'a> Display for StringArrays<'a> {
 impl<'a> ArrayAccessor for &'a StringArrays<'a> {
     type Item = &'a str;
 
+    #[inline]
     fn value(&self, index: usize) -> Self::Item {
         match self {
             StringArrays::StringView(sv) => StringArrayType::value(sv, index),
@@ -372,6 +373,7 @@ impl<'a> ArrayAccessor for &'a StringArrays<'a> {
 impl<'a> ArrayAccessor for StringArrays<'a> {
     type Item = &'a str;
 
+    #[inline]
     fn value(&self, index: usize) -> Self::Item {
         match self {
             StringArrays::StringView(sv) => StringArrayType::value(sv, index),