From de85af38d5ca5be0920bbd1a9dd3999e20a85e59 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 11 Dec 2024 17:20:52 +0000 Subject: [PATCH 1/3] Sum statistics --- Cargo.lock | 184 +++++++++--------- Cargo.toml | 12 +- bench-vortex/benches/clickbench.rs | 2 +- bench-vortex/src/bin/clickbench.rs | 4 +- bench-vortex/src/clickbench.rs | 169 ++++++++-------- vortex-array/src/array/primitive/stats.rs | 32 ++- vortex-array/src/array/varbin/stats.rs | 1 + vortex-array/src/data/viewed.rs | 12 ++ vortex-array/src/stats/flatbuffers.rs | 5 + vortex-array/src/stats/mod.rs | 14 +- vortex-array/src/stats/statsset.rs | 6 + vortex-datafusion/src/memory/statistics.rs | 8 + vortex-datafusion/src/persistent/format.rs | 11 +- .../src/persistent/statistics.rs | 8 +- vortex-dtype/src/ptype.rs | 10 + vortex-file/src/read/builder/initial_read.rs | 12 +- vortex-file/src/write/writer.rs | 1 + .../flatbuffers/vortex-array/array.fbs | 1 + vortex-flatbuffers/src/generated/array.rs | 65 ++++--- vortex-flatbuffers/src/generated/dtype.rs | 126 ++++++------ vortex-flatbuffers/src/generated/footer.rs | 30 +-- vortex-flatbuffers/src/generated/message.rs | 56 +++--- vortex-flatbuffers/src/generated/scalar.rs | 30 +-- vortex-scalar/src/pvalue.rs | 65 ++++++- 24 files changed, 533 insertions(+), 331 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48c8fc5f6c..1f879171f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1191,10 +1191,7 @@ dependencies = [ [[package]] name = "datafusion" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbba0799cf6913b456ed07a94f0f3b6e12c62a5d88b10809e2284a0f2b915c05" dependencies = [ - "ahash", "arrow", "arrow-array", "arrow-ipc", @@ -1210,6 +1207,7 @@ dependencies = [ "datafusion-expr", "datafusion-functions", "datafusion-functions-aggregate", + "datafusion-functions-table", "datafusion-functions-window", "datafusion-optimizer", "datafusion-physical-expr", @@ -1219,17 +1217,12 @@ dependencies = [ "datafusion-sql", "futures", "glob", - "half", - "hashbrown 0.14.5", - "indexmap", "itertools 0.13.0", "log", - "num_cpus", "object_store", "parking_lot", "parquet", "paste", - "pin-project-lite", "rand", "sqlparser", "tempfile", @@ -1241,8 +1234,6 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7493c5c2d40eec435b13d92e5703554f4efc7059451fcb8d3a79580ff0e45560" dependencies = [ "arrow-schema", "async-trait", @@ -1256,8 +1247,6 @@ dependencies = [ [[package]] name = "datafusion-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24953049ebbd6f8964f91f60aa3514e121b5e81e068e33b60e77815ab369b25c" dependencies = [ "ahash", "arrow", @@ -1268,31 +1257,31 @@ dependencies = [ "half", "hashbrown 0.14.5", "indexmap", - "instant", "libc", - "num_cpus", "object_store", "parquet", "paste", + "recursive", "sqlparser", "tokio", + "web-time", ] [[package]] name = "datafusion-common-runtime" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f06df4ef76872e11c924d3c814fd2a8dd09905ed2e2195f71c857d78abd19685" dependencies = [ "log", "tokio", ] +[[package]] +name = "datafusion-doc" +version = "43.0.0" + [[package]] name = "datafusion-execution" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bbdcb628d690f3ce5fea7de81642b514486d58ff9779a51f180a69a4eadb361" dependencies = [ "arrow", "chrono", @@ -1300,7 +1289,6 @@ dependencies = [ "datafusion-common", "datafusion-expr", "futures", - "hashbrown 0.14.5", "log", "object_store", "parking_lot", @@ -1312,8 +1300,6 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8036495980e3131f706b7d33ab00b4492d73dc714e3cb74d11b50f9602a73246" dependencies = [ "ahash", "arrow", @@ -1321,12 +1307,14 @@ dependencies = [ "arrow-buffer", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-expr-common", "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr-common", "indexmap", "paste", + "recursive", "serde_json", "sqlparser", "strum", @@ -1336,8 +1324,6 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4da0f3cb4669f9523b403d6b5a0ec85023e0ab3bf0183afd1517475b3e64fdd2" dependencies = [ "arrow", "datafusion-common", @@ -1348,16 +1334,16 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52c4012648b34853e40a2c6bcaa8772f837831019b68aca384fb38436dba162" dependencies = [ "arrow", "arrow-buffer", "base64", "chrono", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", + "datafusion-macros", "hashbrown 0.14.5", "hex", "itertools 0.13.0", @@ -1371,20 +1357,19 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5b8bb624597ba28ed7446df4a9bd7c7a7bde7c578b6b527da3f47371d5f6741" dependencies = [ "ahash", "arrow", "arrow-schema", "datafusion-common", + "datafusion-doc", "datafusion-execution", "datafusion-expr", "datafusion-functions-aggregate-common", + "datafusion-macros", "datafusion-physical-expr", "datafusion-physical-expr-common", "half", - "indexmap", "log", "paste", ] @@ -1392,8 +1377,6 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fb06208fc470bc8cf1ce2d9a1159d42db591f2c7264a8c1776b53ad8f675143" dependencies = [ "ahash", "arrow", @@ -1403,11 +1386,32 @@ dependencies = [ "rand", ] +[[package]] +name = "datafusion-functions-table" +version = "43.0.0" +dependencies = [ + "ahash", + "arrow", + "arrow-schema", + "async-trait", + "datafusion-catalog", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-functions-aggregate-common", + "datafusion-physical-expr", + "datafusion-physical-expr-common", + "datafusion-physical-plan", + "half", + "indexmap", + "log", + "parking_lot", + "paste", +] + [[package]] name = "datafusion-functions-window" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ae23356c634e54c59f7c51acb7a5b9f6240ffb2cf997049a1a24a8a88598dbe" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1421,18 +1425,23 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4b3d6ff7794acea026de36007077a06b18b89e4f9c3fea7f2215f9f7dd9059b" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", ] +[[package]] +name = "datafusion-macros" +version = "43.0.0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "datafusion-optimizer" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bec6241eb80c595fa0e1a8a6b69686b5cf3bd5fdacb8319582a0943b0bd788aa" dependencies = [ "arrow", "async-trait", @@ -1440,28 +1449,23 @@ dependencies = [ "datafusion-common", "datafusion-expr", "datafusion-physical-expr", - "hashbrown 0.14.5", "indexmap", "itertools 0.13.0", "log", - "paste", + "recursive", + "regex", "regex-syntax", ] [[package]] name = "datafusion-physical-expr" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3370357b8fc75ec38577700644e5d1b0bc78f38babab99c0b8bd26bafb3e4335" dependencies = [ "ahash", "arrow", "arrow-array", "arrow-buffer", - "arrow-ord", "arrow-schema", - "arrow-string", - "chrono", "datafusion-common", "datafusion-expr", "datafusion-expr-common", @@ -1479,38 +1483,33 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8b7734d94bf2fa6f6e570935b0ddddd8421179ce200065be97874e13d46a47b" dependencies = [ "ahash", "arrow", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", - "rand", + "itertools 0.13.0", ] [[package]] name = "datafusion-physical-optimizer" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eee8c479522df21d7b395640dff88c5ed05361852dce6544d7c98e9dbcebffe" dependencies = [ "arrow", - "arrow-schema", "datafusion-common", "datafusion-execution", "datafusion-expr-common", "datafusion-physical-expr", "datafusion-physical-plan", "itertools 0.13.0", + "log", + "recursive", ] [[package]] name = "datafusion-physical-plan" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17e1fc2e2c239d14e8556f2622b19a726bf6bc6962cc00c71fc52626274bee24" dependencies = [ "ahash", "arrow", @@ -1524,7 +1523,6 @@ dependencies = [ "datafusion-common-runtime", "datafusion-execution", "datafusion-expr", - "datafusion-functions-aggregate-common", "datafusion-functions-window-common", "datafusion-physical-expr", "datafusion-physical-expr-common", @@ -1544,8 +1542,6 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "43.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63e3a4ed41dbee20a5d947a59ca035c225d67dc9cbe869c10f66dcdf25e7ce51" dependencies = [ "arrow", "arrow-array", @@ -1554,9 +1550,9 @@ dependencies = [ "datafusion-expr", "indexmap", "log", + "recursive", "regex", "sqlparser", - "strum", ] [[package]] @@ -2050,12 +2046,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "hermit-abi" version = "0.4.0" @@ -2399,18 +2389,6 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" -[[package]] -name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", -] - [[package]] name = "integer-encoding" version = "3.0.4" @@ -2440,7 +2418,7 @@ version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" dependencies = [ - "hermit-abi 0.4.0", + "hermit-abi", "libc", "windows-sys 0.52.0", ] @@ -2883,16 +2861,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi 0.3.9", - "libc", -] - [[package]] name = "num_enum" version = "0.5.11" @@ -3290,7 +3258,7 @@ checksum = "a604568c3202727d1507653cb121dbd627a58684eb09a820fd746bee38b4442f" dependencies = [ "cfg-if", "concurrent-queue", - "hermit-abi 0.4.0", + "hermit-abi", "pin-project-lite", "rustix", "tracing", @@ -3422,6 +3390,15 @@ dependencies = [ "prost", ] +[[package]] +name = "psm" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "200b9ff220857e53e184257720a14553b2f4aa02577d2ed9842d45d4b9654810" +dependencies = [ + "cc", +] + [[package]] name = "pyo3" version = "0.22.6" @@ -3658,6 +3635,26 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "recursive" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" +dependencies = [ + "recursive-proc-macro-impl", + "stacker", +] + +[[package]] +name = "recursive-proc-macro-impl" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" +dependencies = [ + "quote", + "syn 2.0.90", +] + [[package]] name = "redox_syscall" version = "0.5.7" @@ -4165,9 +4162,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.51.0" +version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fe11944a61da0da3f592e19a45ebe5ab92dc14a779907ff1f08fbb797bfefc7" +checksum = "9a875d8cd437cc8a97e9aeaeea352ec9a19aea99c23e9effb17757291de80b08" dependencies = [ "log", "sqlparser_derive", @@ -4190,6 +4187,19 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stacker" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "799c883d55abdb5e98af1a7b3f23b9b6de8ecada0ecac058672d7635eb48ca7b" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "windows-sys 0.59.0", +] + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/Cargo.toml b/Cargo.toml index 2e9b226ae0..3cbef801d1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,12 +67,12 @@ clap = "4.5.13" compio = "0.13" criterion = { version = "0.5.1", features = ["html_reports"] } croaring = "2.1.0" -datafusion = { version = "43.0.0", default-features = false } -datafusion-common = "43.0.0" -datafusion-execution = "43.0.0" -datafusion-expr = "43.0.0" -datafusion-physical-expr = "43.0.0" -datafusion-physical-plan = "43.0.0" +datafusion = { version = "43.0.0", default-features = false, path = "../datafusion/datafusion/core" } +datafusion-common = { version = "43.0.0", path = "../datafusion/datafusion/common" } +datafusion-execution = { version = "43.0.0", path = "../datafusion/datafusion/execution" } +datafusion-expr = { version = "43.0.0", path = "../datafusion/datafusion/expr" } +datafusion-physical-expr = { version = "43.0.0", path = "../datafusion/datafusion/physical-expr" } +datafusion-physical-plan = { version = "43.0.0", path = "../datafusion/datafusion/physical-plan" } divan = "0.1.14" enum-iterator = "2.0.0" enum-map = "2.7.3" diff --git a/bench-vortex/benches/clickbench.rs b/bench-vortex/benches/clickbench.rs index e89b7d739a..88022ef88c 100644 --- a/bench-vortex/benches/clickbench.rs +++ b/bench-vortex/benches/clickbench.rs @@ -45,7 +45,7 @@ fn benchmark(c: &mut Criterion) { let context = session_context.clone(); runtime.block_on(async move { - clickbench::register_vortex_files(&context, "hits", basepath.as_path(), &HITS_SCHEMA) + clickbench::register_vortex_files(context, "hits", basepath.as_path(), &HITS_SCHEMA) .await .unwrap(); }); diff --git a/bench-vortex/src/bin/clickbench.rs b/bench-vortex/src/bin/clickbench.rs index 8abc2dd6b5..9bfe742483 100644 --- a/bench-vortex/src/bin/clickbench.rs +++ b/bench-vortex/src/bin/clickbench.rs @@ -62,7 +62,7 @@ fn main() { // The clickbench-provided file is missing some higher-level type info, so we reprocess it // to add that info, see https://github.com/ClickHouse/ClickBench/issues/7. (0_u32..100).into_par_iter().for_each(|idx| { - let output_path = basepath.join(format!("hits_{idx}.parquet")); + let output_path = basepath.join("parquet").join(format!("hits_{idx}.parquet")); idempotent(&output_path, |output_path| { eprintln!("Fixing parquet file {idx}"); @@ -137,7 +137,7 @@ fn main() { } => { runtime.block_on(async { clickbench::register_vortex_files( - &context, + context.clone(), "hits", basepath.as_path(), &HITS_SCHEMA, diff --git a/bench-vortex/src/clickbench.rs b/bench-vortex/src/clickbench.rs index 268338e710..e93595f08e 100644 --- a/bench-vortex/src/clickbench.rs +++ b/bench-vortex/src/clickbench.rs @@ -1,4 +1,4 @@ -use std::path::Path; +use std::path::{Path, PathBuf}; use std::sync::{Arc, LazyLock}; use arrow_schema::{DataType, Field, Schema, TimeUnit}; @@ -7,6 +7,7 @@ use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; use datafusion::prelude::{ParquetReadOptions, SessionContext}; +use futures::{stream, StreamExt, TryStreamExt}; use tokio::fs::{create_dir_all, OpenOptions}; use vortex::aliases::hash_map::HashMap; use vortex::array::{ChunkedArray, StructArray}; @@ -140,99 +141,112 @@ pub static HITS_SCHEMA: LazyLock = LazyLock::new(|| { }); pub async fn register_vortex_files( - session: &SessionContext, + session: SessionContext, table_name: &str, input_path: &Path, schema: &Schema, ) -> anyhow::Result<()> { - let vortex_dir = input_path.parent().unwrap().join("vortex_compressed"); + let session2 = session.clone(); + let vortex_dir = input_path.join("vortex"); create_dir_all(&vortex_dir).await?; - for idx in 0..100 { - let parquet_file_path = input_path.join(format!("hits_{idx}.parquet")); - let output_path = vortex_dir.join(format!("hits_{idx}.{VORTEX_FILE_EXTENSION}")); - idempotent_async(&output_path, |vtx_file| async move { - eprintln!("Processing file {idx}"); - let record_batches = session - .read_parquet( - parquet_file_path.to_str().unwrap(), - ParquetReadOptions::default(), - ) - .await? - .collect() - .await?; + let format = Arc::new(VortexFormat::new(&CTX)); + let table_path = vortex_dir + .to_str() + .ok_or_else(|| vortex_err!("Path is not valid UTF-8"))?; + let table_path = format!("file://{table_path}/"); + let table_url = ListingTableUrl::parse(table_path)?; - // Create a ChunkedArray from the set of chunks. - let sts = record_batches - .into_iter() - .map(ArrayData::try_from) - .map(|a| a.unwrap().into_struct().unwrap()) - .collect::>(); + let config = ListingTableConfig::new(table_url) + .with_listing_options(ListingOptions::new(format as _)) + .with_schema(schema.clone().into()); - let mut arrays_map: HashMap, Vec> = HashMap::default(); - let mut types_map: HashMap, DType> = HashMap::default(); + let listing_table = Arc::new(ListingTable::try_new(config)?); + session2.register_table(table_name, listing_table as _)?; - for st in sts.into_iter() { - let struct_dtype = st.dtype().as_struct().unwrap(); - let names = struct_dtype.names().iter(); - let types = struct_dtype.dtypes().iter(); + let _paths: Vec = stream::iter(0..100) + .map(|idx| { + let parquet_file_path = input_path + .join("parquet") + .join(format!("hits_{idx}.parquet")); + let output_path = vortex_dir.join(format!("hits_{idx}.{VORTEX_FILE_EXTENSION}")); + let session = session.clone(); + let schema = schema.clone(); - for (field_name, field_type) in names.zip(types) { - let val = arrays_map.entry(field_name.clone()).or_default(); - val.push(st.field_by_name(field_name.as_ref()).unwrap()); + tokio::spawn(async move { + let output_path = output_path.clone(); + idempotent_async(&output_path, move |vtx_file| async move { + eprintln!("Processing file {idx}"); + let record_batches = session + .read_parquet( + parquet_file_path.to_str().unwrap(), + ParquetReadOptions::default(), + ) + .await? + .collect() + .await?; - types_map.insert(field_name.clone(), field_type.clone()); - } - } + // Create a ChunkedArray from the set of chunks. + let sts = record_batches + .into_iter() + .map(ArrayData::try_from) + .map(|a| a.unwrap().into_struct().unwrap()) + .collect::>(); - let fields = schema - .fields() - .iter() - .map(|field| { - let name: Arc = field.name().as_str().into(); - let dtype = types_map[&name].clone(); - let chunks = arrays_map.remove(&name).unwrap(); - let chunked_child = ChunkedArray::try_new(chunks, dtype).unwrap(); + let mut arrays_map: HashMap, Vec> = HashMap::default(); + let mut types_map: HashMap, DType> = HashMap::default(); - (name, chunked_child.into_array()) - }) - .collect::>(); + for st in sts.into_iter() { + let struct_dtype = st.dtype().as_struct().unwrap(); + let names = struct_dtype.names().iter(); + let types = struct_dtype.dtypes().iter(); - let data = StructArray::from_fields(&fields)?.into_array(); + for (field_name, field_type) in names.zip(types) { + let val = arrays_map.entry(field_name.clone()).or_default(); + val.push(st.field_by_name(field_name.as_ref()).unwrap()); - let compressor = SamplingCompressor::default(); - let data = compressor.compress(&data, None)?.into_array(); + types_map.insert(field_name.clone(), field_type.clone()); + } + } - let f = OpenOptions::new() - .write(true) - .truncate(true) - .create(true) - .open(&vtx_file) - .await?; + let fields = schema + .fields() + .iter() + .map(|field| { + let name: Arc = field.name().as_str().into(); + let dtype = types_map[&name].clone(); + let chunks = arrays_map.remove(&name).unwrap(); + let chunked_child = ChunkedArray::try_new(chunks, dtype).unwrap(); - let mut writer = VortexFileWriter::new(f); - writer = writer.write_array_columns(data).await?; - writer.finalize().await?; + (name, chunked_child.into_array()) + }) + .collect::>(); - anyhow::Ok(()) - }) - .await?; - } + let data = StructArray::from_fields(&fields)?.into_array(); - let format = Arc::new(VortexFormat::new(&CTX)); - let table_path = vortex_dir - .to_str() - .ok_or_else(|| vortex_err!("Path is not valid UTF-8"))?; - let table_path = format!("file://{table_path}/"); - let table_url = ListingTableUrl::parse(table_path)?; + let compressor = SamplingCompressor::default(); + let data = compressor.compress(&data, None)?.into_array(); - let config = ListingTableConfig::new(table_url) - .with_listing_options(ListingOptions::new(format as _)) - .with_schema(schema.clone().into()); + let f = OpenOptions::new() + .write(true) + .truncate(true) + .create(true) + .open(&vtx_file) + .await?; - let listing_table = Arc::new(ListingTable::try_new(config)?); + let mut writer = VortexFileWriter::new(f); + writer = writer.write_array_columns(data).await?; + writer.finalize().await?; - session.register_table(table_name, listing_table as _)?; + anyhow::Ok(()) + }) + .await + .expect("Failed to write Vortex file") + }) + }) + .buffered(16) + .try_collect::>() + .await?; Ok(()) } @@ -244,10 +258,13 @@ pub async fn register_parquet_files( schema: &Schema, ) -> anyhow::Result<()> { let format = Arc::new(ParquetFormat::new()); - let table_path = input_path - .to_str() - .ok_or_else(|| vortex_err!("Path is not valid UTF-8"))?; - let table_path = format!("file://{table_path}/"); + let table_path = input_path.join("parquet"); + let table_path = format!( + "file://{}/", + table_path + .to_str() + .ok_or_else(|| vortex_err!("Path is not valid UTF-8"))? + ); let table_url = ListingTableUrl::parse(table_path)?; let config = ListingTableConfig::new(table_url) diff --git a/vortex-array/src/array/primitive/stats.rs b/vortex-array/src/array/primitive/stats.rs index 4c530fb95c..a211ebcb14 100644 --- a/vortex-array/src/array/primitive/stats.rs +++ b/vortex-array/src/array/primitive/stats.rs @@ -2,11 +2,12 @@ use core::marker::PhantomData; use std::cmp::Ordering; use std::mem::size_of; +use arrow_array::ArrowNativeTypeOp; use arrow_buffer::buffer::BooleanBuffer; use itertools::{Itertools as _, MinMaxResult}; use num_traits::PrimInt; use vortex_dtype::half::f16; -use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability}; +use vortex_dtype::{match_each_native_ptype, DType, NativePType, Nullability, PType}; use vortex_error::{vortex_panic, VortexResult}; use vortex_scalar::Scalar; @@ -71,6 +72,9 @@ impl StatisticsVTable<[T]> for PrimitiveEncoding { ); stats } + Stat::Sum => sum_of::(array) + .map(|s| StatsSet::of(stat, s)) + .unwrap_or_else(StatsSet::default), Stat::IsConstant => { let first = array[0]; let is_constant = array.iter().all(|x| first.is_eq(*x)); @@ -90,6 +94,32 @@ impl StatisticsVTable<[T]> for PrimitiveEncoding { } } +fn sum_of(values: &[T]) -> Option { + match T::PTYPE { + PType::I8 | PType::I16 | PType::I32 | PType::I64 => { + let mut sum: i64 = 0; + for v in values { + sum = sum.checked_add(v.to_i64()?)?; + } + Some(Scalar::from(sum)) + } + PType::U8 | PType::U16 | PType::U32 | PType::U64 => { + let mut sum: u64 = 0; + for v in values { + sum = sum.checked_add(v.to_u64()?)?; + } + Some(Scalar::from(sum)) + } + PType::F16 | PType::F32 | PType::F64 => { + let mut sum: f64 = 0.0; + for v in values { + sum = sum.add_checked(v.to_f64()?).ok()?; + } + Some(Scalar::from(sum)) + } + } +} + struct NullableValues<'a, T: PStatsType>(&'a [T], &'a BooleanBuffer); impl StatisticsVTable> for PrimitiveEncoding { diff --git a/vortex-array/src/array/varbin/stats.rs b/vortex-array/src/array/varbin/stats.rs index 543391c56b..962372f2f6 100644 --- a/vortex-array/src/array/varbin/stats.rs +++ b/vortex-array/src/array/varbin/stats.rs @@ -59,6 +59,7 @@ pub fn compute_varbin_statistics>( } } Stat::Min | Stat::Max => compute_min_max(array)?, + Stat::Sum => StatsSet::default(), Stat::IsSorted => { let is_sorted = array.with_iterator(|iter| iter.flatten().is_sorted())?; let mut stats = StatsSet::of(Stat::IsSorted, is_sorted); diff --git a/vortex-array/src/data/viewed.rs b/vortex-array/src/data/viewed.rs index bd1f3bad28..1b81bb43f5 100644 --- a/vortex-array/src/data/viewed.rs +++ b/vortex-array/src/data/viewed.rs @@ -135,6 +135,18 @@ impl Statistics for ViewedArrayData { min.and_then(|v| ScalarValue::try_from(v).ok()) .map(|v| Scalar::new(self.dtype.clone(), v)) } + Stat::Sum => { + let sum = self.flatbuffer().stats()?.sum(); + sum.and_then(|v| ScalarValue::try_from(v).ok()).map(|v| { + Scalar::new( + DType::Primitive( + PType::try_from(&self.dtype).unwrap().to_widest(), + self.dtype.nullability(), + ), + v, + ) + }) + } Stat::IsConstant => self.flatbuffer().stats()?.is_constant().map(bool::into), Stat::IsSorted => self.flatbuffer().stats()?.is_sorted().map(bool::into), Stat::IsStrictSorted => self diff --git a/vortex-array/src/stats/flatbuffers.rs b/vortex-array/src/stats/flatbuffers.rs index f7ceb6b0e5..5a83110143 100644 --- a/vortex-array/src/stats/flatbuffers.rs +++ b/vortex-array/src/stats/flatbuffers.rs @@ -29,9 +29,14 @@ impl WriteFlatBuffer for &dyn Statistics { .get(Stat::Max) .map(|max| max.into_value().write_flatbuffer(fbb)); + let sum = self + .get(Stat::Sum) + .map(|sum| sum.into_value().write_flatbuffer(fbb)); + let stat_args = &crate::flatbuffers::ArrayStatsArgs { min, max, + sum, is_sorted: self.get_as::(Stat::IsSorted), is_strict_sorted: self.get_as::(Stat::IsStrictSorted), is_constant: self.get_as::(Stat::IsConstant), diff --git a/vortex-array/src/stats/mod.rs b/vortex-array/src/stats/mod.rs index afd50a4070..fb59724a3a 100644 --- a/vortex-array/src/stats/mod.rs +++ b/vortex-array/src/stats/mod.rs @@ -23,7 +23,13 @@ pub mod flatbuffers; mod statsset; /// Statistics that are used for pruning files (i.e., we want to ensure they are computed when compressing/writing). -pub const PRUNING_STATS: &[Stat] = &[Stat::Min, Stat::Max, Stat::TrueCount, Stat::NullCount]; +pub const PRUNING_STATS: &[Stat] = &[ + Stat::Min, + Stat::Max, + Stat::Sum, + Stat::TrueCount, + Stat::NullCount, +]; #[derive( Debug, Clone, Copy, PartialEq, Eq, Hash, Sequence, Enum, IntoPrimitive, TryFromPrimitive, @@ -45,6 +51,8 @@ pub enum Stat { Max, /// The minimum value in the array (ignoring nulls, unless all values are null) Min, + /// The sum of the values in the array (ignoring nulls) + Sum, /// The number of runs in the array (ignoring nulls) RunCount, /// The number of true values in the array (nulls are treated as false) @@ -92,6 +100,9 @@ impl Stat { Stat::IsStrictSorted => DType::Bool(NonNullable), Stat::Max => data_type.clone(), Stat::Min => data_type.clone(), + Stat::Sum => PType::try_from(data_type) + .map(|ptype| DType::Primitive(ptype.to_widest(), data_type.nullability())) + .unwrap_or_else(|_| DType::Null), Stat::RunCount => DType::Primitive(PType::U64, NonNullable), Stat::TrueCount => DType::Primitive(PType::U64, NonNullable), Stat::NullCount => DType::Primitive(PType::U64, NonNullable), @@ -108,6 +119,7 @@ impl Stat { Self::IsStrictSorted => "is_strict_sorted", Self::Max => "max", Self::Min => "min", + Self::Sum => "sum", Self::RunCount => "run_count", Self::TrueCount => "true_count", Self::NullCount => "null_count", diff --git a/vortex-array/src/stats/statsset.rs b/vortex-array/src/stats/statsset.rs index 0c2b12ed65..97899e9fed 100644 --- a/vortex-array/src/stats/statsset.rs +++ b/vortex-array/src/stats/statsset.rs @@ -156,6 +156,7 @@ impl StatsSet { Stat::IsStrictSorted => self.merge_is_strict_sorted(other), Stat::Max => self.merge_max(other), Stat::Min => self.merge_min(other), + Stat::Sum => self.merge_sum(other), Stat::RunCount => self.merge_run_count(other), Stat::TrueCount => self.merge_true_count(other), Stat::NullCount => self.merge_null_count(other), @@ -213,6 +214,11 @@ impl StatsSet { } } + fn merge_sum(&mut self, _other: &Self) { + // TODO(ngates): implement this by summing ScalarValue + self.clear(Stat::Sum) + } + fn merge_is_constant(&mut self, other: &Self) { if let Some(is_constant) = self.get_as(Stat::IsConstant) { if let Some(other_is_constant) = other.get_as(Stat::IsConstant) { diff --git a/vortex-datafusion/src/memory/statistics.rs b/vortex-datafusion/src/memory/statistics.rs index c2c262a2e1..656c999026 100644 --- a/vortex-datafusion/src/memory/statistics.rs +++ b/vortex-datafusion/src/memory/statistics.rs @@ -41,6 +41,14 @@ pub fn chunked_array_df_stats(array: &ChunkedArray, projection: &[usize]) -> DFR }) .map(Precision::Exact) .unwrap_or(Precision::Absent), + sum_value: arr + .statistics() + .get(Stat::Sum) + .map(|n| { + ScalarValue::try_from(n).vortex_expect("cannot convert scalar to df scalar") + }) + .map(Precision::Exact) + .unwrap_or(Precision::Absent), distinct_count: Precision::Absent, } }) diff --git a/vortex-datafusion/src/persistent/format.rs b/vortex-datafusion/src/persistent/format.rs index d46c6b161a..44e98791ef 100644 --- a/vortex-datafusion/src/persistent/format.rs +++ b/vortex-datafusion/src/persistent/format.rs @@ -129,7 +129,7 @@ impl FileFormat for VortexFormat { let mut column_statistics = Vec::with_capacity(table_schema.fields().len()); let mut total_size = 0_u64; - for col_stats in metadata_table.into_iter() { + for (col_stats, field) in metadata_table.into_iter().zip(table_schema.fields().iter()) { let col_stats = match col_stats { Some(array) => { let col_metadata_array = StructArray::try_from(array)?; @@ -137,6 +137,9 @@ impl FileFormat for VortexFormat { total_size += uncompressed_col_size(&col_metadata_array)?.unwrap_or_default(); + + log::info!("Column stats for {}: {:?}", field.name(), col_stats); + col_stats } None => ColumnStatistics::new_unknown(), @@ -158,16 +161,14 @@ impl FileFormat for VortexFormat { ) -> DFResult> { let metrics = ExecutionPlanMetricsSet::new(); - let exec = VortexExec::try_new( + Ok(VortexExec::try_new( file_scan_config, metrics, filters.cloned(), self.context.clone(), self.initial_read_cache.clone(), )? - .into_arc(); - - Ok(exec) + .into_arc()) } async fn create_writer_physical_plan( diff --git a/vortex-datafusion/src/persistent/statistics.rs b/vortex-datafusion/src/persistent/statistics.rs index 3113a755be..96997f9aa2 100644 --- a/vortex-datafusion/src/persistent/statistics.rs +++ b/vortex-datafusion/src/persistent/statistics.rs @@ -5,7 +5,7 @@ use datafusion_common::stats::Precision; use datafusion_common::ColumnStatistics; use datafusion_expr::Accumulator; use vortex_array::array::StructArray; -use vortex_array::stats::Stat; +use vortex_array::stats::{ArrayStatistics, Stat}; use vortex_array::variants::StructArrayTrait as _; use vortex_array::IntoCanonical; use vortex_error::VortexResult; @@ -39,6 +39,12 @@ pub fn array_to_col_statistics(array: &StructArray) -> VortexResult self, } } + + /// Returns the widest PType that can represent all values of this PType + /// Floats are potentially lossy. + pub const fn to_widest(self) -> Self { + match self { + Self::I8 | Self::I16 | Self::I32 | Self::I64 => Self::I64, + Self::U8 | Self::U16 | Self::U32 | Self::U64 => Self::U64, + Self::F16 | Self::F32 | Self::F64 => Self::F64, + } + } } impl Display for PType { diff --git a/vortex-file/src/read/builder/initial_read.rs b/vortex-file/src/read/builder/initial_read.rs index 1c2fe4363f..1390024451 100644 --- a/vortex-file/src/read/builder/initial_read.rs +++ b/vortex-file/src/read/builder/initial_read.rs @@ -1,4 +1,5 @@ use core::ops::Range; +use std::fmt::{Debug, Formatter}; use bytes::Bytes; use flatbuffers::{root, root_unchecked}; @@ -8,7 +9,7 @@ use vortex_io::VortexReadAt; use crate::{LazyDType, EOF_SIZE, INITIAL_READ_SIZE, MAGIC_BYTES, VERSION}; -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct InitialRead { /// The bytes from the initial read of the file, which is assumed (for now) to be sufficiently /// large to contain the schema and layout. @@ -19,6 +20,15 @@ pub struct InitialRead { pub fb_postscript_byte_range: Range, } +impl Debug for InitialRead { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("InitialRead") + .field("initial_read_offset", &self.initial_read_offset) + .field("fb_postscript_byte_range", &self.fb_postscript_byte_range) + .finish() + } +} + impl InitialRead { pub fn fb_postscript(&self) -> footer::Postscript { unsafe { diff --git a/vortex-file/src/write/writer.rs b/vortex-file/src/write/writer.rs index 6d845f38b5..c0b91d3cfe 100644 --- a/vortex-file/src/write/writer.rs +++ b/vortex-file/src/write/writer.rs @@ -26,6 +26,7 @@ use crate::{LayoutSpec, EOF_SIZE, MAGIC_BYTES, MAX_FOOTER_SIZE, VERSION}; const STATS_TO_WRITE: &[Stat] = &[ Stat::Min, Stat::Max, + Stat::Sum, Stat::TrueCount, Stat::NullCount, Stat::RunCount, diff --git a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs index 133b00774d..cc3f57f8ca 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs @@ -16,6 +16,7 @@ table Array { table ArrayStats { min: ScalarValue; max: ScalarValue; + sum: ScalarValue; is_sorted: bool = null; is_strict_sorted: bool = null; is_constant: bool = null; diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 18c00caed1..9f393e9d27 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -120,8 +120,8 @@ impl<'a> Array<'a> { Array { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args ArrayArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ArrayBuilder::new(_fbb); @@ -218,11 +218,11 @@ impl<'a> Default for ArrayArgs<'a> { } } -pub struct ArrayBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct ArrayBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> ArrayBuilder<'a, 'b> { #[inline] pub fn add_version(&mut self, version: Version) { self.fbb_.push_slot::(Array::VT_VERSION, version, Version::V0); @@ -248,7 +248,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(Array::VT_CHILDREN, children); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> ArrayBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ArrayBuilder<'a, 'b> { let start = _fbb.start_table(); ArrayBuilder { fbb_: _fbb, @@ -292,23 +292,24 @@ impl<'a> flatbuffers::Follow<'a> for ArrayStats<'a> { impl<'a> ArrayStats<'a> { pub const VT_MIN: flatbuffers::VOffsetT = 4; pub const VT_MAX: flatbuffers::VOffsetT = 6; - pub const VT_IS_SORTED: flatbuffers::VOffsetT = 8; - pub const VT_IS_STRICT_SORTED: flatbuffers::VOffsetT = 10; - pub const VT_IS_CONSTANT: flatbuffers::VOffsetT = 12; - pub const VT_RUN_COUNT: flatbuffers::VOffsetT = 14; - pub const VT_TRUE_COUNT: flatbuffers::VOffsetT = 16; - pub const VT_NULL_COUNT: flatbuffers::VOffsetT = 18; - pub const VT_BIT_WIDTH_FREQ: flatbuffers::VOffsetT = 20; - pub const VT_TRAILING_ZERO_FREQ: flatbuffers::VOffsetT = 22; - pub const VT_UNCOMPRESSED_SIZE_IN_BYTES: flatbuffers::VOffsetT = 24; + pub const VT_SUM: flatbuffers::VOffsetT = 8; + pub const VT_IS_SORTED: flatbuffers::VOffsetT = 10; + pub const VT_IS_STRICT_SORTED: flatbuffers::VOffsetT = 12; + pub const VT_IS_CONSTANT: flatbuffers::VOffsetT = 14; + pub const VT_RUN_COUNT: flatbuffers::VOffsetT = 16; + pub const VT_TRUE_COUNT: flatbuffers::VOffsetT = 18; + pub const VT_NULL_COUNT: flatbuffers::VOffsetT = 20; + pub const VT_BIT_WIDTH_FREQ: flatbuffers::VOffsetT = 22; + pub const VT_TRAILING_ZERO_FREQ: flatbuffers::VOffsetT = 24; + pub const VT_UNCOMPRESSED_SIZE_IN_BYTES: flatbuffers::VOffsetT = 26; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { ArrayStats { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args ArrayStatsArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ArrayStatsBuilder::new(_fbb); @@ -318,6 +319,7 @@ impl<'a> ArrayStats<'a> { if let Some(x) = args.run_count { builder.add_run_count(x); } if let Some(x) = args.trailing_zero_freq { builder.add_trailing_zero_freq(x); } if let Some(x) = args.bit_width_freq { builder.add_bit_width_freq(x); } + if let Some(x) = args.sum { builder.add_sum(x); } if let Some(x) = args.max { builder.add_max(x); } if let Some(x) = args.min { builder.add_min(x); } if let Some(x) = args.is_constant { builder.add_is_constant(x); } @@ -342,6 +344,13 @@ impl<'a> ArrayStats<'a> { unsafe { self._tab.get::>(ArrayStats::VT_MAX, None)} } #[inline] + pub fn sum(&self) -> Option> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::>(ArrayStats::VT_SUM, None)} + } + #[inline] pub fn is_sorted(&self) -> Option { // Safety: // Created from valid Table for this object @@ -415,6 +424,7 @@ impl flatbuffers::Verifiable for ArrayStats<'_> { v.visit_table(pos)? .visit_field::>("min", Self::VT_MIN, false)? .visit_field::>("max", Self::VT_MAX, false)? + .visit_field::>("sum", Self::VT_SUM, false)? .visit_field::("is_sorted", Self::VT_IS_SORTED, false)? .visit_field::("is_strict_sorted", Self::VT_IS_STRICT_SORTED, false)? .visit_field::("is_constant", Self::VT_IS_CONSTANT, false)? @@ -431,6 +441,7 @@ impl flatbuffers::Verifiable for ArrayStats<'_> { pub struct ArrayStatsArgs<'a> { pub min: Option>>, pub max: Option>>, + pub sum: Option>>, pub is_sorted: Option, pub is_strict_sorted: Option, pub is_constant: Option, @@ -447,6 +458,7 @@ impl<'a> Default for ArrayStatsArgs<'a> { ArrayStatsArgs { min: None, max: None, + sum: None, is_sorted: None, is_strict_sorted: None, is_constant: None, @@ -460,11 +472,11 @@ impl<'a> Default for ArrayStatsArgs<'a> { } } -pub struct ArrayStatsBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct ArrayStatsBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayStatsBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> ArrayStatsBuilder<'a, 'b> { #[inline] pub fn add_min(&mut self, min: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(ArrayStats::VT_MIN, min); @@ -474,6 +486,10 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayStatsBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(ArrayStats::VT_MAX, max); } #[inline] + pub fn add_sum(&mut self, sum: flatbuffers::WIPOffset>) { + self.fbb_.push_slot_always::>(ArrayStats::VT_SUM, sum); + } + #[inline] pub fn add_is_sorted(&mut self, is_sorted: bool) { self.fbb_.push_slot_always::(ArrayStats::VT_IS_SORTED, is_sorted); } @@ -510,7 +526,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayStatsBuilder<'a, 'b, A> { self.fbb_.push_slot_always::(ArrayStats::VT_UNCOMPRESSED_SIZE_IN_BYTES, uncompressed_size_in_bytes); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> ArrayStatsBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ArrayStatsBuilder<'a, 'b> { let start = _fbb.start_table(); ArrayStatsBuilder { fbb_: _fbb, @@ -529,6 +545,7 @@ impl core::fmt::Debug for ArrayStats<'_> { let mut ds = f.debug_struct("ArrayStats"); ds.field("min", &self.min()); ds.field("max", &self.max()); + ds.field("sum", &self.sum()); ds.field("is_sorted", &self.is_sorted()); ds.field("is_strict_sorted", &self.is_strict_sorted()); ds.field("is_constant", &self.is_constant()); @@ -602,13 +619,13 @@ pub unsafe fn size_prefixed_root_as_array_unchecked(buf: &[u8]) -> Array { flatbuffers::size_prefixed_root_unchecked::(buf) } #[inline] -pub fn finish_array_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( - fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub fn finish_array_buffer<'a, 'b>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish(root, None); } #[inline] -pub fn finish_size_prefixed_array_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, root: flatbuffers::WIPOffset>) { +pub fn finish_size_prefixed_array_buffer<'a, 'b>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish_size_prefixed(root, None); } diff --git a/vortex-flatbuffers/src/generated/dtype.rs b/vortex-flatbuffers/src/generated/dtype.rs index 695eee4133..af311ad69c 100644 --- a/vortex-flatbuffers/src/generated/dtype.rs +++ b/vortex-flatbuffers/src/generated/dtype.rs @@ -271,8 +271,8 @@ impl<'a> Null<'a> { Null { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, _args: &'args NullArgs ) -> flatbuffers::WIPOffset> { let mut builder = NullBuilder::new(_fbb); @@ -302,13 +302,13 @@ impl<'a> Default for NullArgs { } } -pub struct NullBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct NullBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> NullBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> NullBuilder<'a, 'b> { #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> NullBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> NullBuilder<'a, 'b> { let start = _fbb.start_table(); NullBuilder { fbb_: _fbb, @@ -351,8 +351,8 @@ impl<'a> Bool<'a> { Bool { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args BoolArgs ) -> flatbuffers::WIPOffset> { let mut builder = BoolBuilder::new(_fbb); @@ -394,17 +394,17 @@ impl<'a> Default for BoolArgs { } } -pub struct BoolBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct BoolBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> BoolBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> BoolBuilder<'a, 'b> { #[inline] pub fn add_nullable(&mut self, nullable: bool) { self.fbb_.push_slot::(Bool::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> BoolBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> BoolBuilder<'a, 'b> { let start = _fbb.start_table(); BoolBuilder { fbb_: _fbb, @@ -449,8 +449,8 @@ impl<'a> Primitive<'a> { Primitive { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args PrimitiveArgs ) -> flatbuffers::WIPOffset> { let mut builder = PrimitiveBuilder::new(_fbb); @@ -503,11 +503,11 @@ impl<'a> Default for PrimitiveArgs { } } -pub struct PrimitiveBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct PrimitiveBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PrimitiveBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> PrimitiveBuilder<'a, 'b> { #[inline] pub fn add_ptype(&mut self, ptype: PType) { self.fbb_.push_slot::(Primitive::VT_PTYPE, ptype, PType::U8); @@ -517,7 +517,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PrimitiveBuilder<'a, 'b, A> { self.fbb_.push_slot::(Primitive::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> PrimitiveBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> PrimitiveBuilder<'a, 'b> { let start = _fbb.start_table(); PrimitiveBuilder { fbb_: _fbb, @@ -564,8 +564,8 @@ impl<'a> Decimal<'a> { Decimal { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args DecimalArgs ) -> flatbuffers::WIPOffset> { let mut builder = DecimalBuilder::new(_fbb); @@ -631,11 +631,11 @@ impl<'a> Default for DecimalArgs { } } -pub struct DecimalBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct DecimalBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> DecimalBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> DecimalBuilder<'a, 'b> { #[inline] pub fn add_precision(&mut self, precision: u8) { self.fbb_.push_slot::(Decimal::VT_PRECISION, precision, 0); @@ -649,7 +649,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> DecimalBuilder<'a, 'b, A> { self.fbb_.push_slot::(Decimal::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> DecimalBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> DecimalBuilder<'a, 'b> { let start = _fbb.start_table(); DecimalBuilder { fbb_: _fbb, @@ -695,8 +695,8 @@ impl<'a> Utf8<'a> { Utf8 { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args Utf8Args ) -> flatbuffers::WIPOffset> { let mut builder = Utf8Builder::new(_fbb); @@ -738,17 +738,17 @@ impl<'a> Default for Utf8Args { } } -pub struct Utf8Builder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct Utf8Builder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> Utf8Builder<'a, 'b, A> { +impl<'a: 'b, 'b> Utf8Builder<'a, 'b> { #[inline] pub fn add_nullable(&mut self, nullable: bool) { self.fbb_.push_slot::(Utf8::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> Utf8Builder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> Utf8Builder<'a, 'b> { let start = _fbb.start_table(); Utf8Builder { fbb_: _fbb, @@ -792,8 +792,8 @@ impl<'a> Binary<'a> { Binary { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args BinaryArgs ) -> flatbuffers::WIPOffset> { let mut builder = BinaryBuilder::new(_fbb); @@ -835,17 +835,17 @@ impl<'a> Default for BinaryArgs { } } -pub struct BinaryBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct BinaryBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> BinaryBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> BinaryBuilder<'a, 'b> { #[inline] pub fn add_nullable(&mut self, nullable: bool) { self.fbb_.push_slot::(Binary::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> BinaryBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> BinaryBuilder<'a, 'b> { let start = _fbb.start_table(); BinaryBuilder { fbb_: _fbb, @@ -891,8 +891,8 @@ impl<'a> Struct_<'a> { Struct_ { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args Struct_Args<'args> ) -> flatbuffers::WIPOffset> { let mut builder = Struct_Builder::new(_fbb); @@ -956,11 +956,11 @@ impl<'a> Default for Struct_Args<'a> { } } -pub struct Struct_Builder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct Struct_Builder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> Struct_Builder<'a, 'b, A> { +impl<'a: 'b, 'b> Struct_Builder<'a, 'b> { #[inline] pub fn add_names(&mut self, names: flatbuffers::WIPOffset>>) { self.fbb_.push_slot_always::>(Struct_::VT_NAMES, names); @@ -974,7 +974,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> Struct_Builder<'a, 'b, A> { self.fbb_.push_slot::(Struct_::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> Struct_Builder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> Struct_Builder<'a, 'b> { let start = _fbb.start_table(); Struct_Builder { fbb_: _fbb, @@ -1021,8 +1021,8 @@ impl<'a> List<'a> { List { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args ListArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ListBuilder::new(_fbb); @@ -1075,11 +1075,11 @@ impl<'a> Default for ListArgs<'a> { } } -pub struct ListBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct ListBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ListBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> ListBuilder<'a, 'b> { #[inline] pub fn add_element_type(&mut self, element_type: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(List::VT_ELEMENT_TYPE, element_type); @@ -1089,7 +1089,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ListBuilder<'a, 'b, A> { self.fbb_.push_slot::(List::VT_NULLABLE, nullable, false); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> ListBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ListBuilder<'a, 'b> { let start = _fbb.start_table(); ListBuilder { fbb_: _fbb, @@ -1136,8 +1136,8 @@ impl<'a> Extension<'a> { Extension { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args ExtensionArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ExtensionBuilder::new(_fbb); @@ -1201,11 +1201,11 @@ impl<'a> Default for ExtensionArgs<'a> { } } -pub struct ExtensionBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct ExtensionBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ExtensionBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> ExtensionBuilder<'a, 'b> { #[inline] pub fn add_id(&mut self, id: flatbuffers::WIPOffset<&'b str>) { self.fbb_.push_slot_always::>(Extension::VT_ID, id); @@ -1219,7 +1219,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ExtensionBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(Extension::VT_METADATA, metadata); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> ExtensionBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ExtensionBuilder<'a, 'b> { let start = _fbb.start_table(); ExtensionBuilder { fbb_: _fbb, @@ -1266,8 +1266,8 @@ impl<'a> DType<'a> { DType { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args DTypeArgs ) -> flatbuffers::WIPOffset> { let mut builder = DTypeBuilder::new(_fbb); @@ -1467,11 +1467,11 @@ impl<'a> Default for DTypeArgs { } } -pub struct DTypeBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct DTypeBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> DTypeBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> DTypeBuilder<'a, 'b> { #[inline] pub fn add_type_type(&mut self, type_type: Type) { self.fbb_.push_slot::(DType::VT_TYPE_TYPE, type_type, Type::NONE); @@ -1481,7 +1481,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> DTypeBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(DType::VT_TYPE_, type_); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> DTypeBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> DTypeBuilder<'a, 'b> { let start = _fbb.start_table(); DTypeBuilder { fbb_: _fbb, @@ -1632,13 +1632,13 @@ pub unsafe fn size_prefixed_root_as_dtype_unchecked(buf: &[u8]) -> DType { flatbuffers::size_prefixed_root_unchecked::(buf) } #[inline] -pub fn finish_dtype_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( - fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub fn finish_dtype_buffer<'a, 'b>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish(root, None); } #[inline] -pub fn finish_size_prefixed_dtype_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, root: flatbuffers::WIPOffset>) { +pub fn finish_size_prefixed_dtype_buffer<'a, 'b>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish_size_prefixed(root, None); } diff --git a/vortex-flatbuffers/src/generated/footer.rs b/vortex-flatbuffers/src/generated/footer.rs index 33551bacbc..d242f5bc69 100644 --- a/vortex-flatbuffers/src/generated/footer.rs +++ b/vortex-flatbuffers/src/generated/footer.rs @@ -176,8 +176,8 @@ impl<'a> Layout<'a> { Layout { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args LayoutArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = LayoutBuilder::new(_fbb); @@ -263,11 +263,11 @@ impl<'a> Default for LayoutArgs<'a> { } } -pub struct LayoutBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct LayoutBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> LayoutBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> LayoutBuilder<'a, 'b> { #[inline] pub fn add_encoding(&mut self, encoding: u16) { self.fbb_.push_slot::(Layout::VT_ENCODING, encoding, 0); @@ -289,7 +289,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> LayoutBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(Layout::VT_METADATA, metadata); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> LayoutBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> LayoutBuilder<'a, 'b> { let start = _fbb.start_table(); LayoutBuilder { fbb_: _fbb, @@ -350,8 +350,8 @@ impl<'a> Postscript<'a> { Postscript { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args PostscriptArgs ) -> flatbuffers::WIPOffset> { let mut builder = PostscriptBuilder::new(_fbb); @@ -404,11 +404,11 @@ impl<'a> Default for PostscriptArgs { } } -pub struct PostscriptBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct PostscriptBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PostscriptBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> PostscriptBuilder<'a, 'b> { #[inline] pub fn add_schema_offset(&mut self, schema_offset: u64) { self.fbb_.push_slot::(Postscript::VT_SCHEMA_OFFSET, schema_offset, 0); @@ -418,7 +418,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PostscriptBuilder<'a, 'b, A> { self.fbb_.push_slot::(Postscript::VT_LAYOUT_OFFSET, layout_offset, 0); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> PostscriptBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> PostscriptBuilder<'a, 'b> { let start = _fbb.start_table(); PostscriptBuilder { fbb_: _fbb, @@ -501,13 +501,13 @@ pub unsafe fn size_prefixed_root_as_postscript_unchecked(buf: &[u8]) -> Postscri flatbuffers::size_prefixed_root_unchecked::(buf) } #[inline] -pub fn finish_postscript_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( - fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub fn finish_postscript_buffer<'a, 'b>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish(root, None); } #[inline] -pub fn finish_size_prefixed_postscript_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, root: flatbuffers::WIPOffset>) { +pub fn finish_size_prefixed_postscript_buffer<'a, 'b>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish_size_prefixed(root, None); } diff --git a/vortex-flatbuffers/src/generated/message.rs b/vortex-flatbuffers/src/generated/message.rs index 6aaaf09199..b5ec42ef34 100644 --- a/vortex-flatbuffers/src/generated/message.rs +++ b/vortex-flatbuffers/src/generated/message.rs @@ -4,8 +4,8 @@ // @generated use crate::dtype::*; -use crate::array::*; use crate::scalar::*; +use crate::array::*; use core::mem; use core::cmp::Ordering; @@ -448,8 +448,8 @@ impl<'a> Schema<'a> { Schema { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args SchemaArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = SchemaBuilder::new(_fbb); @@ -491,17 +491,17 @@ impl<'a> Default for SchemaArgs<'a> { } } -pub struct SchemaBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct SchemaBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> SchemaBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> SchemaBuilder<'a, 'b> { #[inline] pub fn add_dtype(&mut self, dtype: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(Schema::VT_DTYPE, dtype); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> SchemaBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> SchemaBuilder<'a, 'b> { let start = _fbb.start_table(); SchemaBuilder { fbb_: _fbb, @@ -548,8 +548,8 @@ impl<'a> Batch<'a> { Batch { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args BatchArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = BatchBuilder::new(_fbb); @@ -624,11 +624,11 @@ impl<'a> Default for BatchArgs<'a> { } } -pub struct BatchBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct BatchBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> BatchBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> BatchBuilder<'a, 'b> { #[inline] pub fn add_array(&mut self, array: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(Batch::VT_ARRAY, array); @@ -646,7 +646,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> BatchBuilder<'a, 'b, A> { self.fbb_.push_slot::(Batch::VT_BUFFER_SIZE, buffer_size, 0); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> BatchBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> BatchBuilder<'a, 'b> { let start = _fbb.start_table(); BatchBuilder { fbb_: _fbb, @@ -694,8 +694,8 @@ impl<'a> Page<'a> { Page { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args PageArgs ) -> flatbuffers::WIPOffset> { let mut builder = PageBuilder::new(_fbb); @@ -748,11 +748,11 @@ impl<'a> Default for PageArgs { } } -pub struct PageBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct PageBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PageBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> PageBuilder<'a, 'b> { #[inline] pub fn add_buffer_size(&mut self, buffer_size: u32) { self.fbb_.push_slot::(Page::VT_BUFFER_SIZE, buffer_size, 0); @@ -762,7 +762,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> PageBuilder<'a, 'b, A> { self.fbb_.push_slot::(Page::VT_PADDING, padding, 0); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> PageBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> PageBuilder<'a, 'b> { let start = _fbb.start_table(); PageBuilder { fbb_: _fbb, @@ -809,8 +809,8 @@ impl<'a> Message<'a> { Message { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args MessageArgs ) -> flatbuffers::WIPOffset> { let mut builder = MessageBuilder::new(_fbb); @@ -925,11 +925,11 @@ impl<'a> Default for MessageArgs { } } -pub struct MessageBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct MessageBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MessageBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> MessageBuilder<'a, 'b> { #[inline] pub fn add_version(&mut self, version: MessageVersion) { self.fbb_.push_slot::(Message::VT_VERSION, version, MessageVersion::V0); @@ -943,7 +943,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> MessageBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(Message::VT_HEADER, header); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> MessageBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> MessageBuilder<'a, 'b> { let start = _fbb.start_table(); MessageBuilder { fbb_: _fbb, @@ -1053,13 +1053,13 @@ pub unsafe fn size_prefixed_root_as_message_unchecked(buf: &[u8]) -> Message { flatbuffers::size_prefixed_root_unchecked::(buf) } #[inline] -pub fn finish_message_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( - fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub fn finish_message_buffer<'a, 'b>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish(root, None); } #[inline] -pub fn finish_size_prefixed_message_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, root: flatbuffers::WIPOffset>) { +pub fn finish_size_prefixed_message_buffer<'a, 'b>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish_size_prefixed(root, None); } diff --git a/vortex-flatbuffers/src/generated/scalar.rs b/vortex-flatbuffers/src/generated/scalar.rs index 5864130daf..300c171199 100644 --- a/vortex-flatbuffers/src/generated/scalar.rs +++ b/vortex-flatbuffers/src/generated/scalar.rs @@ -34,8 +34,8 @@ impl<'a> Scalar<'a> { Scalar { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args ScalarArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ScalarBuilder::new(_fbb); @@ -88,11 +88,11 @@ impl<'a> Default for ScalarArgs<'a> { } } -pub struct ScalarBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct ScalarBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ScalarBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> ScalarBuilder<'a, 'b> { #[inline] pub fn add_dtype(&mut self, dtype: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(Scalar::VT_DTYPE, dtype); @@ -102,7 +102,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ScalarBuilder<'a, 'b, A> { self.fbb_.push_slot_always::>(Scalar::VT_VALUE, value); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> ScalarBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ScalarBuilder<'a, 'b> { let start = _fbb.start_table(); ScalarBuilder { fbb_: _fbb, @@ -149,8 +149,8 @@ impl<'a> ScalarValue<'a> { ScalarValue { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, args: &'args ScalarValueArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ScalarValueBuilder::new(_fbb); @@ -192,17 +192,17 @@ impl<'a> Default for ScalarValueArgs<'a> { } } -pub struct ScalarValueBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub struct ScalarValueBuilder<'a: 'b, 'b> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ScalarValueBuilder<'a, 'b, A> { +impl<'a: 'b, 'b> ScalarValueBuilder<'a, 'b> { #[inline] pub fn add_flex(&mut self, flex: flatbuffers::WIPOffset>) { self.fbb_.push_slot_always::>(ScalarValue::VT_FLEX, flex); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> ScalarValueBuilder<'a, 'b, A> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> ScalarValueBuilder<'a, 'b> { let start = _fbb.start_table(); ScalarValueBuilder { fbb_: _fbb, @@ -285,13 +285,13 @@ pub unsafe fn size_prefixed_root_as_scalar_unchecked(buf: &[u8]) -> Scalar { flatbuffers::size_prefixed_root_unchecked::(buf) } #[inline] -pub fn finish_scalar_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>( - fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, +pub fn finish_scalar_buffer<'a, 'b>( + fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish(root, None); } #[inline] -pub fn finish_size_prefixed_scalar_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, root: flatbuffers::WIPOffset>) { +pub fn finish_size_prefixed_scalar_buffer<'a, 'b>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>, root: flatbuffers::WIPOffset>) { fbb.finish_size_prefixed(root, None); } diff --git a/vortex-scalar/src/pvalue.rs b/vortex-scalar/src/pvalue.rs index 88d2eb97b8..ad9dc7062a 100644 --- a/vortex-scalar/src/pvalue.rs +++ b/vortex-scalar/src/pvalue.rs @@ -1,11 +1,13 @@ use core::fmt::Display; use std::cmp::Ordering; use std::mem; +use std::ops::Add; -use num_traits::NumCast; +use arrow_array::ArrowNativeTypeOp; +use num_traits::{CheckedAdd, NumCast}; use paste::paste; use vortex_dtype::half::f16; -use vortex_dtype::{NativePType, PType}; +use vortex_dtype::{match_each_native_ptype, NativePType, PType}; use vortex_error::{vortex_err, VortexError, VortexExpect}; #[derive(Debug, Clone, Copy)] @@ -34,9 +36,9 @@ impl PartialEq for PValue { (Self::I16(s), o) => o.as_i64().vortex_expect("upcast") == *s as i64, (Self::I32(s), o) => o.as_i64().vortex_expect("upcast") == *s as i64, (Self::I64(s), o) => o.as_i64().vortex_expect("upcast") == *s, - (Self::F16(s), Self::F16(o)) => s.is_eq(*o), - (Self::F32(s), Self::F32(o)) => s.is_eq(*o), - (Self::F64(s), Self::F64(o)) => s.is_eq(*o), + (Self::F16(s), Self::F16(o)) => NativePType::is_eq(*s, *o), + (Self::F32(s), Self::F32(o)) => NativePType::is_eq(*s, *o), + (Self::F64(s), Self::F64(o)) => NativePType::is_eq(*s, *o), (..) => false, } } @@ -61,6 +63,35 @@ impl PartialOrd for PValue { } } +impl Add for PValue { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + self.checked_add(&rhs).expect("Addition overflow") + } +} + +impl CheckedAdd for PValue { + fn checked_add(&self, v: &Self) -> Option { + match self.ptype().to_widest() { + PType::I64 => self + .cast_as::()? + .checked_add(v.cast_as::()?) + .map(Self::from), + PType::U64 => self + .cast_as::()? + .checked_add(v.cast_as::()?) + .map(Self::from), + PType::F64 => self + .cast_as::()? + .add_checked(v.cast_as::()?) + .ok() + .map(Self::from), + _ => unreachable!(), + } + } +} + macro_rules! as_primitive { ($T:ty, $PT:tt) => { paste! { @@ -80,6 +111,7 @@ macro_rules! as_primitive { PValue::F64(v) => <$T as NumCast>::from(v), } } + } }; } @@ -112,6 +144,29 @@ impl PValue { T::try_from(*self) } + /// Convert the PValue to the specified type, returning `None` if conversion is unsuccessful + pub fn cast_as(self) -> Option { + match self { + PValue::U8(v) => ::from(v), + PValue::U16(v) => ::from(v), + PValue::U32(v) => ::from(v), + PValue::U64(v) => ::from(v), + PValue::I8(v) => ::from(v), + PValue::I16(v) => ::from(v), + PValue::I32(v) => ::from(v), + PValue::I64(v) => ::from(v), + PValue::F16(v) => ::from(v), + PValue::F32(v) => ::from(v), + PValue::F64(v) => ::from(v), + } + } + + pub fn cast(&self, ptype: PType) -> Option { + match_each_native_ptype!(ptype, |$T| { + self.cast_as::<$T>().map(Self::from) + }) + } + #[allow(clippy::transmute_int_to_float, clippy::transmute_float_to_int)] pub fn reinterpret_cast(&self, ptype: PType) -> Self { if ptype == self.ptype() { From 2bd7aa1f3bdb8ef6033124d6902e4a3397c3389e Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 12 Dec 2024 13:57:05 +0000 Subject: [PATCH 2/3] DataFusion PR --- Cargo.lock | 21 +++++++++++++++++++++ Cargo.toml | 12 ++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f879171f8..d4c0690be0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1191,6 +1191,7 @@ dependencies = [ [[package]] name = "datafusion" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "arrow-array", @@ -1234,6 +1235,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow-schema", "async-trait", @@ -1247,6 +1249,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1270,6 +1273,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "log", "tokio", @@ -1278,10 +1282,12 @@ dependencies = [ [[package]] name = "datafusion-doc" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" [[package]] name = "datafusion-execution" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "chrono", @@ -1300,6 +1306,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1324,6 +1331,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "datafusion-common", @@ -1334,6 +1342,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "arrow-buffer", @@ -1357,6 +1366,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1377,6 +1387,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1389,6 +1400,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1412,6 +1424,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1425,6 +1438,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1433,6 +1447,7 @@ dependencies = [ [[package]] name = "datafusion-macros" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "proc-macro2", "quote", @@ -1442,6 +1457,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "async-trait", @@ -1460,6 +1476,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1483,6 +1500,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1495,6 +1513,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "datafusion-common", @@ -1510,6 +1529,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "ahash", "arrow", @@ -1542,6 +1562,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "43.0.0" +source = "git+https://github.com/apache/datafusion.git?rev=refs/pull/13736/head#05f480fbce0307fed0a22c489d76c5ccce4aab91" dependencies = [ "arrow", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index 3cbef801d1..11d814de8a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,12 +67,12 @@ clap = "4.5.13" compio = "0.13" criterion = { version = "0.5.1", features = ["html_reports"] } croaring = "2.1.0" -datafusion = { version = "43.0.0", default-features = false, path = "../datafusion/datafusion/core" } -datafusion-common = { version = "43.0.0", path = "../datafusion/datafusion/common" } -datafusion-execution = { version = "43.0.0", path = "../datafusion/datafusion/execution" } -datafusion-expr = { version = "43.0.0", path = "../datafusion/datafusion/expr" } -datafusion-physical-expr = { version = "43.0.0", path = "../datafusion/datafusion/physical-expr" } -datafusion-physical-plan = { version = "43.0.0", path = "../datafusion/datafusion/physical-plan" } +datafusion = { version = "43.0.0", default-features = false, git = "https://github.com/apache/datafusion.git", rev = "refs/pull/13736/head" } +datafusion-common = { version = "43.0.0", git = "https://github.com/apache/datafusion.git", rev = "refs/pull/13736/head" } +datafusion-execution = { version = "43.0.0", git = "https://github.com/apache/datafusion.git", rev = "refs/pull/13736/head" } +datafusion-expr = { version = "43.0.0", git = "https://github.com/apache/datafusion.git", rev = "refs/pull/13736/head" } +datafusion-physical-expr = { version = "43.0.0", git = "https://github.com/apache/datafusion.git", rev = "refs/pull/13736/head" } +datafusion-physical-plan = { version = "43.0.0", git = "https://github.com/apache/datafusion.git", rev = "refs/pull/13736/head" } divan = "0.1.14" enum-iterator = "2.0.0" enum-map = "2.7.3" From cedfd73200f42756e1788280fc0cbcb54be21b0b Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Thu, 12 Dec 2024 14:02:48 +0000 Subject: [PATCH 3/3] DataFusion PR --- vortex-scalar/src/pvalue.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-scalar/src/pvalue.rs b/vortex-scalar/src/pvalue.rs index ad9dc7062a..a4a522a964 100644 --- a/vortex-scalar/src/pvalue.rs +++ b/vortex-scalar/src/pvalue.rs @@ -67,7 +67,7 @@ impl Add for PValue { type Output = Self; fn add(self, rhs: Self) -> Self::Output { - self.checked_add(&rhs).expect("Addition overflow") + self.checked_add(&rhs).vortex_expect("Addition overflow") } }