From 05223630aaede8e4d5d7af81423074b63798c8c2 Mon Sep 17 00:00:00 2001 From: Ian Joiner <14581281+iajoiner@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:27:04 -0500 Subject: [PATCH] feat: add `cross_join` --- .../src/base/database/join_util.rs | 243 ++++++++++++++++++ crates/proof-of-sql/src/base/database/mod.rs | 4 +- 2 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 crates/proof-of-sql/src/base/database/join_util.rs diff --git a/crates/proof-of-sql/src/base/database/join_util.rs b/crates/proof-of-sql/src/base/database/join_util.rs new file mode 100644 index 000000000..7cb614a9e --- /dev/null +++ b/crates/proof-of-sql/src/base/database/join_util.rs @@ -0,0 +1,243 @@ +use super::{ColumnRepeatOp, ElementwiseRepeatOp, RepetitionOp, Table, TableOptions}; +use crate::base::scalar::Scalar; +use bumpalo::Bump; + +/// Compute the CROSS JOIN / cartesian product of two tables. +/// +/// # Panics +/// The function if written correctly can not actually panic. +pub fn cross_join<'a, S: Scalar>( + left: &Table<'a, S>, + right: &Table<'a, S>, + alloc: &'a Bump, +) -> Table<'a, S> { + let left_num_rows = left.num_rows(); + let right_num_rows = right.num_rows(); + let product_num_rows = left_num_rows * right_num_rows; + Table::<'a, S>::try_from_iter_with_options( + left.inner_table() + .iter() + .map(|(&ident, column)| { + ( + ident, + ColumnRepeatOp::column_op(column, alloc, right_num_rows), + ) + }) + .chain(right.inner_table().iter().map(|(&ident, column)| { + ( + ident, + ElementwiseRepeatOp::column_op(column, alloc, left_num_rows), + ) + })), + TableOptions::new(Some(product_num_rows)), + ) + .expect("Table creation should not fail") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::base::{database::Column, scalar::test_scalar::TestScalar}; + + #[test] + fn we_can_do_cross_joins() { + let bump = Bump::new(); + let a = "a".parse().unwrap(); + let b = "b".parse().unwrap(); + let c = "c".parse().unwrap(); + let d = "d".parse().unwrap(); + let left = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (a, Column::SmallInt(&[1_i16, 2, 3])), + (b, Column::Int(&[4_i32, 5, 6])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let right = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (c, Column::BigInt(&[7_i64, 8, 9])), + (d, Column::Int128(&[10_i128, 11, 12])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 9); + assert_eq!(result.num_columns(), 4); + assert_eq!( + result.inner_table()[&a].as_smallint().unwrap(), + &[1_i16, 2, 3, 1, 2, 3, 1, 2, 3] + ); + assert_eq!( + result.inner_table()[&b].as_int().unwrap(), + &[4_i32, 5, 6, 4, 5, 6, 4, 5, 6] + ); + assert_eq!( + result.inner_table()[&c].as_bigint().unwrap(), + &[7_i64, 7, 7, 8, 8, 8, 9, 9, 9] + ); + assert_eq!( + result.inner_table()[&d].as_int128().unwrap(), + &[10_i128, 10, 10, 11, 11, 11, 12, 12, 12] + ); + } + + #[test] + fn we_can_do_cross_joins_if_one_table_has_no_rows() { + let bump = Bump::new(); + let a = "a".parse().unwrap(); + let b = "b".parse().unwrap(); + let c = "c".parse().unwrap(); + let d = "d".parse().unwrap(); + + // Right table has no rows + let left = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (a, Column::SmallInt(&[1_i16, 2, 3])), + (b, Column::Int(&[4_i32, 5, 6])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let right = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (c, Column::BigInt(&[0_i64; 0])), + (d, Column::Int128(&[0_i128; 0])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 0); + assert_eq!(result.num_columns(), 4); + assert_eq!(result.inner_table()[&a].as_smallint().unwrap(), &[0_i16; 0]); + assert_eq!(result.inner_table()[&b].as_int().unwrap(), &[0_i32; 0]); + assert_eq!(result.inner_table()[&c].as_bigint().unwrap(), &[0_i64; 0]); + assert_eq!(result.inner_table()[&d].as_int128().unwrap(), &[0_i128; 0]); + + // Left table has no rows + let left = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (a, Column::SmallInt(&[0_i16; 0])), + (b, Column::Int(&[0_i32; 0])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let right = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (c, Column::BigInt(&[7_i64, 8, 9])), + (d, Column::Int128(&[10_i128, 11, 12])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 0); + assert_eq!(result.num_columns(), 4); + assert_eq!(result.inner_table()[&a].as_smallint().unwrap(), &[0_i16; 0]); + assert_eq!(result.inner_table()[&b].as_int().unwrap(), &[0_i32; 0]); + assert_eq!(result.inner_table()[&c].as_bigint().unwrap(), &[0_i64; 0]); + assert_eq!(result.inner_table()[&d].as_int128().unwrap(), &[0_i128; 0]); + + // Both tables have no rows + let left = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (a, Column::SmallInt(&[0_i16; 0])), + (b, Column::Int(&[0_i32; 0])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let right = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (c, Column::BigInt(&[0_i64; 0])), + (d, Column::Int128(&[0_i128; 0])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 0); + assert_eq!(result.num_columns(), 4); + assert_eq!(result.inner_table()[&a].as_smallint().unwrap(), &[0_i16; 0]); + assert_eq!(result.inner_table()[&b].as_int().unwrap(), &[0_i32; 0]); + assert_eq!(result.inner_table()[&c].as_bigint().unwrap(), &[0_i64; 0]); + assert_eq!(result.inner_table()[&d].as_int128().unwrap(), &[0_i128; 0]); + } + + #[test] + fn we_can_do_cross_joins_if_one_table_has_no_columns() { + // Left table has no columns + let bump = Bump::new(); + let a = "a".parse().unwrap(); + let b = "b".parse().unwrap(); + let c = "c".parse().unwrap(); + let d = "d".parse().unwrap(); + let left = + Table::<'_, TestScalar>::try_from_iter_with_options(vec![], TableOptions::new(Some(2))) + .expect("Table creation should not fail"); + + let right = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (c, Column::BigInt(&[7_i64, 8])), + (d, Column::Int128(&[10_i128, 11])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 4); + assert_eq!(result.num_columns(), 2); + assert_eq!( + result.inner_table()[&c].as_bigint().unwrap(), + &[7_i64, 7, 8, 8] + ); + assert_eq!( + result.inner_table()[&d].as_int128().unwrap(), + &[10_i128, 10, 11, 11] + ); + + // Right table has no columns + let left = Table::<'_, TestScalar>::try_from_iter_with_options( + vec![ + (a, Column::SmallInt(&[1_i16, 2])), + (b, Column::Int(&[4_i32, 5])), + ], + TableOptions::default(), + ) + .expect("Table creation should not fail"); + let right = + Table::<'_, TestScalar>::try_from_iter_with_options(vec![], TableOptions::new(Some(0))) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 0); + assert_eq!(result.num_columns(), 2); + assert_eq!(result.inner_table()[&a].as_smallint().unwrap(), &[0_i16; 0]); + assert_eq!(result.inner_table()[&b].as_int().unwrap(), &[0_i32; 0]); + + // Both tables have no columns + let left = + Table::<'_, TestScalar>::try_from_iter_with_options(vec![], TableOptions::new(Some(2))) + .expect("Table creation should not fail"); + let right = + Table::<'_, TestScalar>::try_from_iter_with_options(vec![], TableOptions::new(Some(7))) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 14); + assert_eq!(result.num_columns(), 0); + + // Both tables have no columns and no rows + let left = + Table::<'_, TestScalar>::try_from_iter_with_options(vec![], TableOptions::new(Some(0))) + .expect("Table creation should not fail"); + let right = + Table::<'_, TestScalar>::try_from_iter_with_options(vec![], TableOptions::new(Some(0))) + .expect("Table creation should not fail"); + let result = cross_join(&left, &right, &bump); + assert_eq!(result.num_rows(), 0); + assert_eq!(result.num_columns(), 0); + } +} diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index dd3730255..b7447947c 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -26,7 +26,6 @@ pub(super) use column_comparison_operation::{ }; mod column_repetition_operation; -#[allow(unused_imports)] pub(super) use column_repetition_operation::{ColumnRepeatOp, ElementwiseRepeatOp, RepetitionOp}; mod column_operation_error; @@ -118,3 +117,6 @@ pub(crate) mod union_util; pub(crate) mod order_by_util; #[cfg(test)] mod order_by_util_test; + +#[allow(dead_code)] +pub(crate) mod join_util;