From 775724a6dade43440a2b3345ea71e2bdb0359897 Mon Sep 17 00:00:00 2001 From: Ian Joiner <14581281+iajoiner@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:32:29 -0500 Subject: [PATCH 1/2] feat: add `compare_indexes_of_tables_by_columns` --- .../src/base/database/order_by_util.rs | 61 ++++++++++++++++++- .../src/base/database/order_by_util_test.rs | 61 ++++++++++++++++++- .../base/database/table_operation_error.rs | 12 +++- 3 files changed, 131 insertions(+), 3 deletions(-) diff --git a/crates/proof-of-sql/src/base/database/order_by_util.rs b/crates/proof-of-sql/src/base/database/order_by_util.rs index e2589d1db..d5c70c5db 100644 --- a/crates/proof-of-sql/src/base/database/order_by_util.rs +++ b/crates/proof-of-sql/src/base/database/order_by_util.rs @@ -1,6 +1,6 @@ //! Contains the utility functions for ordering. use crate::base::{ - database::{Column, OwnedColumn}, + database::{Column, OwnedColumn, TableOperationError, TableOperationResult}, scalar::{Scalar, ScalarExt}, }; use alloc::vec::Vec; @@ -31,6 +31,65 @@ pub(crate) fn compare_indexes_by_columns( .unwrap_or(Ordering::Equal) } +/// Compares the tuples `(left[0][i], left[1][i], ...)` and +/// `(right[0][j], right[1][j], ...)` in lexicographic order. +/// +/// Requires that columns in `left` and `right` have the same column types for now +/// +/// # Panics +/// Panics if `left` and `right` have different number of columns +/// which should never happen since this function should only be called +/// for joins. +#[allow(dead_code)] +pub(crate) fn compare_indexes_of_tables_by_columns( + left: &[Column], + right: &[Column], + i: usize, + j: usize, +) -> TableOperationResult { + // Should never happen + assert_eq!(left.len(), right.len()); + for col in 0..left.len() { + if left[col].column_type() != right[col].column_type() { + return Err(TableOperationError::JoinIncompatibleTypes { + left_type: left[col].column_type(), + right_type: right[col].column_type(), + }); + } + } + Ok(left + .iter() + .zip(right.iter()) + .map(|(left_col, right_col)| match (left_col, right_col) { + (Column::Boolean(left_col), Column::Boolean(right_col)) => { + left_col[i].cmp(&right_col[j]) + } + (Column::TinyInt(left_col), Column::TinyInt(right_col)) => { + left_col[i].cmp(&right_col[j]) + } + (Column::SmallInt(left_col), Column::SmallInt(right_col)) => { + left_col[i].cmp(&right_col[j]) + } + (Column::Int(left_col), Column::Int(right_col)) => left_col[i].cmp(&right_col[j]), + (Column::BigInt(left_col), Column::BigInt(right_col)) + | (Column::TimestampTZ(_, _, left_col), Column::TimestampTZ(_, _, right_col)) => { + left_col[i].cmp(&right_col[j]) + } + (Column::Int128(left_col), Column::Int128(right_col)) => left_col[i].cmp(&right_col[j]), + (Column::Decimal75(_, _, left_col), Column::Decimal75(_, _, right_col)) => { + left_col[i].signed_cmp(&right_col[j]) + } + (Column::Scalar(left_col), Column::Scalar(right_col)) => left_col[i].cmp(&right_col[j]), + (Column::VarChar((left_col, _)), Column::VarChar((right_col, _))) => { + left_col[i].cmp(right_col[j]) + } + // Should never happen since we checked the column types + _ => unreachable!(), + }) + .find(|&ord| ord != Ordering::Equal) + .unwrap_or(Ordering::Equal)) +} + /// Compares the tuples `(order_by[0][i], order_by[1][i], ...)` and /// `(order_by[0][j], order_by[1][j], ...)` in lexicographic order. /// diff --git a/crates/proof-of-sql/src/base/database/order_by_util_test.rs b/crates/proof-of-sql/src/base/database/order_by_util_test.rs index 644706bad..da03d5909 100644 --- a/crates/proof-of-sql/src/base/database/order_by_util_test.rs +++ b/crates/proof-of-sql/src/base/database/order_by_util_test.rs @@ -1,6 +1,6 @@ use crate::{ base::{ - database::{order_by_util::*, Column, OwnedColumn}, + database::{order_by_util::*, Column, ColumnType, OwnedColumn, TableOperationError}, math::decimal::Precision, scalar::test_scalar::TestScalar, }, @@ -78,6 +78,65 @@ fn we_can_compare_indexes_by_columns_for_mixed_columns() { assert_eq!(compare_indexes_by_columns(columns, 6, 9), Ordering::Equal); } +#[test] +fn we_can_compare_indexes_of_tables_by_columns() { + let left_slice_a = &[55, 44, 44, 66, 66, 77, 66, 66, 66, 66]; + let left_slice_b = &[22, 44, 55, 44, 33, 22, 22, 11, 22, 22]; + let left_slice_c = &[11, 55, 11, 44, 77, 11, 22, 55, 11, 22]; + let left_column_a = Column::BigInt::(left_slice_a); + let left_column_b = Column::BigInt::(left_slice_b); + let left_column_c = Column::BigInt::(left_slice_c); + let left = &[left_column_a, left_column_b, left_column_c]; + + let right_slice_a = &[77, 44, 66, 44, 77, 77, 66, 66, 55, 66]; + let right_slice_b = &[22, 55, 11, 77, 33, 33, 22, 22, 22, 11]; + let right_slice_c = &[11, 55, 22, 0, 77, 11, 33, 55, 11, 22]; + let right_column_a = Column::BigInt::(right_slice_a); + let right_column_b = Column::BigInt::(right_slice_b); + let right_column_c = Column::BigInt::(right_slice_c); + let right = &[right_column_a, right_column_b, right_column_c]; + + assert_eq!( + compare_indexes_of_tables_by_columns(left, right, 0, 1).unwrap(), + Ordering::Greater + ); + assert_eq!( + compare_indexes_of_tables_by_columns(left, right, 1, 2).unwrap(), + Ordering::Less + ); + assert_eq!( + compare_indexes_of_tables_by_columns(left, right, 2, 3).unwrap(), + Ordering::Less + ); + assert_eq!( + compare_indexes_of_tables_by_columns(left, right, 2, 1).unwrap(), + Ordering::Less + ); + assert_eq!( + compare_indexes_of_tables_by_columns(left, right, 5, 0).unwrap(), + Ordering::Equal + ); +} + +#[test] +fn we_cannot_compare_indexes_of_tables_by_columns_if_type_mismatch() { + let left_slice = &[55, 44, 66, 66, 66, 77, 66, 66, 66, 66]; + let right_slice = &[ + true, false, true, true, false, true, false, true, false, true, + ]; + let left_column = Column::BigInt::(left_slice); + let right_column = Column::Boolean::(right_slice); + let left = &[left_column]; + let right = &[right_column]; + assert_eq!( + compare_indexes_of_tables_by_columns(left, right, 0, 1), + Err(TableOperationError::JoinIncompatibleTypes { + left_type: ColumnType::BigInt, + right_type: ColumnType::Boolean + }) + ); +} + #[test] fn we_can_compare_indexes_by_owned_columns_for_mixed_columns() { let slice_a = ["55", "44", "66", "66", "66", "77", "66", "66", "66", "66"] diff --git a/crates/proof-of-sql/src/base/database/table_operation_error.rs b/crates/proof-of-sql/src/base/database/table_operation_error.rs index 6f630fe3c..b0ec13d91 100644 --- a/crates/proof-of-sql/src/base/database/table_operation_error.rs +++ b/crates/proof-of-sql/src/base/database/table_operation_error.rs @@ -1,4 +1,4 @@ -use crate::base::database::ColumnField; +use crate::base::database::{ColumnField, ColumnType}; use alloc::vec::Vec; use core::result::Result; use snafu::Snafu; @@ -16,6 +16,16 @@ pub enum TableOperationError { /// The schema of the table that caused the error actual_schema: Vec, }, + /// Errors related to joining tables on columns with incompatible types. + #[snafu(display( + "Cannot join tables on columns with incompatible types: {left_type:?} and {right_type:?}" + ))] + JoinIncompatibleTypes { + /// The left-hand side data type + left_type: ColumnType, + /// The right-hand side data type + right_type: ColumnType, + }, } /// Result type for table operations From 1efe43aa43e7b998b1d8a6164def3c77dd0155ab Mon Sep 17 00:00:00 2001 From: Ian Joiner <14581281+iajoiner@users.noreply.github.com> Date: Mon, 2 Dec 2024 21:35:40 -0500 Subject: [PATCH 2/2] fix: address review comments --- .../src/base/database/order_by_util.rs | 49 +++++++++++-------- .../src/base/database/order_by_util_test.rs | 16 +++--- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/crates/proof-of-sql/src/base/database/order_by_util.rs b/crates/proof-of-sql/src/base/database/order_by_util.rs index d5c70c5db..325ed49f9 100644 --- a/crates/proof-of-sql/src/base/database/order_by_util.rs +++ b/crates/proof-of-sql/src/base/database/order_by_util.rs @@ -41,47 +41,56 @@ pub(crate) fn compare_indexes_by_columns( /// which should never happen since this function should only be called /// for joins. #[allow(dead_code)] -pub(crate) fn compare_indexes_of_tables_by_columns( +pub(crate) fn compare_single_row_of_tables( left: &[Column], right: &[Column], - i: usize, - j: usize, + left_row_index: usize, + right_row_index: usize, ) -> TableOperationResult { // Should never happen assert_eq!(left.len(), right.len()); - for col in 0..left.len() { - if left[col].column_type() != right[col].column_type() { - return Err(TableOperationError::JoinIncompatibleTypes { - left_type: left[col].column_type(), - right_type: right[col].column_type(), - }); - } - } + left.iter() + .zip(right.iter()) + .try_for_each(|(left_col, right_col)| { + if left_col.column_type() != right_col.column_type() { + return Err(TableOperationError::JoinIncompatibleTypes { + left_type: left_col.column_type(), + right_type: right_col.column_type(), + }); + } + Ok(()) + })?; Ok(left .iter() .zip(right.iter()) .map(|(left_col, right_col)| match (left_col, right_col) { (Column::Boolean(left_col), Column::Boolean(right_col)) => { - left_col[i].cmp(&right_col[j]) + left_col[left_row_index].cmp(&right_col[right_row_index]) } (Column::TinyInt(left_col), Column::TinyInt(right_col)) => { - left_col[i].cmp(&right_col[j]) + left_col[left_row_index].cmp(&right_col[right_row_index]) } (Column::SmallInt(left_col), Column::SmallInt(right_col)) => { - left_col[i].cmp(&right_col[j]) + left_col[left_row_index].cmp(&right_col[right_row_index]) + } + (Column::Int(left_col), Column::Int(right_col)) => { + left_col[left_row_index].cmp(&right_col[right_row_index]) } - (Column::Int(left_col), Column::Int(right_col)) => left_col[i].cmp(&right_col[j]), (Column::BigInt(left_col), Column::BigInt(right_col)) | (Column::TimestampTZ(_, _, left_col), Column::TimestampTZ(_, _, right_col)) => { - left_col[i].cmp(&right_col[j]) + left_col[left_row_index].cmp(&right_col[right_row_index]) + } + (Column::Int128(left_col), Column::Int128(right_col)) => { + left_col[left_row_index].cmp(&right_col[right_row_index]) } - (Column::Int128(left_col), Column::Int128(right_col)) => left_col[i].cmp(&right_col[j]), (Column::Decimal75(_, _, left_col), Column::Decimal75(_, _, right_col)) => { - left_col[i].signed_cmp(&right_col[j]) + left_col[left_row_index].signed_cmp(&right_col[right_row_index]) + } + (Column::Scalar(left_col), Column::Scalar(right_col)) => { + left_col[left_row_index].cmp(&right_col[right_row_index]) } - (Column::Scalar(left_col), Column::Scalar(right_col)) => left_col[i].cmp(&right_col[j]), (Column::VarChar((left_col, _)), Column::VarChar((right_col, _))) => { - left_col[i].cmp(right_col[j]) + left_col[left_row_index].cmp(right_col[right_row_index]) } // Should never happen since we checked the column types _ => unreachable!(), diff --git a/crates/proof-of-sql/src/base/database/order_by_util_test.rs b/crates/proof-of-sql/src/base/database/order_by_util_test.rs index da03d5909..81a02862d 100644 --- a/crates/proof-of-sql/src/base/database/order_by_util_test.rs +++ b/crates/proof-of-sql/src/base/database/order_by_util_test.rs @@ -79,7 +79,7 @@ fn we_can_compare_indexes_by_columns_for_mixed_columns() { } #[test] -fn we_can_compare_indexes_of_tables_by_columns() { +fn we_can_compare_single_row_of_tables() { let left_slice_a = &[55, 44, 44, 66, 66, 77, 66, 66, 66, 66]; let left_slice_b = &[22, 44, 55, 44, 33, 22, 22, 11, 22, 22]; let left_slice_c = &[11, 55, 11, 44, 77, 11, 22, 55, 11, 22]; @@ -97,29 +97,29 @@ fn we_can_compare_indexes_of_tables_by_columns() { let right = &[right_column_a, right_column_b, right_column_c]; assert_eq!( - compare_indexes_of_tables_by_columns(left, right, 0, 1).unwrap(), + compare_single_row_of_tables(left, right, 0, 1).unwrap(), Ordering::Greater ); assert_eq!( - compare_indexes_of_tables_by_columns(left, right, 1, 2).unwrap(), + compare_single_row_of_tables(left, right, 1, 2).unwrap(), Ordering::Less ); assert_eq!( - compare_indexes_of_tables_by_columns(left, right, 2, 3).unwrap(), + compare_single_row_of_tables(left, right, 2, 3).unwrap(), Ordering::Less ); assert_eq!( - compare_indexes_of_tables_by_columns(left, right, 2, 1).unwrap(), + compare_single_row_of_tables(left, right, 2, 1).unwrap(), Ordering::Less ); assert_eq!( - compare_indexes_of_tables_by_columns(left, right, 5, 0).unwrap(), + compare_single_row_of_tables(left, right, 5, 0).unwrap(), Ordering::Equal ); } #[test] -fn we_cannot_compare_indexes_of_tables_by_columns_if_type_mismatch() { +fn we_cannot_compare_single_row_of_tables_if_type_mismatch() { let left_slice = &[55, 44, 66, 66, 66, 77, 66, 66, 66, 66]; let right_slice = &[ true, false, true, true, false, true, false, true, false, true, @@ -129,7 +129,7 @@ fn we_cannot_compare_indexes_of_tables_by_columns_if_type_mismatch() { let left = &[left_column]; let right = &[right_column]; assert_eq!( - compare_indexes_of_tables_by_columns(left, right, 0, 1), + compare_single_row_of_tables(left, right, 0, 1), Err(TableOperationError::JoinIncompatibleTypes { left_type: ColumnType::BigInt, right_type: ColumnType::Boolean