Skip to content

Commit

Permalink
feat: add compare_single_row_of_tables (#391)
Browse files Browse the repository at this point in the history
Please be sure to look over the pull request guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md#submit-pr.

# Please go through the following checklist
- [x] The PR title and commit messages adhere to guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md.
In particular `!` is used if and only if at least one breaking change
has been introduced.
- [x] I have run the ci check script with `source
scripts/run_ci_checks.sh`.

# Rationale for this change
We need to add this function in order to perform sort-merge joins.
<!--
Why are you proposing this change? If this is already explained clearly
in the linked issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.

 Example:
 Add `NestedLoopJoinExec`.
 Closes #345.

Since we added `HashJoinExec` in #323 it has been possible to do
provable inner joins. However performance is not satisfactory in some
cases. Hence we need to fix the problem by implementing
`NestedLoopJoinExec` and speeding up the code
 for `HashJoinExec`.
-->

# What changes are included in this PR?
See title.
<!--
There is no need to duplicate the description in the ticket here but it
is sometimes worth providing a summary of the individual changes in this
PR.

Example:
- Add `NestedLoopJoinExec`.
- Speed up `HashJoinExec`.
- Route joins to `NestedLoopJoinExec` if the outer input is sufficiently
small.
-->

# Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?

Example:
Yes.
-->
Yes.
  • Loading branch information
iajoiner authored Dec 3, 2024
2 parents ba1ebbd + 1efe43a commit ec101bf
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 3 deletions.
70 changes: 69 additions & 1 deletion crates/proof-of-sql/src/base/database/order_by_util.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Contains the utility functions for ordering.
use crate::base::{
database::{Column, OwnedColumn},
database::{Column, OwnedColumn, TableOperationError, TableOperationResult},
scalar::{Scalar, ScalarExt},
};
use alloc::vec::Vec;
Expand Down Expand Up @@ -31,6 +31,74 @@ pub(crate) fn compare_indexes_by_columns<S: Scalar>(
.unwrap_or(Ordering::Equal)
}

/// Lexicographically orders one row of `left` against one row of `right`.
///
/// The tuple `(left[0][left_row_index], left[1][left_row_index], ...)` is compared with
/// `(right[0][right_row_index], right[1][right_row_index], ...)` column by column. The
/// first column pair whose values differ decides the result; `Ordering::Equal` is
/// returned when every column pair agrees (including when there are no columns at all).
///
/// For now, corresponding columns in `left` and `right` must have the same column type.
///
/// # Errors
/// Returns `TableOperationError::JoinIncompatibleTypes` for the first pair of
/// corresponding columns whose column types differ. Every pair is validated before
/// any value is compared.
///
/// # Panics
/// Panics if `left` and `right` hold a different number of columns, which should
/// never happen since this function should only be called for joins.
/// NOTE(review): indexing a compared column with an out-of-range row index presumably
/// panics as well — confirm against the callers.
// NOTE(review): presumably not yet called outside tests (the commit adds it ahead of
// sort-merge joins), hence the lint exemption — confirm before removing it.
#[allow(dead_code)]
pub(crate) fn compare_single_row_of_tables<S: Scalar>(
    left: &[Column<S>],
    right: &[Column<S>],
    left_row_index: usize,
    right_row_index: usize,
) -> TableOperationResult<Ordering> {
    // Should never happen
    assert_eq!(left.len(), right.len());

    // First pass: reject the whole comparison as soon as any column pair disagrees on type.
    for (lhs, rhs) in left.iter().zip(right) {
        let (left_type, right_type) = (lhs.column_type(), rhs.column_type());
        if left_type != right_type {
            return Err(TableOperationError::JoinIncompatibleTypes {
                left_type,
                right_type,
            });
        }
    }

    // Second pass: walk the columns in order and stop at the first one that is not equal.
    let (i, j) = (left_row_index, right_row_index);
    for pair in left.iter().zip(right) {
        let ordering = match pair {
            (Column::Boolean(lhs), Column::Boolean(rhs)) => lhs[i].cmp(&rhs[j]),
            (Column::TinyInt(lhs), Column::TinyInt(rhs)) => lhs[i].cmp(&rhs[j]),
            (Column::SmallInt(lhs), Column::SmallInt(rhs)) => lhs[i].cmp(&rhs[j]),
            (Column::Int(lhs), Column::Int(rhs)) => lhs[i].cmp(&rhs[j]),
            (Column::BigInt(lhs), Column::BigInt(rhs))
            | (Column::TimestampTZ(_, _, lhs), Column::TimestampTZ(_, _, rhs)) => {
                lhs[i].cmp(&rhs[j])
            }
            (Column::Int128(lhs), Column::Int128(rhs)) => lhs[i].cmp(&rhs[j]),
            // Decimal values are compared with `signed_cmp` rather than `cmp`.
            (Column::Decimal75(_, _, lhs), Column::Decimal75(_, _, rhs)) => {
                lhs[i].signed_cmp(&rhs[j])
            }
            (Column::Scalar(lhs), Column::Scalar(rhs)) => lhs[i].cmp(&rhs[j]),
            (Column::VarChar((lhs, _)), Column::VarChar((rhs, _))) => lhs[i].cmp(rhs[j]),
            // Should never happen since we checked the column types
            _ => unreachable!(),
        };
        match ordering {
            Ordering::Equal => continue,
            decided => return Ok(decided),
        }
    }
    Ok(Ordering::Equal)
}

/// Compares the tuples `(order_by[0][i], order_by[1][i], ...)` and
/// `(order_by[0][j], order_by[1][j], ...)` in lexicographic order.
///
Expand Down
61 changes: 60 additions & 1 deletion crates/proof-of-sql/src/base/database/order_by_util_test.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{
base::{
database::{order_by_util::*, Column, OwnedColumn},
database::{order_by_util::*, Column, ColumnType, OwnedColumn, TableOperationError},
math::decimal::Precision,
scalar::test_scalar::TestScalar,
},
Expand Down Expand Up @@ -78,6 +78,65 @@ fn we_can_compare_indexes_by_columns_for_mixed_columns() {
assert_eq!(compare_indexes_by_columns(columns, 6, 9), Ordering::Equal);
}

#[test]
fn we_can_compare_single_row_of_tables() {
    // Left table: three BigInt columns, ten rows each.
    let left_a = &[55, 44, 44, 66, 66, 77, 66, 66, 66, 66];
    let left_b = &[22, 44, 55, 44, 33, 22, 22, 11, 22, 22];
    let left_c = &[11, 55, 11, 44, 77, 11, 22, 55, 11, 22];
    let left = [
        Column::BigInt::<TestScalar>(left_a),
        Column::BigInt::<TestScalar>(left_b),
        Column::BigInt::<TestScalar>(left_c),
    ];

    // Right table: same shape and column types as the left table.
    let right_a = &[77, 44, 66, 44, 77, 77, 66, 66, 55, 66];
    let right_b = &[22, 55, 11, 77, 33, 33, 22, 22, 22, 11];
    let right_c = &[11, 55, 22, 0, 77, 11, 33, 55, 11, 22];
    let right = [
        Column::BigInt::<TestScalar>(right_a),
        Column::BigInt::<TestScalar>(right_b),
        Column::BigInt::<TestScalar>(right_c),
    ];

    // (left row, right row, expected ordering of the left row relative to the right row)
    let cases = [
        // (55, 22, 11) vs (44, 55, 55): decided by the first column.
        (0, 1, Ordering::Greater),
        // (44, 44, 55) vs (66, 11, 22): decided by the first column.
        (1, 2, Ordering::Less),
        // (44, 55, 11) vs (44, 77, 0): tie on the first column, decided by the second.
        (2, 3, Ordering::Less),
        // (44, 55, 11) vs (44, 55, 55): decided only by the last column.
        (2, 1, Ordering::Less),
        // (77, 22, 11) vs (77, 22, 11): identical rows.
        (5, 0, Ordering::Equal),
    ];
    for &(left_row, right_row, expected) in &cases {
        assert_eq!(
            compare_single_row_of_tables(&left, &right, left_row, right_row).unwrap(),
            expected
        );
    }
}

#[test]
fn we_cannot_compare_single_row_of_tables_if_type_mismatch() {
    // A BigInt column on the left and a Boolean column on the right cannot be compared.
    let bigint_values = &[55, 44, 66, 66, 66, 77, 66, 66, 66, 66];
    let boolean_values = &[
        true, false, true, true, false, true, false, true, false, true,
    ];
    let left = [Column::BigInt::<TestScalar>(bigint_values)];
    let right = [Column::Boolean::<TestScalar>(boolean_values)];

    let outcome = compare_single_row_of_tables(&left, &right, 0, 1);

    // The error must report both offending column types, left first.
    let expected = Err(TableOperationError::JoinIncompatibleTypes {
        left_type: ColumnType::BigInt,
        right_type: ColumnType::Boolean,
    });
    assert_eq!(outcome, expected);
}

#[test]
fn we_can_compare_indexes_by_owned_columns_for_mixed_columns() {
let slice_a = ["55", "44", "66", "66", "66", "77", "66", "66", "66", "66"]
Expand Down
12 changes: 11 additions & 1 deletion crates/proof-of-sql/src/base/database/table_operation_error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::base::database::ColumnField;
use crate::base::database::{ColumnField, ColumnType};
use alloc::vec::Vec;
use core::result::Result;
use snafu::Snafu;
Expand All @@ -16,6 +16,16 @@ pub enum TableOperationError {
/// The schema of the table that caused the error
actual_schema: Vec<ColumnField>,
},
/// Errors related to joining tables on columns with incompatible types.
#[snafu(display(
"Cannot join tables on columns with incompatible types: {left_type:?} and {right_type:?}"
))]
JoinIncompatibleTypes {
/// The left-hand side data type
left_type: ColumnType,
/// The right-hand side data type
right_type: ColumnType,
},
}

/// Result type for table operations
Expand Down

0 comments on commit ec101bf

Please sign in to comment.