Skip to content

Commit

Permalink
refactor!: move ordering-related code to order_by_util.rs && fix an…
Browse files Browse the repository at this point in the history
… issue with decimals (#388)
  • Loading branch information
iajoiner authored Nov 24, 2024
2 parents 0cac403 + 3c96dd1 commit c0a2f98
Show file tree
Hide file tree
Showing 8 changed files with 331 additions and 319 deletions.
56 changes: 4 additions & 52 deletions crates/proof-of-sql/src/base/database/group_by_util.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
//! Contains the utility functions for the `GroupByExec` node.
use crate::base::{
database::{filter_util::filter_column_by_index, Column, OwnedColumn},
database::{
filter_util::filter_column_by_index, order_by_util::compare_indexes_by_columns, Column,
},
if_rayon,
scalar::{Scalar, ScalarExt},
scalar::Scalar,
};
use alloc::vec::Vec;
use bumpalo::Bump;
Expand Down Expand Up @@ -355,53 +357,3 @@ where
.min_by(super::super::scalar::ScalarExt::signed_cmp)
}))
}

/// Compares the tuples `(group_by[0][i], group_by[1][i], ...)` and
/// `(group_by[0][j], group_by[1][j], ...)` in lexicographic order.
pub(crate) fn compare_indexes_by_columns<S: Scalar>(
group_by: &[Column<S>],
i: usize,
j: usize,
) -> Ordering {
group_by
.iter()
.map(|col| match col {
Column::Boolean(col) => col[i].cmp(&col[j]),
Column::TinyInt(col) => col[i].cmp(&col[j]),
Column::SmallInt(col) => col[i].cmp(&col[j]),
Column::Int(col) => col[i].cmp(&col[j]),
Column::BigInt(col) | Column::TimestampTZ(_, _, col) => col[i].cmp(&col[j]),
Column::Int128(col) => col[i].cmp(&col[j]),
Column::Decimal75(_, _, col) => col[i].signed_cmp(&col[j]),
Column::Scalar(col) => col[i].cmp(&col[j]),
Column::VarChar((col, _)) => col[i].cmp(col[j]),
})
.find(|&ord| ord != Ordering::Equal)
.unwrap_or(Ordering::Equal)
}

/// Compares the tuples `(group_by[0][i], group_by[1][i], ...)` and
/// `(group_by[0][j], group_by[1][j], ...)` in lexicographic order.
///
/// Identical in functionality to [`compare_indexes_by_columns`]
pub(crate) fn compare_indexes_by_owned_columns<S: Scalar>(
group_by: &[&OwnedColumn<S>],
i: usize,
j: usize,
) -> Ordering {
group_by
.iter()
.map(|col| match col {
OwnedColumn::Boolean(col) => col[i].cmp(&col[j]),
OwnedColumn::TinyInt(col) => col[i].cmp(&col[j]),
OwnedColumn::SmallInt(col) => col[i].cmp(&col[j]),
OwnedColumn::Int(col) => col[i].cmp(&col[j]),
OwnedColumn::BigInt(col) | OwnedColumn::TimestampTZ(_, _, col) => col[i].cmp(&col[j]),
OwnedColumn::Int128(col) => col[i].cmp(&col[j]),
OwnedColumn::Decimal75(_, _, col) => col[i].signed_cmp(&col[j]),
OwnedColumn::Scalar(col) => col[i].cmp(&col[j]),
OwnedColumn::VarChar(col) => col[i].cmp(&col[j]),
})
.find(|&ord| ord != Ordering::Equal)
.unwrap_or(Ordering::Equal)
}
181 changes: 1 addition & 180 deletions crates/proof-of-sql/src/base/database/group_by_util_test.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use crate::{
base::{
database::{group_by_util::*, Column, OwnedColumn},
database::{group_by_util::*, Column},
scalar::test_scalar::TestScalar,
},
proof_primitive::dory::DoryScalar,
};
use bumpalo::Bump;
use core::cmp::Ordering;

#[test]
fn we_can_aggregate_empty_columns() {
Expand Down Expand Up @@ -217,184 +216,6 @@ fn we_can_aggregate_columns() {
assert_eq!(aggregate_result.min_columns, expected_min_result);
}

#[test]
fn we_can_compare_indexes_by_columns_with_no_columns() {
let columns: &[Column<TestScalar>; 0] = &[];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Equal);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Equal);
assert_eq!(compare_indexes_by_columns(columns, 3, 2), Ordering::Equal);
}

#[test]
fn we_can_compare_indexes_by_columns_for_bigint_columns() {
let slice_a = &[55, 44, 66, 66, 66, 77, 66, 66, 66, 66];
let slice_b = &[22, 44, 11, 44, 33, 22, 22, 11, 22, 22];
let slice_c = &[11, 55, 11, 44, 77, 11, 22, 55, 11, 22];
let column_a = Column::BigInt::<DoryScalar>(slice_a);
let column_b = Column::BigInt::<DoryScalar>(slice_b);
let column_c = Column::BigInt::<DoryScalar>(slice_c);

let columns = &[column_a];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Equal);
assert_eq!(compare_indexes_by_columns(columns, 2, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 0), Ordering::Less);
let columns = &[column_a, column_b];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 3, 4), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 2, 7), Ordering::Equal);
let columns = &[column_a, column_b, column_c];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 3, 4), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 2, 7), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 6, 9), Ordering::Equal);
}
#[test]
fn we_can_compare_indexes_by_columns_for_mixed_columns() {
let slice_a = &["55", "44", "66", "66", "66", "77", "66", "66", "66", "66"];
let slice_b = &[22, 44, 11, 44, 33, 22, 22, 11, 22, 22];
let slice_c = &[11, 55, 11, 44, 77, 11, 22, 55, 11, 22];
let scals_a: Vec<TestScalar> = slice_a.iter().map(core::convert::Into::into).collect();
let column_a = Column::VarChar((slice_a, &scals_a));
let column_b = Column::Int128(slice_b);
let column_c = Column::BigInt(slice_c);

let columns = &[column_a];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Equal);
assert_eq!(compare_indexes_by_columns(columns, 2, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 0), Ordering::Less);
let columns = &[column_a, column_b];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 3, 4), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 2, 7), Ordering::Equal);
let columns = &[column_a, column_b, column_c];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 3, 4), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 2, 7), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 6, 9), Ordering::Equal);
}
#[test]
fn we_can_compare_indexes_by_owned_columns_for_mixed_columns() {
let slice_a = ["55", "44", "66", "66", "66", "77", "66", "66", "66", "66"]
.into_iter()
.map(Into::into)
.collect();
let slice_b = vec![22, 44, 11, 44, 33, 22, 22, 11, 22, 22];
let slice_c = vec![11, 55, 11, 44, 77, 11, 22, 55, 11, 22];
let column_a = OwnedColumn::<DoryScalar>::VarChar(slice_a);
let column_b = OwnedColumn::Int128(slice_b);
let column_c = OwnedColumn::BigInt(slice_c);

let columns = &[&column_a];
assert_eq!(
compare_indexes_by_owned_columns(columns, 0, 1),
Ordering::Greater
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 1, 2),
Ordering::Less
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 2, 3),
Ordering::Equal
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 2, 1),
Ordering::Greater
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 1, 0),
Ordering::Less
);
let columns = &[&column_a, &column_b];
assert_eq!(
compare_indexes_by_owned_columns(columns, 0, 1),
Ordering::Greater
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 1, 2),
Ordering::Less
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 2, 3),
Ordering::Less
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 3, 4),
Ordering::Greater
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 2, 7),
Ordering::Equal
);
let columns = &[&column_a, &column_b, &column_c];
assert_eq!(
compare_indexes_by_owned_columns(columns, 0, 1),
Ordering::Greater
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 1, 2),
Ordering::Less
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 2, 3),
Ordering::Less
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 3, 4),
Ordering::Greater
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 2, 7),
Ordering::Less
);
assert_eq!(
compare_indexes_by_owned_columns(columns, 6, 9),
Ordering::Equal
);
}
#[test]
fn we_can_compare_indexes_by_columns_for_scalar_columns() {
let slice_a = &[55, 44, 66, 66, 66, 77, 66, 66, 66, 66];
let slice_b = &[22, 44, 11, 44, 33, 22, 22, 11, 22, 22];
let slice_c = &[11, 55, 11, 44, 77, 11, 22, 55, 11, 22];
let scals_a: Vec<TestScalar> = slice_a.iter().map(core::convert::Into::into).collect();
let column_a = Column::Scalar(&scals_a);
let column_b = Column::Int128(slice_b);
let column_c = Column::BigInt(slice_c);

let columns = &[column_a];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Equal);
assert_eq!(compare_indexes_by_columns(columns, 2, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 0), Ordering::Less);
let columns = &[column_a, column_b];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 3, 4), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 2, 7), Ordering::Equal);
let columns = &[column_a, column_b, column_c];
assert_eq!(compare_indexes_by_columns(columns, 0, 1), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 1, 2), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 2, 3), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 3, 4), Ordering::Greater);
assert_eq!(compare_indexes_by_columns(columns, 2, 7), Ordering::Less);
assert_eq!(compare_indexes_by_columns(columns, 6, 9), Ordering::Equal);
}

// SUM slices
#[test]
fn we_can_sum_aggregate_slice_by_counts_for_empty_slice() {
Expand Down
5 changes: 4 additions & 1 deletion crates/proof-of-sql/src/base/database/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ pub use table_ref::TableRef;
pub mod arrow_schema_utility;

mod owned_column;
pub(crate) use owned_column::compare_indexes_by_owned_columns_with_direction;
pub use owned_column::OwnedColumn;

mod owned_column_error;
Expand Down Expand Up @@ -115,3 +114,7 @@ mod group_by_util_test;

#[allow(dead_code)]
pub(crate) mod union_util;

pub(crate) mod order_by_util;
#[cfg(test)]
mod order_by_util_test;
81 changes: 81 additions & 0 deletions crates/proof-of-sql/src/base/database/order_by_util.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
//! Contains the utility functions for ordering.
use crate::base::{
database::{Column, OwnedColumn},
scalar::{Scalar, ScalarExt},
};
use alloc::vec::Vec;
use core::cmp::Ordering;
use proof_of_sql_parser::intermediate_ast::OrderByDirection;

/// Compares the tuples `(order_by[0][i], order_by[1][i], ...)` and
/// `(order_by[0][j], order_by[1][j], ...)` in lexicographic order.
pub(crate) fn compare_indexes_by_columns<S: Scalar>(
order_by: &[Column<S>],
i: usize,
j: usize,
) -> Ordering {
order_by
.iter()
.map(|col| match col {
Column::Boolean(col) => col[i].cmp(&col[j]),
Column::TinyInt(col) => col[i].cmp(&col[j]),
Column::SmallInt(col) => col[i].cmp(&col[j]),
Column::Int(col) => col[i].cmp(&col[j]),
Column::BigInt(col) | Column::TimestampTZ(_, _, col) => col[i].cmp(&col[j]),
Column::Int128(col) => col[i].cmp(&col[j]),
Column::Decimal75(_, _, col) => col[i].signed_cmp(&col[j]),
Column::Scalar(col) => col[i].cmp(&col[j]),
Column::VarChar((col, _)) => col[i].cmp(col[j]),
})
.find(|&ord| ord != Ordering::Equal)
.unwrap_or(Ordering::Equal)
}

/// Compares the tuples `(order_by[0][i], order_by[1][i], ...)` and
/// `(order_by[0][j], order_by[1][j], ...)` in lexicographic order.
///
/// Identical in functionality to [`compare_indexes_by_columns`]
pub(crate) fn compare_indexes_by_owned_columns<S: Scalar>(
order_by: &[&OwnedColumn<S>],
i: usize,
j: usize,
) -> Ordering {
let order_by_pairs = order_by
.iter()
.map(|&col| (col.clone(), OrderByDirection::Asc))
.collect::<Vec<_>>();
compare_indexes_by_owned_columns_with_direction(&order_by_pairs, i, j)
}

/// Compares the tuples `(left[0][i], left[1][i], ...)` and
/// `(right[0][j], right[1][j], ...)` in lexicographic order.
/// Note that direction flips the ordering.
pub(crate) fn compare_indexes_by_owned_columns_with_direction<S: Scalar>(
order_by_pairs: &[(OwnedColumn<S>, OrderByDirection)],
i: usize,
j: usize,
) -> Ordering {
order_by_pairs
.iter()
.map(|(col, direction)| {
let ordering = match col {
OwnedColumn::Boolean(col) => col[i].cmp(&col[j]),
OwnedColumn::TinyInt(col) => col[i].cmp(&col[j]),
OwnedColumn::SmallInt(col) => col[i].cmp(&col[j]),
OwnedColumn::Int(col) => col[i].cmp(&col[j]),
OwnedColumn::BigInt(col) | OwnedColumn::TimestampTZ(_, _, col) => {
col[i].cmp(&col[j])
}
OwnedColumn::Int128(col) => col[i].cmp(&col[j]),
OwnedColumn::Decimal75(_, _, col) => col[i].signed_cmp(&col[j]),
OwnedColumn::Scalar(col) => col[i].cmp(&col[j]),
OwnedColumn::VarChar(col) => col[i].cmp(&col[j]),
};
match direction {
OrderByDirection::Asc => ordering,
OrderByDirection::Desc => ordering.reverse(),
}
})
.find(|&ord| ord != Ordering::Equal)
.unwrap_or(Ordering::Equal)
}
Loading

0 comments on commit c0a2f98

Please sign in to comment.