-
Notifications
You must be signed in to change notification settings - Fork 126
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: implement
postprocessing::OrderByExpr
(#46)
# Rationale for this change

We need to have a native Rust implementation of postprocessing for `ORDER BY` so that we can remove polars & the `transformation` module.

<!-- Why are you proposing this change? If this is already explained clearly in the linked Jira ticket then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. -->

# What changes are included in this PR?

- add `math::permutation::Permutation`
- add `OwnedColumn::try_permute`
- add `sql::postprocessing::OrderByExpr`

<!-- There is no need to duplicate the description in the ticket here but it is sometimes worth providing a summary of the individual changes in this PR. -->

# Are these changes tested?

Yes

<!-- We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->
- Loading branch information
Showing
10 changed files
with
514 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,8 @@ | ||
//! Handles parsing between decimal tokens received from the lexer into native `Decimal75` Proof of SQL type. | ||
//! This module defines math utilities used in Proof of SQL. | ||
/// Handles parsing between decimal tokens received from the lexer into native `Decimal75` Proof of SQL type. | ||
pub mod decimal; | ||
#[cfg(test)] | ||
mod decimal_tests; | ||
mod log; | ||
pub(crate) use log::log2_up; | ||
pub(crate) mod permutation; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
use thiserror::Error; | ||
|
||
/// An error that occurs when working with permutations | ||
#[derive(Error, Debug, PartialEq, Eq)] | ||
pub enum PermutationError { | ||
/// The permutation is invalid | ||
#[error("Permutation is invalid {0}")] | ||
InvalidPermutation(String), | ||
/// Application of a permutation to a slice with an incorrect length | ||
#[error("Application of a permutation to a slice with a different length {permutation_size} != {slice_length}")] | ||
PermutationSizeMismatch { | ||
permutation_size: usize, | ||
slice_length: usize, | ||
}, | ||
} | ||
|
||
/// A rearrangement of the indices `[0, 1, 2, ..., n-1]`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Permutation {
    /// The indices in permuted order; the vector's length is the permutation's size.
    permutation: Vec<usize>,
}
|
||
impl Permutation { | ||
/// Create a new permutation without checks | ||
/// | ||
/// Warning: This function does not check if the permutation is valid. | ||
/// Only use this function if you are sure that the permutation is valid. | ||
pub(crate) fn unchecked_new(permutation: Vec<usize>) -> Self { | ||
Self { permutation } | ||
} | ||
|
||
/// Create a new permutation. If the permutation is invalid, return an error. | ||
pub fn try_new(permutation: Vec<usize>) -> Result<Self, PermutationError> { | ||
let length = permutation.len(); | ||
// Check for uniqueness | ||
let mut elements = permutation.clone(); | ||
elements.sort_unstable(); | ||
elements.dedup(); | ||
if elements.len() < length { | ||
Err(PermutationError::InvalidPermutation(format!( | ||
"Permutation can not have duplicate elements: {:?}", | ||
permutation | ||
))) | ||
} | ||
// Check that no element is out of bounds | ||
else if permutation.iter().any(|&i| i >= length) { | ||
Err(PermutationError::InvalidPermutation(format!( | ||
"Permutation can not have elements out of bounds: {:?}", | ||
permutation | ||
))) | ||
} else { | ||
Ok(Self { permutation }) | ||
} | ||
} | ||
|
||
/// Get the size of the permutation | ||
pub fn size(&self) -> usize { | ||
self.permutation.len() | ||
} | ||
|
||
/// Apply the permutation to the given slice | ||
pub fn try_apply<T>(&self, slice: &[T]) -> Result<Vec<T>, PermutationError> | ||
where | ||
T: Clone, | ||
{ | ||
if slice.len() != self.size() { | ||
Err(PermutationError::PermutationSizeMismatch { | ||
permutation_size: self.size(), | ||
slice_length: slice.len(), | ||
}) | ||
} else { | ||
Ok(self.permutation.iter().map(|&i| slice[i].clone()).collect()) | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod test {
    use super::*;

    /// A valid permutation reports its size and reorders a slice of matching length.
    #[test]
    fn test_apply_permutation() {
        let swap_first_two = Permutation::try_new(vec![1, 0, 2]).unwrap();
        assert_eq!(swap_first_two.size(), 3);

        let reordered = swap_first_two
            .try_apply(&["and", "Space", "Time"])
            .unwrap();
        assert_eq!(reordered, vec!["Space", "and", "Time"]);
    }

    /// Repeated indices and out-of-bounds indices are both rejected.
    #[test]
    fn test_invalid_permutation() {
        let with_duplicate = Permutation::try_new(vec![1, 0, 0]);
        assert!(matches!(
            with_duplicate,
            Err(PermutationError::InvalidPermutation(_))
        ));

        let with_out_of_bounds = Permutation::try_new(vec![1, 0, 3]);
        assert!(matches!(
            with_out_of_bounds,
            Err(PermutationError::InvalidPermutation(_))
        ));
    }

    /// Applying a permutation to a slice of a different length reports both sizes.
    #[test]
    fn test_permutation_size_mismatch() {
        let three_elements = Permutation::try_new(vec![1, 0, 2]).unwrap();
        let outcome = three_elements.try_apply(&["Space", "Time"]);
        assert_eq!(
            outcome,
            Err(PermutationError::PermutationSizeMismatch {
                permutation_size: 3,
                slice_length: 2
            })
        );
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
72 changes: 72 additions & 0 deletions
72
crates/proof-of-sql/src/sql/postprocessing/order_by_expr.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
use super::{PostprocessingError, PostprocessingResult, PostprocessingStep}; | ||
use crate::base::{ | ||
database::{compare_indexes_by_owned_columns_with_direction, OwnedColumn, OwnedTable}, | ||
math::permutation::Permutation, | ||
scalar::Scalar, | ||
}; | ||
use proof_of_sql_parser::intermediate_ast::{OrderBy, OrderByDirection}; | ||
use rayon::prelude::ParallelSliceMut; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
/// A node representing a list of `OrderBy` expressions. | ||
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] | ||
pub struct OrderByExpr<S: Scalar> { | ||
by_exprs: Vec<OrderBy>, | ||
_phantom: core::marker::PhantomData<S>, | ||
} | ||
|
||
impl<S: Scalar> OrderByExpr<S> { | ||
/// Create a new `OrderByExpr` node. | ||
pub fn new(by_exprs: Vec<OrderBy>) -> Self { | ||
Self { | ||
by_exprs, | ||
_phantom: core::marker::PhantomData, | ||
} | ||
} | ||
} | ||
|
||
impl<S: Scalar> PostprocessingStep<S> for OrderByExpr<S> { | ||
/// Apply the slice transformation to the given `OwnedTable`. | ||
fn apply(&self, owned_table: OwnedTable<S>) -> PostprocessingResult<OwnedTable<S>> { | ||
let mut indexes = (0..owned_table.num_rows()).collect::<Vec<_>>(); | ||
// Evaluate the columns by which we order | ||
// Once we allow OrderBy for general aggregation-free expressions here we will need to call eval() | ||
let order_by_pairs: Vec<(OwnedColumn<S>, OrderByDirection)> = self | ||
.by_exprs | ||
.iter() | ||
.map( | ||
|order_by| -> PostprocessingResult<(OwnedColumn<S>, OrderByDirection)> { | ||
Ok(( | ||
owned_table | ||
.inner_table() | ||
.get(&order_by.expr) | ||
.ok_or(PostprocessingError::ColumnNotFound( | ||
order_by.expr.to_string(), | ||
))? | ||
.clone(), | ||
order_by.direction, | ||
)) | ||
}, | ||
) | ||
.collect::<PostprocessingResult<Vec<(OwnedColumn<S>, OrderByDirection)>>>()?; | ||
// Define the ordering | ||
indexes.par_sort_unstable_by(|&a, &b| { | ||
compare_indexes_by_owned_columns_with_direction(&order_by_pairs, a, b) | ||
}); | ||
let permutation = Permutation::unchecked_new(indexes); | ||
// Apply the ordering | ||
Ok( | ||
OwnedTable::<S>::try_from_iter(owned_table.into_inner().into_iter().map( | ||
|(identifier, column)| { | ||
( | ||
identifier, | ||
column | ||
.try_permute(&permutation) | ||
.expect("There should be no column length mismatch here"), | ||
) | ||
}, | ||
)) | ||
.expect("There should be no column length mismatch here"), | ||
) | ||
} | ||
} |
Oops, something went wrong.