Skip to content

Commit

Permalink
feat: add UnionExec (#406)
Browse files Browse the repository at this point in the history
Please be sure to look over the pull request guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md#submit-pr.

# Please go through the following checklist
- [x] The PR title and commit messages adhere to guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md.
In particular `!` is used if and only if at least one breaking change
has been introduced.
- [x] I have run the CI check script with `source
scripts/run_ci_checks.sh`.
- The following upstream PRs have been approved and merged:
  - [x] #405

# Rationale for this change
We need to add support for `UNION ALL`.
<!--
Why are you proposing this change? If this is already explained clearly
in the linked issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.

 Example:
 Add `NestedLoopJoinExec`.
 Closes #345.

Since we added `HashJoinExec` in #323 it has been possible to do
provable inner joins. However performance is not satisfactory in some
cases. Hence we need to fix the problem by implementing
`NestedLoopJoinExec` and speeding up the code
 for `HashJoinExec`.
-->

# What changes are included in this PR?
- add `UnionExec`
- add some code to simplify debugging of sumchecks

<!--
There is no need to duplicate the description in the ticket here but it
is sometimes worth providing a summary of the individual changes in this
PR.

Example:
- Add `NestedLoopJoinExec`.
- Speed up `HashJoinExec`.
- Route joins to `NestedLoopJoinExec` if the outer input is sufficiently
small.
-->

# Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?

Example:
Yes.
-->
Yes.
  • Loading branch information
iajoiner authored Dec 11, 2024
2 parents cb4a93c + 1919586 commit 5b3c440
Show file tree
Hide file tree
Showing 9 changed files with 641 additions and 12 deletions.
1 change: 0 additions & 1 deletion crates/proof-of-sql/src/base/database/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ pub(crate) mod group_by_util;
#[cfg(test)]
mod group_by_util_test;

#[allow(dead_code)]
pub(crate) mod union_util;

pub(crate) mod order_by_util;
Expand Down
4 changes: 2 additions & 2 deletions crates/proof-of-sql/src/base/database/table_evaluation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ impl<S: Scalar> TableEvaluation<S> {

/// Returns the evaluation of an all-one column with the same length as the table.
#[must_use]
pub fn one_eval(&self) -> &S {
&self.one_eval
pub fn one_eval(&self) -> S {
self.one_eval
}
}
10 changes: 5 additions & 5 deletions crates/proof-of-sql/src/base/database/union_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ pub fn column_union<'a, S: Scalar>(
/// # Panics
/// This function should never panic as long as it is written correctly
pub fn table_union<'a, S: Scalar>(
tables: &[&Table<'a, S>],
tables: &[Table<'a, S>],
alloc: &'a Bump,
schema: Vec<ColumnField>,
) -> TableOperationResult<Table<'a, S>> {
Expand All @@ -190,7 +190,7 @@ pub fn table_union<'a, S: Scalar>(
}
// Union the columns
// Make sure to consider the case where the tables have no columns
let num_rows = tables.iter().map(|table| table.num_rows()).sum();
let num_rows = tables.iter().map(Table::num_rows).sum();
let result = Table::<'a, S>::try_from_iter_with_options(
schema.iter().enumerate().map(|(i, field)| {
let columns: Vec<_> = tables
Expand Down Expand Up @@ -290,7 +290,7 @@ mod tests {
TableOptions::new(Some(0)),
)
.unwrap();
let result = table_union(&[&table0, &table1, &table2], &alloc, vec![]).unwrap();
let result = table_union(&[table0, table1, table2], &alloc, vec![]).unwrap();
assert_eq!(
result,
Table::<'_, TestScalar>::try_new_with_options(
Expand Down Expand Up @@ -322,7 +322,7 @@ mod tests {
)
.unwrap();
let result = table_union(
&[&table0, &table1],
&[table0, table1],
&alloc,
vec![
ColumnField::new("e".parse().unwrap(), ColumnType::BigInt),
Expand Down Expand Up @@ -365,7 +365,7 @@ mod tests {
)
.unwrap();
let result = table_union(
&[&table0, &table1],
&[table0, table1],
&alloc,
vec![
ColumnField::new("e".parse().unwrap(), ColumnType::BigInt),
Expand Down
12 changes: 11 additions & 1 deletion crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use super::{EmptyExec, FilterExec, GroupByExec, ProjectionExec, SliceExec, TableExec};
use super::{EmptyExec, FilterExec, GroupByExec, ProjectionExec, SliceExec, TableExec, UnionExec};
use crate::{
base::{
database::{ColumnField, ColumnRef, OwnedTable, Table, TableEvaluation, TableRef},
Expand Down Expand Up @@ -48,4 +48,14 @@ pub enum DynProofPlan {
/// <ProofPlan> LIMIT <fetch> [OFFSET <skip>]
/// ```
Slice(SliceExec),
/// `ProofPlan` for queries of the form
/// ```ignore
/// <ProofPlan>
/// UNION ALL
/// <ProofPlan>
/// ...
/// UNION ALL
/// <ProofPlan>
/// ```
Union(UnionExec),
}
6 changes: 5 additions & 1 deletion crates/proof-of-sql/src/sql/proof_plans/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,15 @@ pub(crate) use group_by_exec::GroupByExec;
mod group_by_exec_test;

mod slice_exec;
#[allow(unused_imports)]
pub(crate) use slice_exec::SliceExec;
#[cfg(all(test, feature = "blitzar"))]
mod slice_exec_test;

mod union_exec;
pub(crate) use union_exec::UnionExec;
#[cfg(all(test, feature = "blitzar"))]
mod union_exec_test;

mod dyn_proof_plan;
pub use dyn_proof_plan::DynProofPlan;

Expand Down
2 changes: 1 addition & 1 deletion crates/proof-of-sql/src/sql/proof_plans/slice_exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ where
builder,
alpha,
beta,
*input_table_eval.one_eval(),
input_table_eval.one_eval(),
output_one_eval,
columns_evals,
selection_eval,
Expand Down
11 changes: 10 additions & 1 deletion crates/proof-of-sql/src/sql/proof_plans/test_utility.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
use super::{
DynProofPlan, EmptyExec, FilterExec, GroupByExec, ProjectionExec, SliceExec, TableExec,
UnionExec,
};
use crate::{
base::database::{ColumnField, TableRef},
base::database::{ColumnField, ColumnType, TableRef},
sql::proof_exprs::{AliasedDynProofExpr, ColumnExpr, DynProofExpr, TableExpr},
};

/// Test helper that builds a [`ColumnField`] from a textual column name and a column type.
///
/// # Panics
/// Panics if `name` cannot be parsed into the identifier type accepted by `ColumnField::new`.
pub fn column_field(name: &str, column_type: ColumnType) -> ColumnField {
    let identifier = name.parse().unwrap();
    ColumnField::new(identifier, column_type)
}

/// Test helper that returns a [`DynProofPlan::Empty`] plan wrapping a newly constructed
/// [`EmptyExec`].
pub fn empty_exec() -> DynProofPlan {
    let exec = EmptyExec::new();
    DynProofPlan::Empty(exec)
}
Expand Down Expand Up @@ -48,3 +53,7 @@ pub fn group_by(
/// Test helper that wraps `input` in a [`DynProofPlan::Slice`] plan.
///
/// The input plan is boxed and handed to `SliceExec::new` together with `skip` and `fetch`.
pub fn slice_exec(input: DynProofPlan, skip: usize, fetch: Option<usize>) -> DynProofPlan {
    let boxed_input = Box::new(input);
    let exec = SliceExec::new(boxed_input, skip, fetch);
    DynProofPlan::Slice(exec)
}

/// Test helper that combines `inputs` into a single [`DynProofPlan::Union`] plan.
///
/// Both the input plans and the `schema` are passed unchanged to `UnionExec::new`.
pub fn union_exec(inputs: Vec<DynProofPlan>, schema: Vec<ColumnField>) -> DynProofPlan {
    let exec = UnionExec::new(inputs, schema);
    DynProofPlan::Union(exec)
}
Loading

0 comments on commit 5b3c440

Please sign in to comment.