Skip to content

Commit

Permalink
feat: add Table and table_utility.rs
Browse files Browse the repository at this point in the history
  • Loading branch information
iajoiner committed Nov 11, 2024
1 parent fdb4c9e commit afa6d90
Show file tree
Hide file tree
Showing 4 changed files with 650 additions and 0 deletions.
8 changes: 8 additions & 0 deletions crates/proof-of-sql/src/base/database/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,14 @@ pub(crate) use owned_table::OwnedTableError;
mod owned_table_test;
pub mod owned_table_utility;

mod table;
pub use table::Table;
#[cfg(test)]
pub(crate) use table::TableError;
#[cfg(test)]
mod table_test;
pub mod table_utility;

/// TODO: add docs
pub(crate) mod expression_evaluation;
mod expression_evaluation_error;
Expand Down
97 changes: 97 additions & 0 deletions crates/proof-of-sql/src/base/database/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use super::Column;
use crate::base::{map::IndexMap, scalar::Scalar};
use proof_of_sql_parser::Identifier;
use snafu::Snafu;

/// An error that occurs when working with tables.
///
/// Returned by [`Table::try_new`] and [`Table::try_from_iter`] when the
/// provided columns cannot form a well-formed table.
#[derive(Snafu, Debug, PartialEq, Eq)]
pub enum TableError {
    /// The columns have different lengths.
    #[snafu(display("Columns have different lengths"))]
    ColumnLengthMismatch,
}
/// A table of data, with schema included. This is simply a map from `Identifier` to `Column`,
/// where column order matters.
/// This is primarily used as an internal result that is used before
/// converting to the final result in either Arrow format or JSON.
/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch).
// `Eq` is derived, but `PartialEq` is implemented manually below so that
// equality also requires matching column order.
#[derive(Debug, Clone, Eq)]
pub struct Table<'a, S: Scalar> {
    // Ordered map from column name to column data. The constructors
    // guarantee that every column has the same length.
    table: IndexMap<Identifier, Column<'a, S>>,
}
impl<'a, S: Scalar> Table<'a, S> {
    /// Creates a new [`Table`] from a map of column names to columns.
    ///
    /// An empty map is accepted and yields a table with no columns.
    ///
    /// # Errors
    /// Returns [`TableError::ColumnLengthMismatch`] if the columns do not
    /// all share the same length.
    pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> {
        let mut lengths = table.values().map(|column| column.len());
        match lengths.next() {
            // At least two distinct lengths: the map cannot form a table.
            Some(first_len) if lengths.any(|len| len != first_len) => {
                Err(TableError::ColumnLengthMismatch)
            }
            // Either no columns at all, or every column matches the first.
            _ => Ok(Self { table }),
        }
    }
    /// Creates a new [`Table`] from an iterator of `(name, column)` pairs.
    ///
    /// # Errors
    /// Returns [`TableError::ColumnLengthMismatch`] if the columns do not
    /// all share the same length.
    pub fn try_from_iter<T: IntoIterator<Item = (Identifier, Column<'a, S>)>>(
        iter: T,
    ) -> Result<Self, TableError> {
        Self::try_new(iter.into_iter().collect())
    }
    /// Number of columns in the table.
    #[must_use]
    pub fn num_columns(&self) -> usize {
        self.table.len()
    }
    /// Number of rows in the table, or zero when the table has no columns.
    #[must_use]
    pub fn num_rows(&self) -> usize {
        // All columns share the same length, so the first one is authoritative.
        self.table.values().next().map_or(0, |column| column.len())
    }
    /// Whether the table has no columns.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.table.is_empty()
    }
    /// Consumes the table and returns the underlying `IndexMap` of columns.
    #[must_use]
    pub fn into_inner(self) -> IndexMap<Identifier, Column<'a, S>> {
        self.table
    }
    /// Returns a reference to the underlying `IndexMap` of columns.
    #[must_use]
    pub fn inner_table(&self) -> &IndexMap<Identifier, Column<'a, S>> {
        &self.table
    }
    /// Returns an iterator over the column names, in column order.
    pub fn column_names(&self) -> impl Iterator<Item = &Identifier> {
        self.table.keys()
    }
}

// Note: we modify the default PartialEq for IndexMap to also check for column ordering.
// This is to align with the behaviour of a `RecordBatch`.
impl<S: Scalar> PartialEq for Table<'_, S> {
    fn eq(&self, other: &Self) -> bool {
        // Map equality (same key set, equal columns) guarantees equal key
        // counts, so comparing the key iterators afterwards is purely an
        // ordering check.
        self.table == other.table && self.table.keys().eq(other.table.keys())
    }
}

// Test-only convenience so tests can write `table["col"]`.
// Panics if `index` is not a valid identifier or names no column.
#[cfg(test)]
impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
    type Output = Column<'a, S>;
    fn index(&self, index: &str) -> &Self::Output {
        let name = index.parse::<Identifier>().unwrap();
        self.table.get(&name).unwrap()
    }
}
208 changes: 208 additions & 0 deletions crates/proof-of-sql/src/base/database/table_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
use crate::{
base::{
database::{table_utility::*, Column, Table, TableError},
map::IndexMap,
scalar::test_scalar::TestScalar,
},
proof_primitive::dory::DoryScalar,
};
use bumpalo::Bump;
use proof_of_sql_parser::{
posql_time::{PoSQLTimeUnit, PoSQLTimeZone},
Identifier,
};

#[test]
fn we_can_create_a_table_with_no_columns() {
    // A column-free table is valid; it must also report zero rows and be
    // empty, not just have zero columns.
    let table = Table::<TestScalar>::try_new(IndexMap::default()).unwrap();
    assert_eq!(table.num_columns(), 0);
    assert_eq!(table.num_rows(), 0);
    assert!(table.is_empty());
}
#[test]
fn we_can_create_an_empty_table() {
    let alloc = Bump::new();
    // Build a table whose columns all have zero rows via the utility helpers.
    let borrowed_table = table::<DoryScalar>([
        bigint("bigint", [0; 0], &alloc),
        int128("decimal", [0; 0], &alloc),
        varchar("varchar", ["0"; 0], &alloc),
        scalar("scalar", [0; 0], &alloc),
        boolean("boolean", [true; 0], &alloc),
    ]);
    // Reconstruct the expected map by hand: same names, empty columns.
    let mut expected = IndexMap::default();
    for (name, column) in [
        ("bigint", Column::BigInt(&[])),
        ("decimal", Column::Int128(&[])),
        ("varchar", Column::VarChar((&[], &[]))),
        ("scalar", Column::Scalar(&[])),
        ("boolean", Column::Boolean(&[])),
    ] {
        expected.insert(Identifier::try_new(name).unwrap(), column);
    }
    assert_eq!(borrowed_table.into_inner(), expected);
}

#[test]
fn we_can_create_a_table_with_data() {
    let alloc = Bump::new();

    // Build a table through the `table_utility` helpers, covering every
    // column variant the helpers expose, including extreme values
    // (i64::MIN/MAX, i128::MIN/MAX).
    let borrowed_table = table::<DoryScalar>([
        bigint(
            "bigint",
            [0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX],
            &alloc,
        ),
        int128(
            "decimal",
            [0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX],
            &alloc,
        ),
        varchar(
            "varchar",
            ["0", "1", "2", "3", "4", "5", "6", "7", "8"],
            &alloc,
        ),
        scalar("scalar", [0, 1, 2, 3, 4, 5, 6, 7, 8], &alloc),
        boolean(
            "boolean",
            [true, false, true, false, true, false, true, false, true],
            &alloc,
        ),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX],
            &alloc,
        ),
    ]);

    // Reconstruct the same columns by hand. Note that the insertion order
    // below (`time_stamp` first) differs from the builder order above; the
    // final `assert_eq!` compares plain `IndexMap`s, whose default equality
    // does not depend on entry order (unlike `Table`'s own `PartialEq`).
    let mut expected_table = IndexMap::default();

    let time_stamp_data = alloc.alloc_slice_copy(&[0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]);
    expected_table.insert(
        Identifier::try_new("time_stamp").unwrap(),
        Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, time_stamp_data),
    );

    let bigint_data = alloc.alloc_slice_copy(&[0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]);
    expected_table.insert(
        Identifier::try_new("bigint").unwrap(),
        Column::BigInt(bigint_data),
    );

    let decimal_data = alloc.alloc_slice_copy(&[0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]);
    expected_table.insert(
        Identifier::try_new("decimal").unwrap(),
        Column::Int128(decimal_data),
    );

    // A `VarChar` column carries a pair: the string slices plus their scalar
    // encodings, so both halves are materialized in the arena here.
    let varchar_data: Vec<&str> = ["0", "1", "2", "3", "4", "5", "6", "7", "8"]
        .iter()
        .map(|&s| alloc.alloc_str(s) as &str)
        .collect();
    let varchar_str_slice = alloc.alloc_slice_clone(&varchar_data);
    let varchar_scalars: Vec<DoryScalar> = varchar_data.iter().map(Into::into).collect();
    let varchar_scalars_slice = alloc.alloc_slice_clone(&varchar_scalars);
    expected_table.insert(
        Identifier::try_new("varchar").unwrap(),
        Column::VarChar((varchar_str_slice, varchar_scalars_slice)),
    );

    let scalar_data: Vec<DoryScalar> = (0..=8).map(DoryScalar::from).collect();
    let scalar_slice = alloc.alloc_slice_copy(&scalar_data);
    expected_table.insert(
        Identifier::try_new("scalar").unwrap(),
        Column::Scalar(scalar_slice),
    );

    let boolean_data =
        alloc.alloc_slice_copy(&[true, false, true, false, true, false, true, false, true]);
    expected_table.insert(
        Identifier::try_new("boolean").unwrap(),
        Column::Boolean(boolean_data),
    );

    assert_eq!(borrowed_table.into_inner(), expected_table);
}

#[test]
fn we_get_inequality_between_tables_with_differing_column_order() {
    let alloc = Bump::new();

    // Identical columns and (empty) data, but the first four columns appear
    // in a different order than in `original` below.
    let shuffled: Table<'_, TestScalar> = table([
        boolean("d", [false; 0], &alloc),
        int128("b", [0; 0], &alloc),
        bigint("a", [0; 0], &alloc),
        varchar("c", ["0"; 0], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64; 0],
            &alloc,
        ),
    ]);

    let original: Table<'_, TestScalar> = table([
        bigint("a", [0; 0], &alloc),
        int128("b", [0; 0], &alloc),
        varchar("c", ["0"; 0], &alloc),
        boolean("d", [false; 0], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64; 0],
            &alloc,
        ),
    ]);

    // `Table` equality is order-sensitive, so these must compare unequal.
    assert_ne!(original, shuffled);
}

#[test]
fn we_get_inequality_between_tables_with_differing_data() {
    let alloc = Bump::new();

    let base: Table<'_, DoryScalar> = table([
        bigint("a", [0], &alloc),
        int128("b", [0], &alloc),
        varchar("c", ["0"], &alloc),
        boolean("d", [true], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_072_400],
            &alloc,
        ),
    ]);

    // Same schema and column order as `base`, but the values in columns
    // "a" and "time_stamp" differ.
    let altered: Table<'_, DoryScalar> = table([
        bigint("a", [1], &alloc),
        int128("b", [0], &alloc),
        varchar("c", ["0"], &alloc),
        boolean("d", [true], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_076_000],
            &alloc,
        ),
    ]);

    assert_ne!(base, altered);
}

#[test]
fn we_cannot_create_a_table_with_differing_column_lengths() {
    // A one-row column and a zero-row column cannot coexist in one table.
    let result = Table::<TestScalar>::try_from_iter([
        ("a".parse().unwrap(), Column::BigInt(&[0])),
        ("b".parse().unwrap(), Column::BigInt(&[])),
    ]);
    assert_eq!(result, Err(TableError::ColumnLengthMismatch));
}
Loading

0 comments on commit afa6d90

Please sign in to comment.