diff --git a/crates/proof-of-sql/src/base/database/table.rs b/crates/proof-of-sql/src/base/database/table.rs
new file mode 100644
index 000000000..cc6b2991e
--- /dev/null
+++ b/crates/proof-of-sql/src/base/database/table.rs
@@ -0,0 +1,97 @@
+use super::Column;
+use crate::base::{map::IndexMap, scalar::Scalar};
+use proof_of_sql_parser::Identifier;
+use snafu::Snafu;
+
+/// An error that occurs when working with tables.
+#[derive(Snafu, Debug, PartialEq, Eq)]
+pub enum TableError {
+    /// The columns have different lengths.
+    #[snafu(display("Columns have different lengths"))]
+    ColumnLengthMismatch,
+}
+/// A table of data, with schema included. This is simply a map from `Identifier` to `Column`,
+/// where column order matters.
+/// This is primarily used as an internal result that is used before
+/// converting to the final result in either Arrow format or JSON.
+/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch).
+#[derive(Debug, Clone, Eq)]
+pub struct Table<'a, S: Scalar> {
+    table: IndexMap<Identifier, Column<'a, S>>,
+}
+impl<'a, S: Scalar> Table<'a, S> {
+    /// Creates a new [`Table`].
+    pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> {
+        if table.is_empty() {
+            return Ok(Self { table });
+        }
+        let num_rows = table[0].len();
+        if table.values().any(|column| column.len() != num_rows) {
+            Err(TableError::ColumnLengthMismatch)
+        } else {
+            Ok(Self { table })
+        }
+    }
+    /// Creates a new [`Table`].
+    pub fn try_from_iter<T: IntoIterator<Item = (Identifier, Column<'a, S>)>>(
+        iter: T,
+    ) -> Result<Self, TableError> {
+        Self::try_new(IndexMap::from_iter(iter))
+    }
+    /// Number of columns in the table.
+    #[must_use]
+    pub fn num_columns(&self) -> usize {
+        self.table.len()
+    }
+    /// Number of rows in the table.
+    #[must_use]
+    pub fn num_rows(&self) -> usize {
+        if self.table.is_empty() {
+            0
+        } else {
+            self.table[0].len()
+        }
+    }
+    /// Whether the table has no columns.
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
+        self.table.is_empty()
+    }
+    /// Returns the columns of this table as an `IndexMap`
+    #[must_use]
+    pub fn into_inner(self) -> IndexMap<Identifier, Column<'a, S>> {
+        self.table
+    }
+    /// Returns the columns of this table as an `IndexMap`
+    #[must_use]
+    pub fn inner_table(&self) -> &IndexMap<Identifier, Column<'a, S>> {
+        &self.table
+    }
+    /// Returns the columns of this table as an Iterator
+    pub fn column_names(&self) -> impl Iterator<Item = &Identifier> {
+        self.table.keys()
+    }
+}
+
+// Note: we modify the default PartialEq for IndexMap to also check for column ordering.
+// This is to align with the behaviour of a `RecordBatch`.
+impl<'a, S: Scalar> PartialEq for Table<'a, S> {
+    fn eq(&self, other: &Self) -> bool {
+        self.table == other.table
+            && self
+                .table
+                .keys()
+                .zip(other.table.keys())
+                .all(|(a, b)| a == b)
+    }
+}
+
+#[cfg(test)]
+impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
+    type Output = Column<'a, S>;
+    fn index(&self, index: &str) -> &Self::Output {
+        self.table
+            .get(&index.parse::<Identifier>().unwrap())
+            .unwrap()
+    }
+}
\ No newline at end of file
diff --git a/crates/proof-of-sql/src/base/database/table_test.rs b/crates/proof-of-sql/src/base/database/table_test.rs
new file mode 100644
index 000000000..1287379c0
--- /dev/null
+++ b/crates/proof-of-sql/src/base/database/table_test.rs
@@ -0,0 +1,187 @@
+use crate::{
+    base::{
+        database::{owned_table_utility::*, OwnedColumn, OwnedTable, OwnedTableError},
+        map::IndexMap,
+        scalar::test_scalar::TestScalar,
+    },
+    proof_primitive::dory::DoryScalar,
+};
+use proof_of_sql_parser::{
+    posql_time::{PoSQLTimeUnit, PoSQLTimeZone},
+    Identifier,
+};
+
+#[test]
+fn we_can_create_a_table_with_no_columns() {
+    let table = OwnedTable::<TestScalar>::try_new(IndexMap::default()).unwrap();
+    assert_eq!(table.num_columns(), 0);
+}
+#[test]
+fn we_can_create_an_empty_table() {
+    let owned_table = owned_table::<TestScalar>([
+        bigint("bigint", [0; 0]),
+        int128("decimal", [0; 0]),
+        varchar("varchar", ["0"; 0]),
+        scalar("scalar", [0; 0]),
+        boolean("boolean", [true; 0]),
+    ]);
+    let mut table = IndexMap::default();
+    table.insert(
+        Identifier::try_new("bigint").unwrap(),
+        OwnedColumn::BigInt(vec![]),
+    );
+    table.insert(
+        Identifier::try_new("decimal").unwrap(),
+        OwnedColumn::Int128(vec![]),
+    );
+    table.insert(
+        Identifier::try_new("varchar").unwrap(),
+        OwnedColumn::VarChar(vec![]),
+    );
+    table.insert(
+        Identifier::try_new("scalar").unwrap(),
+        OwnedColumn::Scalar(vec![]),
+    );
+    table.insert(
+        Identifier::try_new("boolean").unwrap(),
+        OwnedColumn::Boolean(vec![]),
+    );
+    assert_eq!(owned_table.into_inner(), table);
+}
+#[test]
+fn we_can_create_a_table_with_data() {
+    let owned_table = owned_table([
+        bigint("bigint", [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
+        int128("decimal", [0, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
+        varchar("varchar", ["0", "1", "2", "3", "4", "5", "6", "7", "8"]),
+        scalar("scalar", [0, 1, 2, 3, 4, 5, 6, 7, 8]),
+        boolean(
+            "boolean",
+            [true, false, true, false, true, false, true, false, true],
+        ),
+        timestamptz(
+            "time_stamp",
+            PoSQLTimeUnit::Second,
+            PoSQLTimeZone::Utc,
+            [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX],
+        ),
+    ]);
+    let mut table = IndexMap::default();
+    table.insert(
+        Identifier::try_new("time_stamp").unwrap(),
+        OwnedColumn::TimestampTZ(
+            PoSQLTimeUnit::Second,
+            PoSQLTimeZone::Utc,
+            [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX].into(),
+        ),
+    );
+    table.insert(
+        Identifier::try_new("bigint").unwrap(),
+        OwnedColumn::BigInt(vec![0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
+    );
+    table.insert(
+        Identifier::try_new("decimal").unwrap(),
+        OwnedColumn::Int128(vec![0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
+    );
+    table.insert(
+        Identifier::try_new("varchar").unwrap(),
+        OwnedColumn::VarChar(vec![
+            "0".to_string(),
+            "1".to_string(),
+            "2".to_string(),
+            "3".to_string(),
+            "4".to_string(),
+            "5".to_string(),
+            "6".to_string(),
+            "7".to_string(),
+            "8".to_string(),
+        ]),
+    );
+    table.insert(
+        Identifier::try_new("scalar").unwrap(),
+        OwnedColumn::Scalar(vec![
+            DoryScalar::from(0),
+            1.into(),
+            2.into(),
+            3.into(),
+            4.into(),
+            5.into(),
+            6.into(),
+            7.into(),
+            8.into(),
+        ]),
+    );
+    table.insert(
+        Identifier::try_new("boolean").unwrap(),
+        OwnedColumn::Boolean(vec![
+            true, false, true, false, true, false, true, false, true,
+        ]),
+    );
+    assert_eq!(owned_table.into_inner(), table);
+}
+#[test]
+fn we_get_inequality_between_tables_with_differing_column_order() {
+    let owned_table_a: OwnedTable<TestScalar> = owned_table([
+        bigint("a", [0; 0]),
+        int128("b", [0; 0]),
+        varchar("c", ["0"; 0]),
+        boolean("d", [false; 0]),
+        timestamptz(
+            "time_stamp",
+            PoSQLTimeUnit::Second,
+            PoSQLTimeZone::Utc,
+            [0; 0],
+        ),
+    ]);
+    let owned_table_b: OwnedTable<TestScalar> = owned_table([
+        boolean("d", [false; 0]),
+        int128("b", [0; 0]),
+        bigint("a", [0; 0]),
+        varchar("c", ["0"; 0]),
+        timestamptz(
+            "time_stamp",
+            PoSQLTimeUnit::Second,
+            PoSQLTimeZone::Utc,
+            [0; 0],
+        ),
+    ]);
+    assert_ne!(owned_table_a, owned_table_b);
+}
+#[test]
+fn we_get_inequality_between_tables_with_differing_data() {
+    let owned_table_a: OwnedTable<TestScalar> = owned_table([
+        bigint("a", [0]),
+        int128("b", [0]),
+        varchar("c", ["0"]),
+        boolean("d", [true]),
+        timestamptz(
+            "time_stamp",
+            PoSQLTimeUnit::Second,
+            PoSQLTimeZone::Utc,
+            [1_625_072_400],
+        ),
+    ]);
+    let owned_table_b: OwnedTable<TestScalar> = owned_table([
+        bigint("a", [1]),
+        int128("b", [0]),
+        varchar("c", ["0"]),
+        boolean("d", [true]),
+        timestamptz(
+            "time_stamp",
+            PoSQLTimeUnit::Second,
+            PoSQLTimeZone::Utc,
+            [1_625_076_000],
+        ),
+    ]);
+    assert_ne!(owned_table_a, owned_table_b);
+}
+#[test]
+fn we_cannot_create_a_table_with_differing_column_lengths() {
+    assert!(matches!(
+        OwnedTable::<TestScalar>::try_from_iter([
+            ("a".parse().unwrap(), OwnedColumn::BigInt(vec![0])),
+            ("b".parse().unwrap(), OwnedColumn::BigInt(vec![])),
+        ]),
+        Err(OwnedTableError::ColumnLengthMismatch)
+    ));
+}
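
For orientation only, a minimal usage sketch of the new borrowed `Table` type (not part of the diff). It assumes `Table`, `TableError`, and `Column` are reachable under `crate::base::database` (the re-export is not shown in this patch) and reuses `TestScalar` from the tests above; the test name and column names are illustrative.

use crate::base::{
    database::{Column, Table, TableError},
    scalar::test_scalar::TestScalar,
};

#[test]
fn we_can_sketch_basic_table_usage() {
    // Columns borrow their data, so the slices must outlive the table.
    let ints: &[i64] = &[1, 2, 3];
    let flags: &[bool] = &[true, false, true];

    // Columns of equal length construct successfully.
    let table = Table::<TestScalar>::try_from_iter([
        ("ints".parse().unwrap(), Column::BigInt(ints)),
        ("flags".parse().unwrap(), Column::Boolean(flags)),
    ])
    .unwrap();
    assert_eq!(table.num_columns(), 2);
    assert_eq!(table.num_rows(), 3);

    // A shorter column is rejected with ColumnLengthMismatch.
    let short: &[i64] = &[1];
    assert!(matches!(
        Table::<TestScalar>::try_from_iter([
            ("ints".parse().unwrap(), Column::BigInt(ints)),
            ("short".parse().unwrap(), Column::BigInt(short)),
        ]),
        Err(TableError::ColumnLengthMismatch)
    ));
}

Unlike `OwnedTable`, which owns its buffers, `Table` holds references, which is why the struct carries the `'a` lifetime and why the column-order-sensitive `PartialEq` mirrors the `RecordBatch` behaviour noted in the source above.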