Skip to content

Commit

Permalink
feat: add Table
Browse files Browse the repository at this point in the history
  • Loading branch information
iajoiner committed Nov 11, 2024
1 parent 107fa10 commit a5d0e80
Show file tree
Hide file tree
Showing 2 changed files with 284 additions and 0 deletions.
97 changes: 97 additions & 0 deletions crates/proof-of-sql/src/base/database/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use super::Column;
use crate::base::{map::IndexMap, scalar::Scalar};
use proof_of_sql_parser::Identifier;
use snafu::Snafu;

/// An error that occurs when constructing or working with a [`Table`].
#[derive(Snafu, Debug, PartialEq, Eq)]
pub enum TableError {
/// The columns of the table do not all have the same length (number of rows).
#[snafu(display("Columns have different lengths"))]
ColumnLengthMismatch,
}
/// A table of data, with schema included. This is simply a map from `Identifier` to `Column`,
/// where the order of the columns matters.
///
/// This is primarily used as an internal result that is used before
/// converting to the final result in either Arrow format or JSON.
/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch).
///
/// The field is private; the visible constructors reject columns of differing lengths.
#[derive(Debug, Clone, Eq)]
pub struct Table<'a, S: Scalar> {
/// Ordered map from column name to column data.
table: IndexMap<Identifier, Column<'a, S>>,
}
impl<S: Scalar> Table<'a, S> {
/// Creates a new [`Table`].
pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> {
if table.is_empty() {
return Ok(Self { table });
}
let num_rows = table[0].len();
if table.values().any(|column| column.len() != num_rows) {
Err(TableError::ColumnLengthMismatch)
} else {
Ok(Self { table })
}
}
/// Creates a new [`Table`].
pub fn try_from_iter<T: IntoIterator<Item = (Identifier, Column<'a, S>)>>(
iter: T,
) -> Result<Self, TableError> {
Self::try_new(IndexMap::from_iter(iter))
}
/// Number of columns in the table.
#[must_use]
pub fn num_columns(&self) -> usize {
self.table.len()
}
/// Number of rows in the table.
#[must_use]
pub fn num_rows(&self) -> usize {
if self.table.is_empty() {
0
} else {
self.table[0].len()
}
}
/// Whether the table has no columns.
#[must_use]
pub fn is_empty(&self) -> bool {
self.table.is_empty()
}
/// Returns the columns of this table as an `IndexMap`
#[must_use]
pub fn into_inner(self) -> IndexMap<Identifier, OwnedColumn<S>> {
self.table
}
/// Returns the columns of this table as an `IndexMap`
#[must_use]
pub fn inner_table(&self) -> &IndexMap<Identifier, OwnedColumn<S>> {
&self.table
}
/// Returns the columns of this table as an Iterator
pub fn column_names(&self) -> impl Iterator<Item = &Identifier> {
self.table.keys()
}
}

// Note: equality here is stricter than the default `IndexMap` comparison, because the
// column ordering must match as well. This aligns with the behaviour of a `RecordBatch`.
impl<S: Scalar> PartialEq for Table<'_, S> {
    fn eq(&self, rhs: &Self) -> bool {
        // First the map comparison, then the keys pairwise in iteration order.
        let same_contents = self.table == rhs.table;
        same_contents && self.table.keys().eq(rhs.table.keys())
    }
}

/// Test-only convenience for looking a column up by its name, e.g. `table["a"]`.
///
/// # Panics
/// Panics if `index` cannot be parsed as an [`Identifier`] or if the table has no column
/// with that name.
#[cfg(test)]
impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
    type Output = Column<'a, S>;
    fn index(&self, index: &str) -> &Self::Output {
        let identifier = index
            .parse::<Identifier>()
            .expect("column name should be a valid identifier");
        self.table
            .get(&identifier)
            .expect("column should exist in the table")
    }
}
187 changes: 187 additions & 0 deletions crates/proof-of-sql/src/base/database/table_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
use crate::{
base::{
database::{owned_table_utility::*, OwnedColumn, OwnedTable, OwnedTableError},
map::IndexMap,
scalar::test_scalar::TestScalar,
},
proof_primitive::dory::DoryScalar,
};
use proof_of_sql_parser::{
posql_time::{PoSQLTimeUnit, PoSQLTimeZone},
Identifier,
};

// A table built from an empty map reports zero columns.
#[test]
fn we_can_create_a_table_with_no_columns() {
    let no_columns = IndexMap::default();
    let empty = OwnedTable::<TestScalar>::try_new(no_columns).unwrap();
    assert_eq!(empty.num_columns(), 0);
}
// A table with several zero-length columns unwraps to the matching map of empty columns.
#[test]
fn we_can_create_an_empty_table() {
    let actual = owned_table::<DoryScalar>([
        bigint("bigint", [0; 0]),
        int128("decimal", [0; 0]),
        varchar("varchar", ["0"; 0]),
        scalar("scalar", [0; 0]),
        boolean("boolean", [true; 0]),
    ]);
    let expected: IndexMap<Identifier, OwnedColumn<DoryScalar>> = [
        ("bigint", OwnedColumn::BigInt(vec![])),
        ("decimal", OwnedColumn::Int128(vec![])),
        ("varchar", OwnedColumn::VarChar(vec![])),
        ("scalar", OwnedColumn::Scalar(vec![])),
        ("boolean", OwnedColumn::Boolean(vec![])),
    ]
    .into_iter()
    .map(|(name, column)| (Identifier::try_new(name).unwrap(), column))
    .collect();
    assert_eq!(actual.into_inner(), expected);
}
// A table built through the utility helpers unwraps to the same columns as a hand-built map.
#[test]
fn we_can_create_a_table_with_data() {
    let actual: OwnedTable<DoryScalar> = owned_table([
        bigint("bigint", [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
        int128("decimal", [0, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
        varchar("varchar", ["0", "1", "2", "3", "4", "5", "6", "7", "8"]),
        scalar("scalar", [0, 1, 2, 3, 4, 5, 6, 7, 8]),
        boolean(
            "boolean",
            [true, false, true, false, true, false, true, false, true],
        ),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX],
        ),
    ]);

    // The expected map deliberately lists `time_stamp` first, unlike the table above.
    let expected: IndexMap<Identifier, OwnedColumn<DoryScalar>> = [
        (
            "time_stamp",
            OwnedColumn::TimestampTZ(
                PoSQLTimeUnit::Second,
                PoSQLTimeZone::Utc,
                [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX].into(),
            ),
        ),
        (
            "bigint",
            OwnedColumn::BigInt(vec![0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
        ),
        (
            "decimal",
            OwnedColumn::Int128(vec![0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
        ),
        (
            "varchar",
            OwnedColumn::VarChar((0..9).map(|digit| digit.to_string()).collect()),
        ),
        (
            "scalar",
            OwnedColumn::Scalar((0..9).map(DoryScalar::from).collect()),
        ),
        (
            "boolean",
            OwnedColumn::Boolean(vec![
                true, false, true, false, true, false, true, false, true,
            ]),
        ),
    ]
    .into_iter()
    .map(|(name, column)| (Identifier::try_new(name).unwrap(), column))
    .collect();

    assert_eq!(actual.into_inner(), expected);
}
// Two tables holding the same (empty) columns in a different order must not compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_column_order() {
    // Both tables end with the same empty timestamp column.
    let empty_time_stamp = || {
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0; 0],
        )
    };
    let first: OwnedTable<TestScalar> = owned_table([
        bigint("a", [0; 0]),
        int128("b", [0; 0]),
        varchar("c", ["0"; 0]),
        boolean("d", [false; 0]),
        empty_time_stamp(),
    ]);
    let reordered: OwnedTable<TestScalar> = owned_table([
        boolean("d", [false; 0]),
        int128("b", [0; 0]),
        bigint("a", [0; 0]),
        varchar("c", ["0"; 0]),
        empty_time_stamp(),
    ]);
    assert_ne!(first, reordered);
}
// Two tables with the same schema but different values must not compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_data() {
    // Only the `a` value and the timestamp vary between the two tables.
    let build = |a_value: i64, time_stamp: i64| -> OwnedTable<DoryScalar> {
        owned_table([
            bigint("a", [a_value]),
            int128("b", [0]),
            varchar("c", ["0"]),
            boolean("d", [true]),
            timestamptz(
                "time_stamp",
                PoSQLTimeUnit::Second,
                PoSQLTimeZone::Utc,
                [time_stamp],
            ),
        ])
    };
    let left = build(0, 1_625_072_400);
    let right = build(1, 1_625_076_000);
    assert_ne!(left, right);
}
// Building a table from columns of unequal length is rejected with a length-mismatch error.
#[test]
fn we_cannot_create_a_table_with_differing_column_lengths() {
    let one_row = ("a".parse().unwrap(), OwnedColumn::BigInt(vec![0]));
    let no_rows = ("b".parse().unwrap(), OwnedColumn::BigInt(vec![]));
    let result = OwnedTable::<TestScalar>::try_from_iter([one_row, no_rows]);
    assert!(matches!(
        result,
        Err(OwnedTableError::ColumnLengthMismatch)
    ));
}

0 comments on commit a5d0e80

Please sign in to comment.