Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add Table && TableTestAccessor && table utilities #364

Merged
merged 4 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions crates/proof-of-sql/src/base/database/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ pub(crate) use owned_table::OwnedTableError;
mod owned_table_test;
pub mod owned_table_utility;

mod table;
pub use table::Table;
#[cfg(test)]
pub(crate) use table::TableError;
#[cfg(test)]
mod table_test;
pub mod table_utility;

/// TODO: add docs
pub(crate) mod expression_evaluation;
mod expression_evaluation_error;
Expand All @@ -87,6 +95,11 @@ pub use owned_table_test_accessor::OwnedTableTestAccessor;
#[cfg(all(test, feature = "blitzar"))]
mod owned_table_test_accessor_test;

mod table_test_accessor;
pub use table_test_accessor::TableTestAccessor;
#[cfg(all(test, feature = "blitzar"))]
mod table_test_accessor_test;

/// TODO: add docs
pub(crate) mod filter_util;
#[cfg(test)]
Expand Down
93 changes: 93 additions & 0 deletions crates/proof-of-sql/src/base/database/table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
use super::Column;
use crate::base::{map::IndexMap, scalar::Scalar};
use proof_of_sql_parser::Identifier;
use snafu::Snafu;

/// An error that occurs when working with tables.
///
/// This is the error type returned by the fallible [`Table`] constructors
/// (`Table::try_new` and `Table::try_from_iter`).
#[derive(Snafu, Debug, PartialEq, Eq)]
pub enum TableError {
/// The columns have different lengths.
///
/// Reported when at least one column's length differs from the length of the first column.
#[snafu(display("Columns have different lengths"))]
ColumnLengthMismatch,
}
/// A table of data, with schema included. This is simply a map from `Identifier` to `Column`,
/// where column order matters.
/// This is primarily used as an internal result that is used before
/// converting to the final result in either Arrow format or JSON.
/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch).
///
/// Equality is implemented by hand (hence `Eq` but not `PartialEq` in the derive list) so that
/// two tables only compare equal when their columns also appear in the same order.
#[derive(Debug, Clone, Eq)]
pub struct Table<'a, S: Scalar> {
// Column name -> column data. The field is private; `Table::try_new` checks that every
// column has the same length before storing the map here.
table: IndexMap<Identifier, Column<'a, S>>,
}
impl<'a, S: Scalar> Table<'a, S> {
    /// Builds a [`Table`] from a map of column names to columns.
    ///
    /// A map without any columns is accepted unchanged.
    ///
    /// # Errors
    /// Returns [`TableError::ColumnLengthMismatch`] when any column's length differs from the
    /// length of the first column.
    pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> {
        let lengths_agree = match table.values().next() {
            // No columns at all: there is nothing that could disagree.
            None => true,
            // Every column is measured against the first one.
            Some(first) => {
                let expected_len = first.len();
                table.values().all(|column| column.len() == expected_len)
            }
        };
        if lengths_agree {
            Ok(Self { table })
        } else {
            Err(TableError::ColumnLengthMismatch)
        }
    }
    /// Builds a [`Table`] from an iterator of `(name, column)` pairs.
    ///
    /// The pairs are collected into an `IndexMap` and then validated by [`Table::try_new`].
    ///
    /// # Errors
    /// Returns [`TableError::ColumnLengthMismatch`] under the same conditions as
    /// [`Table::try_new`].
    pub fn try_from_iter<T: IntoIterator<Item = (Identifier, Column<'a, S>)>>(
        iter: T,
    ) -> Result<Self, TableError> {
        Self::try_new(iter.into_iter().collect())
    }
    /// Number of columns in the table.
    #[must_use]
    pub fn num_columns(&self) -> usize {
        self.table.len()
    }
    /// Number of rows in the table, taken from the length of the first column.
    ///
    /// Returns `None` when the table has no columns.
    #[must_use]
    pub fn num_rows(&self) -> Option<usize> {
        let first = self.table.values().next()?;
        Some(first.len())
    }
    /// Whether the table has no columns.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.table.is_empty()
    }
    /// Consumes the table and returns the underlying `IndexMap` of columns.
    #[must_use]
    pub fn into_inner(self) -> IndexMap<Identifier, Column<'a, S>> {
        let Self { table } = self;
        table
    }
    /// Borrows the underlying `IndexMap` of columns.
    #[must_use]
    pub fn inner_table(&self) -> &IndexMap<Identifier, Column<'a, S>> {
        &self.table
    }
    /// Returns an iterator over the column names (the keys of the underlying map).
    pub fn column_names(&self) -> impl Iterator<Item = &Identifier> {
        self.table.keys()
    }
}

// Note: comparing the underlying `IndexMap`s alone is not enough here, so this impl adds a
// second check that the column names appear in the same order in both tables.
// This is to align with the behaviour of a `RecordBatch`.
impl<S: Scalar> PartialEq for Table<'_, S> {
    fn eq(&self, other: &Self) -> bool {
        // First the map-level comparison of the two tables.
        if self.table != other.table {
            return false;
        }
        // Then the ordering: the key sequences must match position by position.
        self.table.keys().eq(other.table.keys())
    }
}

/// Test-only convenience: look a column up by its name given as a string.
///
/// # Panics
/// Panics if `index` does not parse as an `Identifier`, or if the table has no column with
/// that name.
#[cfg(test)]
impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
    type Output = Column<'a, S>;
    fn index(&self, index: &str) -> &Self::Output {
        let identifier: Identifier = index.parse().unwrap();
        self.table.get(&identifier).unwrap()
    }
}
206 changes: 206 additions & 0 deletions crates/proof-of-sql/src/base/database/table_test.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
use crate::base::{
database::{table_utility::*, Column, Table, TableError},
map::IndexMap,
scalar::test_scalar::TestScalar,
};
use bumpalo::Bump;
use proof_of_sql_parser::{
posql_time::{PoSQLTimeUnit, PoSQLTimeZone},
Identifier,
};

/// A table built from an empty map has zero columns and no row count.
#[test]
fn we_can_create_a_table_with_no_columns() {
    let no_columns = IndexMap::default();
    let empty: Table<'_, TestScalar> = Table::try_new(no_columns).unwrap();
    assert_eq!(empty.num_columns(), 0);
    assert_eq!(empty.num_rows(), None);
}
/// The `borrowed_*` helpers with zero-length data produce a table whose columns are all empty.
#[test]
fn we_can_create_an_empty_table() {
    let alloc = Bump::new();
    let borrowed_table = table::<TestScalar>([
        borrowed_bigint("bigint", [0; 0], &alloc),
        borrowed_int128("decimal", [0; 0], &alloc),
        borrowed_varchar("varchar", ["0"; 0], &alloc),
        borrowed_scalar("scalar", [0; 0], &alloc),
        borrowed_boolean("boolean", [true; 0], &alloc),
    ]);

    // Hand-built expectation: one empty column per name, in the same order as above.
    let ident = |name: &str| Identifier::try_new(name).unwrap();
    let expected: IndexMap<Identifier, Column<'_, TestScalar>> = [
        (ident("bigint"), Column::BigInt(&[])),
        (ident("decimal"), Column::Int128(&[])),
        (ident("varchar"), Column::VarChar((&[], &[]))),
        (ident("scalar"), Column::Scalar(&[])),
        (ident("boolean"), Column::Boolean(&[])),
    ]
    .into_iter()
    .collect();

    assert_eq!(borrowed_table.into_inner(), expected);
}

/// A table built through the `borrowed_*` helpers matches a map assembled column by column.
#[test]
fn we_can_create_a_table_with_data() {
    let alloc = Bump::new();

    // Raw values shared by the table under test and the hand-built expectation.
    let i64_values = [0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX];
    let i128_values = [0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX];
    let text_values = ["0", "1", "2", "3", "4", "5", "6", "7", "8"];
    let bool_values = [true, false, true, false, true, false, true, false, true];

    let borrowed_table = table::<TestScalar>([
        borrowed_bigint("bigint", i64_values, &alloc),
        borrowed_int128("decimal", i128_values, &alloc),
        borrowed_varchar("varchar", text_values, &alloc),
        borrowed_scalar("scalar", [0, 1, 2, 3, 4, 5, 6, 7, 8], &alloc),
        borrowed_boolean("boolean", bool_values, &alloc),
        borrowed_timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            i64_values,
            &alloc,
        ),
    ]);

    let ident = |name: &str| Identifier::try_new(name).unwrap();
    let mut expected_table = IndexMap::default();

    // The expectation is inserted with `time_stamp` first, unlike the table above.
    expected_table.insert(
        ident("time_stamp"),
        Column::TimestampTZ(
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            alloc.alloc_slice_copy(&i64_values),
        ),
    );
    expected_table.insert(
        ident("bigint"),
        Column::BigInt(alloc.alloc_slice_copy(&i64_values)),
    );
    expected_table.insert(
        ident("decimal"),
        Column::Int128(alloc.alloc_slice_copy(&i128_values)),
    );

    // A varchar column carries both the strings and their scalar representations.
    let varchar_data: Vec<&str> = text_values
        .iter()
        .map(|&text| alloc.alloc_str(text) as &str)
        .collect();
    let varchar_scalars: Vec<TestScalar> = varchar_data.iter().map(Into::into).collect();
    expected_table.insert(
        ident("varchar"),
        Column::VarChar((
            alloc.alloc_slice_clone(&varchar_data),
            alloc.alloc_slice_clone(&varchar_scalars),
        )),
    );

    let scalar_data: Vec<TestScalar> = (0..=8).map(TestScalar::from).collect();
    expected_table.insert(
        ident("scalar"),
        Column::Scalar(alloc.alloc_slice_copy(&scalar_data)),
    );

    expected_table.insert(
        ident("boolean"),
        Column::Boolean(alloc.alloc_slice_copy(&bool_values)),
    );

    assert_eq!(borrowed_table.into_inner(), expected_table);
}

/// Two tables holding the same (empty) columns in a different order must not compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_column_order() {
    let alloc = Bump::new();

    // Column order: a, b, c, d, time_stamp.
    let original = table::<TestScalar>([
        borrowed_bigint("a", [0; 0], &alloc),
        borrowed_int128("b", [0; 0], &alloc),
        borrowed_varchar("c", ["0"; 0], &alloc),
        borrowed_boolean("d", [false; 0], &alloc),
        borrowed_timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64; 0],
            &alloc,
        ),
    ]);

    // Same columns, but ordered: d, b, a, c, time_stamp.
    let reordered = table::<TestScalar>([
        borrowed_boolean("d", [false; 0], &alloc),
        borrowed_int128("b", [0; 0], &alloc),
        borrowed_bigint("a", [0; 0], &alloc),
        borrowed_varchar("c", ["0"; 0], &alloc),
        borrowed_timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64; 0],
            &alloc,
        ),
    ]);

    assert_ne!(original, reordered);
}

/// Two tables with the same column names and order but different values must not compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_data() {
    let alloc = Bump::new();

    let first = table::<TestScalar>([
        borrowed_bigint("a", [0], &alloc),
        borrowed_int128("b", [0], &alloc),
        borrowed_varchar("c", ["0"], &alloc),
        borrowed_boolean("d", [true], &alloc),
        borrowed_timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_072_400],
            &alloc,
        ),
    ]);

    // Differs from `first` in column "a" and in the timestamp value.
    let second = table::<TestScalar>([
        borrowed_bigint("a", [1], &alloc),
        borrowed_int128("b", [0], &alloc),
        borrowed_varchar("c", ["0"], &alloc),
        borrowed_boolean("d", [true], &alloc),
        borrowed_timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_076_000],
            &alloc,
        ),
    ]);

    assert_ne!(first, second);
}

/// Constructing a table from columns of unequal length is rejected with `ColumnLengthMismatch`.
#[test]
fn we_cannot_create_a_table_with_differing_column_lengths() {
    // Column "a" has one row, column "b" has none.
    let mismatched_columns = [
        ("a".parse().unwrap(), Column::BigInt(&[0])),
        ("b".parse().unwrap(), Column::BigInt(&[])),
    ];
    let result = Table::<TestScalar>::try_from_iter(mismatched_columns);
    assert!(matches!(result, Err(TableError::ColumnLengthMismatch)));
}
Loading
Loading