-
Notifications
You must be signed in to change notification settings - Fork 193
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add `Table` and `table_utility.rs`
- Loading branch information
Showing 4 changed files with 650 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
use super::Column; | ||
use crate::base::{map::IndexMap, scalar::Scalar}; | ||
use proof_of_sql_parser::Identifier; | ||
use snafu::Snafu; | ||
|
||
/// An error that occurs when working with tables. | ||
#[derive(Snafu, Debug, PartialEq, Eq)] | ||
pub enum TableError { | ||
/// The columns have different lengths. | ||
#[snafu(display("Columns have different lengths"))] | ||
ColumnLengthMismatch, | ||
} | ||
/// A table of data, with schema included. This is simply a map from `Identifier` to `Column`, | ||
/// where columns order matters. | ||
/// This is primarily used as an internal result that is used before | ||
/// converting to the final result in either Arrow format or JSON. | ||
/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch). | ||
#[derive(Debug, Clone, Eq)] | ||
pub struct Table<'a, S: Scalar> { | ||
table: IndexMap<Identifier, Column<'a, S>>, | ||
} | ||
impl<'a, S: Scalar> Table<'a, S> { | ||
/// Creates a new [`Table`]. | ||
pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> { | ||
if table.is_empty() { | ||
return Ok(Self { table }); | ||
} | ||
let num_rows = table[0].len(); | ||
if table.values().any(|column| column.len() != num_rows) { | ||
Err(TableError::ColumnLengthMismatch) | ||
} else { | ||
Ok(Self { table }) | ||
} | ||
} | ||
/// Creates a new [`Table`]. | ||
pub fn try_from_iter<T: IntoIterator<Item = (Identifier, Column<'a, S>)>>( | ||
iter: T, | ||
) -> Result<Self, TableError> { | ||
Self::try_new(IndexMap::from_iter(iter)) | ||
} | ||
/// Number of columns in the table. | ||
#[must_use] | ||
pub fn num_columns(&self) -> usize { | ||
self.table.len() | ||
} | ||
/// Number of rows in the table. | ||
#[must_use] | ||
pub fn num_rows(&self) -> usize { | ||
if self.table.is_empty() { | ||
0 | ||
} else { | ||
self.table[0].len() | ||
} | ||
} | ||
/// Whether the table has no columns. | ||
#[must_use] | ||
pub fn is_empty(&self) -> bool { | ||
self.table.is_empty() | ||
} | ||
/// Returns the columns of this table as an `IndexMap` | ||
#[must_use] | ||
pub fn into_inner(self) -> IndexMap<Identifier, Column<'a, S>> { | ||
self.table | ||
} | ||
/// Returns the columns of this table as an `IndexMap` | ||
#[must_use] | ||
pub fn inner_table(&self) -> &IndexMap<Identifier, Column<'a, S>> { | ||
&self.table | ||
} | ||
/// Returns the columns of this table as an Iterator | ||
pub fn column_names(&self) -> impl Iterator<Item = &Identifier> { | ||
self.table.keys() | ||
} | ||
} | ||
|
||
// Note: we modify the default PartialEq for IndexMap to also check for column ordering. | ||
// This is to align with the behaviour of a `RecordBatch`. | ||
impl<S: Scalar> PartialEq for Table<'_, S> { | ||
fn eq(&self, other: &Self) -> bool { | ||
self.table == other.table | ||
&& self | ||
.table | ||
.keys() | ||
.zip(other.table.keys()) | ||
.all(|(a, b)| a == b) | ||
} | ||
} | ||
|
||
/// Test-only convenience: look a column up by its name given as a string.
///
/// Panics if the string does not parse as an `Identifier` or if the table has
/// no column with that name.
#[cfg(test)]
impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
    type Output = Column<'a, S>;
    fn index(&self, index: &str) -> &Self::Output {
        let column_name = index.parse::<Identifier>().unwrap();
        self.table.get(&column_name).unwrap()
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
use crate::{ | ||
base::{ | ||
database::{table_utility::*, Column, Table, TableError}, | ||
map::IndexMap, | ||
scalar::test_scalar::TestScalar, | ||
}, | ||
proof_primitive::dory::DoryScalar, | ||
}; | ||
use bumpalo::Bump; | ||
use proof_of_sql_parser::{ | ||
posql_time::{PoSQLTimeUnit, PoSQLTimeZone}, | ||
Identifier, | ||
}; | ||
|
||
/// A table built from an empty map is accepted and reports zero columns.
#[test]
fn we_can_create_a_table_with_no_columns() {
    let no_columns = IndexMap::default();
    let empty_table: Table<'_, TestScalar> = Table::try_new(no_columns).unwrap();
    assert_eq!(empty_table.num_columns(), 0);
}
/// Zero-length columns built through the `table_utility` helpers yield the same
/// map as one assembled by hand from empty column variants.
#[test]
fn we_can_create_an_empty_table() {
    let alloc = Bump::new();
    let actual = table::<DoryScalar>([
        bigint("bigint", [0; 0], &alloc),
        int128("decimal", [0; 0], &alloc),
        varchar("varchar", ["0"; 0], &alloc),
        scalar("scalar", [0; 0], &alloc),
        boolean("boolean", [true; 0], &alloc),
    ]);

    let column_name = |name: &str| Identifier::try_new(name).unwrap();
    let expected = IndexMap::from_iter([
        (column_name("bigint"), Column::BigInt(&[])),
        (column_name("decimal"), Column::Int128(&[])),
        (column_name("varchar"), Column::VarChar((&[], &[]))),
        (column_name("scalar"), Column::Scalar(&[])),
        (column_name("boolean"), Column::Boolean(&[])),
    ]);

    assert_eq!(actual.into_inner(), expected);
}
|
||
/// Builds a populated table through the `table_utility` helpers and checks that
/// its columns equal a map assembled by hand from the same values.
#[test]
fn we_can_create_a_table_with_data() {
    let alloc = Bump::new();

    // Input values shared by the helper-built table and the hand-built expectation.
    let i64_values = [0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX];
    let i128_values = [0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX];
    let str_values = ["0", "1", "2", "3", "4", "5", "6", "7", "8"];
    let bool_values = [true, false, true, false, true, false, true, false, true];

    let actual = table::<DoryScalar>([
        bigint("bigint", i64_values, &alloc),
        int128("decimal", i128_values, &alloc),
        varchar("varchar", str_values, &alloc),
        scalar("scalar", [0, 1, 2, 3, 4, 5, 6, 7, 8], &alloc),
        boolean("boolean", bool_values, &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            i64_values,
            &alloc,
        ),
    ]);

    let mut expected = IndexMap::default();

    expected.insert(
        Identifier::try_new("time_stamp").unwrap(),
        Column::TimestampTZ(
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            alloc.alloc_slice_copy(&i64_values),
        ),
    );

    expected.insert(
        Identifier::try_new("bigint").unwrap(),
        Column::BigInt(alloc.alloc_slice_copy(&i64_values)),
    );

    expected.insert(
        Identifier::try_new("decimal").unwrap(),
        Column::Int128(alloc.alloc_slice_copy(&i128_values)),
    );

    // The varchar column carries both the strings and their scalar representations.
    let strings: Vec<&str> = str_values
        .iter()
        .map(|&text| alloc.alloc_str(text) as &str)
        .collect();
    let string_scalars: Vec<DoryScalar> = strings.iter().map(Into::into).collect();
    let strings_in_arena = alloc.alloc_slice_clone(&strings);
    let string_scalars_in_arena = alloc.alloc_slice_clone(&string_scalars);
    expected.insert(
        Identifier::try_new("varchar").unwrap(),
        Column::VarChar((strings_in_arena, string_scalars_in_arena)),
    );

    let scalars: Vec<DoryScalar> = (0..=8).map(DoryScalar::from).collect();
    expected.insert(
        Identifier::try_new("scalar").unwrap(),
        Column::Scalar(alloc.alloc_slice_copy(&scalars)),
    );

    expected.insert(
        Identifier::try_new("boolean").unwrap(),
        Column::Boolean(alloc.alloc_slice_copy(&bool_values)),
    );

    assert_eq!(actual.into_inner(), expected);
}
|
||
/// Two tables holding the same (empty) columns in a different order must not
/// compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_column_order() {
    let alloc = Bump::new();

    let original_order = table::<TestScalar>([
        bigint("a", [0; 0], &alloc),
        int128("b", [0; 0], &alloc),
        varchar("c", ["0"; 0], &alloc),
        boolean("d", [false; 0], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64; 0],
            &alloc,
        ),
    ]);

    // Same columns, but "d", "b", "a", "c" instead of "a", "b", "c", "d".
    let shuffled_order = table::<TestScalar>([
        boolean("d", [false; 0], &alloc),
        int128("b", [0; 0], &alloc),
        bigint("a", [0; 0], &alloc),
        varchar("c", ["0"; 0], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0_i64; 0],
            &alloc,
        ),
    ]);

    assert_ne!(original_order, shuffled_order);
}
|
||
/// Two tables with identical column names and order but different values must
/// not compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_data() {
    let alloc = Bump::new();

    let first = table::<DoryScalar>([
        bigint("a", [0], &alloc),
        int128("b", [0], &alloc),
        varchar("c", ["0"], &alloc),
        boolean("d", [true], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_072_400],
            &alloc,
        ),
    ]);

    // Differs from `first` in the "a" and "time_stamp" columns.
    let second = table::<DoryScalar>([
        bigint("a", [1], &alloc),
        int128("b", [0], &alloc),
        varchar("c", ["0"], &alloc),
        boolean("d", [true], &alloc),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_076_000],
            &alloc,
        ),
    ]);

    assert_ne!(first, second);
}
|
||
/// Columns of unequal length are rejected with
/// `TableError::ColumnLengthMismatch`.
#[test]
fn we_cannot_create_a_table_with_differing_column_lengths() {
    // Column "a" has one row while column "b" has none.
    let outcome = Table::<TestScalar>::try_from_iter([
        ("a".parse().unwrap(), Column::BigInt(&[0])),
        ("b".parse().unwrap(), Column::BigInt(&[])),
    ]);
    assert!(matches!(outcome, Err(TableError::ColumnLengthMismatch)));
}
Oops, something went wrong.