-
Notifications
You must be signed in to change notification settings - Fork 193
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
284 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
use super::Column; | ||
use crate::base::{map::IndexMap, scalar::Scalar}; | ||
use proof_of_sql_parser::Identifier; | ||
use snafu::Snafu; | ||
|
||
/// An error that occurs when working with tables. | ||
#[derive(Snafu, Debug, PartialEq, Eq)] | ||
pub enum TableError { | ||
/// The columns have different lengths. | ||
#[snafu(display("Columns have different lengths"))] | ||
ColumnLengthMismatch, | ||
} | ||
/// A table of data, with schema included. This is simply a map from `Identifier` to `Column`, | ||
/// where columns order matters. | ||
/// This is primarily used as an internal result that is used before | ||
/// converting to the final result in either Arrow format or JSON. | ||
/// This is the analog of an arrow [`RecordBatch`](arrow::record_batch::RecordBatch). | ||
#[derive(Debug, Clone, Eq)] | ||
pub struct Table<'a, S: Scalar> { | ||
table: IndexMap<Identifier, Column<'a, S>>, | ||
} | ||
impl<S: Scalar> Table<'a, S> { | ||
/// Creates a new [`Table`]. | ||
pub fn try_new(table: IndexMap<Identifier, Column<'a, S>>) -> Result<Self, TableError> { | ||
if table.is_empty() { | ||
return Ok(Self { table }); | ||
} | ||
let num_rows = table[0].len(); | ||
if table.values().any(|column| column.len() != num_rows) { | ||
Err(TableError::ColumnLengthMismatch) | ||
} else { | ||
Ok(Self { table }) | ||
} | ||
} | ||
/// Creates a new [`Table`]. | ||
pub fn try_from_iter<T: IntoIterator<Item = (Identifier, Column<'a, S>)>>( | ||
iter: T, | ||
) -> Result<Self, TableError> { | ||
Self::try_new(IndexMap::from_iter(iter)) | ||
} | ||
/// Number of columns in the table. | ||
#[must_use] | ||
pub fn num_columns(&self) -> usize { | ||
self.table.len() | ||
} | ||
/// Number of rows in the table. | ||
#[must_use] | ||
pub fn num_rows(&self) -> usize { | ||
if self.table.is_empty() { | ||
0 | ||
} else { | ||
self.table[0].len() | ||
} | ||
} | ||
/// Whether the table has no columns. | ||
#[must_use] | ||
pub fn is_empty(&self) -> bool { | ||
self.table.is_empty() | ||
} | ||
/// Returns the columns of this table as an `IndexMap` | ||
#[must_use] | ||
pub fn into_inner(self) -> IndexMap<Identifier, OwnedColumn<S>> { | ||
self.table | ||
} | ||
/// Returns the columns of this table as an `IndexMap` | ||
#[must_use] | ||
pub fn inner_table(&self) -> &IndexMap<Identifier, OwnedColumn<S>> { | ||
&self.table | ||
} | ||
/// Returns the columns of this table as an Iterator | ||
pub fn column_names(&self) -> impl Iterator<Item = &Identifier> { | ||
self.table.keys() | ||
} | ||
} | ||
|
||
// Note: we modify the default PartialEq for IndexMap to also check for column ordering. | ||
// This is to align with the behaviour of a `RecordBatch`. | ||
impl<S: Scalar> PartialEq for Table<S> { | ||
fn eq(&self, other: &Self) -> bool { | ||
self.table == other.table | ||
&& self | ||
.table | ||
.keys() | ||
.zip(other.table.keys()) | ||
.all(|(a, b)| a == b) | ||
} | ||
} | ||
|
||
/// Test-only convenience that allows looking a column up by name via `table["name"]`.
///
/// # Panics
/// Panics if `index` does not parse as an [`Identifier`], or if the table has no column with
/// that name.
#[cfg(test)]
impl<'a, S: Scalar> core::ops::Index<&str> for Table<'a, S> {
    type Output = Column<'a, S>;
    fn index(&self, index: &str) -> &Self::Output {
        let name = index
            .parse::<Identifier>()
            .expect("column name should parse as an identifier");
        self.table
            .get(&name)
            .expect("table should contain the requested column")
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
use crate::{ | ||
base::{ | ||
database::{owned_table_utility::*, OwnedColumn, OwnedTable, OwnedTableError}, | ||
map::IndexMap, | ||
scalar::test_scalar::TestScalar, | ||
}, | ||
proof_primitive::dory::DoryScalar, | ||
}; | ||
use proof_of_sql_parser::{ | ||
posql_time::{PoSQLTimeUnit, PoSQLTimeZone}, | ||
Identifier, | ||
}; | ||
|
||
// An empty column map is accepted and yields a table that reports zero columns.
#[test]
fn we_can_create_a_table_with_no_columns() {
    let no_columns = IndexMap::default();
    let empty: OwnedTable<TestScalar> = OwnedTable::try_new(no_columns).unwrap();
    assert_eq!(empty.num_columns(), 0);
}
// Zero-length columns built through the utility helpers must match a hand-built map of
// empty columns.
#[test]
fn we_can_create_an_empty_table() {
    let built = owned_table::<DoryScalar>([
        bigint("bigint", [0; 0]),
        int128("decimal", [0; 0]),
        varchar("varchar", ["0"; 0]),
        scalar("scalar", [0; 0]),
        boolean("boolean", [true; 0]),
    ]);
    // Same names in the same order, each paired with an explicitly empty column.
    let expected: IndexMap<_, _> = vec![
        ("bigint", OwnedColumn::BigInt(vec![])),
        ("decimal", OwnedColumn::Int128(vec![])),
        ("varchar", OwnedColumn::VarChar(vec![])),
        ("scalar", OwnedColumn::Scalar(vec![])),
        ("boolean", OwnedColumn::Boolean(vec![])),
    ]
    .into_iter()
    .map(|(name, column)| (Identifier::try_new(name).unwrap(), column))
    .collect();
    assert_eq!(built.into_inner(), expected);
}
// A populated table built through the utility helpers must match a hand-built map holding
// the same columns. The hand-built map deliberately lists `time_stamp` first.
#[test]
fn we_can_create_a_table_with_data() {
    let built = owned_table([
        bigint("bigint", [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
        int128("decimal", [0, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
        varchar("varchar", ["0", "1", "2", "3", "4", "5", "6", "7", "8"]),
        scalar("scalar", [0, 1, 2, 3, 4, 5, 6, 7, 8]),
        boolean(
            "boolean",
            [true, false, true, false, true, false, true, false, true],
        ),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX],
        ),
    ]);
    let expected: IndexMap<_, _> = vec![
        (
            "time_stamp",
            OwnedColumn::TimestampTZ(
                PoSQLTimeUnit::Second,
                PoSQLTimeZone::Utc,
                [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX].into(),
            ),
        ),
        (
            "bigint",
            OwnedColumn::BigInt(vec![0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]),
        ),
        (
            "decimal",
            OwnedColumn::Int128(vec![0_i128, 1, 2, 3, 4, 5, 6, i128::MIN, i128::MAX]),
        ),
        (
            "varchar",
            // The strings "0" through "8".
            OwnedColumn::VarChar((0..9).map(|digit| digit.to_string()).collect()),
        ),
        (
            "scalar",
            // The scalars 0 through 8.
            OwnedColumn::Scalar((0..9).map(DoryScalar::from).collect()),
        ),
        (
            "boolean",
            OwnedColumn::Boolean(vec![
                true, false, true, false, true, false, true, false, true,
            ]),
        ),
    ]
    .into_iter()
    .map(|(name, column)| (Identifier::try_new(name).unwrap(), column))
    .collect();
    assert_eq!(built.into_inner(), expected);
}
// Two tables holding the same (empty) columns in a different order must not compare equal.
#[test]
fn we_get_inequality_between_tables_with_differing_column_order() {
    let first = owned_table::<TestScalar>([
        bigint("a", [0; 0]),
        int128("b", [0; 0]),
        varchar("c", ["0"; 0]),
        boolean("d", [false; 0]),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0; 0],
        ),
    ]);
    // Same columns as `first`, but with "d", "b", "a", "c" as the leading order.
    let reordered = owned_table::<TestScalar>([
        boolean("d", [false; 0]),
        int128("b", [0; 0]),
        bigint("a", [0; 0]),
        varchar("c", ["0"; 0]),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [0; 0],
        ),
    ]);
    assert_ne!(first, reordered);
}
// Two tables with identical column names and order but different values must not compare
// equal.
#[test]
fn we_get_inequality_between_tables_with_differing_data() {
    let first = owned_table::<DoryScalar>([
        bigint("a", [0]),
        int128("b", [0]),
        varchar("c", ["0"]),
        boolean("d", [true]),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_072_400],
        ),
    ]);
    // Differs from `first` in the "a" value and in the timestamp value.
    let second = owned_table::<DoryScalar>([
        bigint("a", [1]),
        int128("b", [0]),
        varchar("c", ["0"]),
        boolean("d", [true]),
        timestamptz(
            "time_stamp",
            PoSQLTimeUnit::Second,
            PoSQLTimeZone::Utc,
            [1_625_076_000],
        ),
    ]);
    assert_ne!(first, second);
}
// Building a table from one single-row column and one empty column must be rejected with a
// length-mismatch error.
#[test]
fn we_cannot_create_a_table_with_differing_column_lengths() {
    let mismatched_columns = [
        ("a".parse().unwrap(), OwnedColumn::BigInt(vec![0])),
        ("b".parse().unwrap(), OwnedColumn::BigInt(vec![])),
    ];
    let outcome = OwnedTable::<TestScalar>::try_from_iter(mismatched_columns);
    assert!(matches!(
        outcome,
        Err(OwnedTableError::ColumnLengthMismatch)
    ));
}