Skip to content

Commit

Permalink
refactor: simplify proof-of-sql-parser (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
iajoiner authored Jul 25, 2024
1 parent ac8aeef commit c63d33b
Show file tree
Hide file tree
Showing 26 changed files with 120 additions and 101 deletions.
3 changes: 3 additions & 0 deletions crates/proof-of-sql-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ lalrpop-util = { workspace = true, features = ["lexer", "unicode"] }
serde = { workspace = true, features = ["serde_derive"] }
thiserror = { workspace = true }

[features]
parser-test-utility = []

[build-dependencies]
lalrpop = { version = "0.20.0" }

Expand Down
44 changes: 0 additions & 44 deletions crates/proof-of-sql-parser/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use serde::{Deserialize, Serialize};
use thiserror::Error;

/// Errors encountered during the parsing process
Expand All @@ -18,46 +17,3 @@ pub enum ParseError {
/// Result type returned by parsing operations; the error variant is a [`ParseError`].
///
/// A parsing error may occur, for example, if the provided schema/object_name strings
/// aren't valid postgres-style identifiers (excluding dollar signs).
pub type ParseResult<T> = std::result::Result<T, ParseError>;

/// Errors related to time operations, including timezone and timestamp conversions.s
#[derive(Error, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum PoSQLTimestampError {
/// Error when the timezone string provided cannot be parsed into a valid timezone.
#[error("invalid timezone string: {0}")]
InvalidTimezone(String),

/// Error indicating an invalid timezone offset was provided.
#[error("invalid timezone offset")]
InvalidTimezoneOffset,

/// Indicates a failure to convert between different representations of time units.
#[error("Invalid time unit")]
InvalidTimeUnit(String),

/// The local time does not exist because there is a gap in the local time.
/// This variant may also be returned if there was an error while resolving the local time,
/// caused by for example missing time zone data files, an error in an OS API, or overflow.
#[error("Local time does not exist because there is a gap in the local time")]
LocalTimeDoesNotExist,

/// The local time is ambiguous because there is a fold in the local time.
/// This variant contains the two possible results, in the order (earliest, latest).
#[error("Unix timestamp is ambiguous because there is a fold in the local time.")]
Ambiguous(String),

/// Represents a catch-all for parsing errors not specifically covered by other variants.
#[error("Timestamp parsing error: {0}")]
ParsingError(String),

/// Represents a failure to parse a provided time unit precision value, PoSQL supports
/// Seconds, Milliseconds, Microseconds, and Nanoseconds
#[error("Timestamp parsing error: {0}")]
UnsupportedPrecision(String),
}

// `TryFrom<DataType> for ColumnType` reports its error as a `String`, so a timestamp
// error has to be convertible into one.
impl From<PoSQLTimestampError> for String {
    /// Renders the error through its `Display` implementation.
    fn from(err: PoSQLTimestampError) -> Self {
        ToString::to_string(&err)
    }
}
4 changes: 1 addition & 3 deletions crates/proof-of-sql-parser/src/intermediate_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
* https://docs.rs/vervolg/latest/vervolg/ast/enum.Statement.html
***/

use crate::{
intermediate_decimal::IntermediateDecimal, posql_time::timestamp::PoSQLTimestamp, Identifier,
};
use crate::{intermediate_decimal::IntermediateDecimal, posql_time::PoSQLTimestamp, Identifier};
use serde::{Deserialize, Serialize};

/// Representation of a SetExpression, a collection of rows, each having one or more columns.
Expand Down
7 changes: 4 additions & 3 deletions crates/proof-of-sql-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ extern crate lalrpop_util;

pub mod intermediate_ast;

#[cfg(test)]
#[cfg(all(test, feature = "parser-test-utility"))]
mod intermediate_ast_tests;

#[cfg(test)]
pub(crate) mod test_utility;
#[cfg(feature = "parser-test-utility")]
/// Shortcuts to construct intermediate AST nodes.
pub mod test_utility;

pub(crate) mod select_statement;
pub use select_statement::SelectStatement;
Expand Down
45 changes: 45 additions & 0 deletions crates/proof-of-sql-parser/src/posql_time/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use serde::{Deserialize, Serialize};
use thiserror::Error;

/// Errors related to time operations, including timezone and timestamp conversions.s
#[derive(Error, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum PoSQLTimestampError {
/// Error when the timezone string provided cannot be parsed into a valid timezone.
#[error("invalid timezone string: {0}")]
InvalidTimezone(String),

/// Error indicating an invalid timezone offset was provided.
#[error("invalid timezone offset")]
InvalidTimezoneOffset,

/// Indicates a failure to convert between different representations of time units.
#[error("Invalid time unit")]
InvalidTimeUnit(String),

/// The local time does not exist because there is a gap in the local time.
/// This variant may also be returned if there was an error while resolving the local time,
/// caused by for example missing time zone data files, an error in an OS API, or overflow.
#[error("Local time does not exist because there is a gap in the local time")]
LocalTimeDoesNotExist,

/// The local time is ambiguous because there is a fold in the local time.
/// This variant contains the two possible results, in the order (earliest, latest).
#[error("Unix timestamp is ambiguous because there is a fold in the local time.")]
Ambiguous(String),

/// Represents a catch-all for parsing errors not specifically covered by other variants.
#[error("Timestamp parsing error: {0}")]
ParsingError(String),

/// Represents a failure to parse a provided time unit precision value, PoSQL supports
/// Seconds, Milliseconds, Microseconds, and Nanoseconds
#[error("Timestamp parsing error: {0}")]
UnsupportedPrecision(String),
}

// `TryFrom<DataType> for ColumnType` reports its error as a `String`, so a timestamp
// error has to be convertible into one.
impl From<PoSQLTimestampError> for String {
    /// Renders the error through its `Display` implementation.
    fn from(err: PoSQLTimestampError) -> Self {
        ToString::to_string(&err)
    }
}
12 changes: 9 additions & 3 deletions crates/proof-of-sql-parser/src/posql_time/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
mod error;
/// Errors related to time operations, including timezone and timestamp conversions.
pub use error::PoSQLTimestampError;
mod timestamp;
/// Defines an RFC3339-formatted timestamp
pub mod timestamp;
pub use timestamp::PoSQLTimestamp;
mod timezone;
/// Defines a timezone as count of seconds offset from UTC
pub mod timezone;
pub use timezone::PoSQLTimeZone;
mod unit;
/// Defines the precision of the timestamp
pub mod unit;
pub use unit::PoSQLTimeUnit;
15 changes: 7 additions & 8 deletions crates/proof-of-sql-parser/src/posql_time/timestamp.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use super::{timezone, unit::PoSQLTimeUnit};
use crate::error::PoSQLTimestampError;
use super::{PoSQLTimeUnit, PoSQLTimeZone, PoSQLTimestampError};
use chrono::{offset::LocalResult, DateTime, TimeZone, Utc};
use serde::{Deserialize, Serialize};

Expand All @@ -13,7 +12,7 @@ pub struct PoSQLTimestamp {
pub timeunit: PoSQLTimeUnit,

/// The timezone of the datetime, either UTC or a fixed offset from UTC.
pub timezone: timezone::PoSQLTimeZone,
pub timezone: PoSQLTimeZone,
}

impl PoSQLTimestamp {
Expand All @@ -32,7 +31,7 @@ impl PoSQLTimestamp {
/// # Examples
/// ```
/// use chrono::{DateTime, Utc};
/// use proof_of_sql_parser::posql_time::{timestamp::PoSQLTimestamp, timezone::PoSQLTimeZone};
/// use proof_of_sql_parser::posql_time::{PoSQLTimestamp, PoSQLTimeZone};
///
/// // Parsing an RFC 3339 timestamp without a timezone:
/// let timestamp_str = "2009-01-03T18:15:05Z";
Expand All @@ -49,7 +48,7 @@ impl PoSQLTimestamp {
.map_err(|e| PoSQLTimestampError::ParsingError(e.to_string()))?;

let offset_seconds = dt.offset().local_minus_utc();
let timezone = timezone::PoSQLTimeZone::from_offset(offset_seconds);
let timezone = PoSQLTimeZone::from_offset(offset_seconds);
let nanoseconds = dt.timestamp_subsec_nanos();
let timeunit = if nanoseconds % 1_000 != 0 {
PoSQLTimeUnit::Nanosecond
Expand Down Expand Up @@ -78,7 +77,7 @@ impl PoSQLTimestamp {
/// # Examples
/// ```
/// use chrono::{DateTime, Utc};
/// use proof_of_sql_parser::posql_time::{timestamp::PoSQLTimestamp, timezone::PoSQLTimeZone};
/// use proof_of_sql_parser::posql_time::{PoSQLTimestamp, PoSQLTimeZone};
///
/// // Parsing a Unix epoch timestamp (assumed to be seconds and UTC):
/// let unix_time = 1231006505;
Expand All @@ -90,7 +89,7 @@ impl PoSQLTimestamp {
LocalResult::Single(timestamp) => Ok(PoSQLTimestamp {
timestamp,
timeunit: PoSQLTimeUnit::Second,
timezone: timezone::PoSQLTimeZone::Utc,
timezone: PoSQLTimeZone::Utc,
}),
LocalResult::Ambiguous(earliest, latest) => Err(PoSQLTimestampError::Ambiguous(
format!("The local time is ambiguous because there is a fold in the local time: earliest: {} latest: {} ", earliest, latest),
Expand All @@ -107,7 +106,7 @@ mod tests {
// Checks that a timestamp built from a Unix epoch value reports the UTC timezone.
#[test]
fn test_unix_epoch_time_timezone() {
let unix_time = 1231006505; // Unix epoch time as an integer (seconds)
let expected_timezone = timezone::PoSQLTimeZone::Utc; // Unix time should always be UTC
let expected_timezone = PoSQLTimeZone::Utc; // Unix time should always be UTC
let result = PoSQLTimestamp::to_timestamp(unix_time).unwrap();
assert_eq!(result.timezone, expected_timezone);
}
Expand Down
2 changes: 1 addition & 1 deletion crates/proof-of-sql-parser/src/posql_time/timezone.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::error::PoSQLTimestampError;
use super::PoSQLTimestampError;
use core::fmt;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
Expand Down
4 changes: 2 additions & 2 deletions crates/proof-of-sql-parser/src/posql_time/unit.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::error::PoSQLTimestampError;
use super::PoSQLTimestampError;
use arrow::datatypes::TimeUnit as ArrowTimeUnit;
use core::fmt;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -68,7 +68,7 @@ impl fmt::Display for PoSQLTimeUnit {
#[allow(deprecated)]
mod time_unit_tests {
use super::*;
use crate::{error::PoSQLTimestampError, posql_time::timestamp::PoSQLTimestamp};
use crate::posql_time::{PoSQLTimestamp, PoSQLTimestampError};
use chrono::{TimeZone, Utc};

#[test]
Expand Down
2 changes: 1 addition & 1 deletion crates/proof-of-sql-parser/src/sql.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::intermediate_ast;
use crate::select_statement;
use crate::identifier;
use lalrpop_util::ParseError::User;
use crate::{intermediate_decimal::IntermediateDecimal, posql_time::timestamp::PoSQLTimestamp};
use crate::{intermediate_decimal::IntermediateDecimal, posql_time::PoSQLTimestamp};

grammar;

Expand Down
Loading

0 comments on commit c63d33b

Please sign in to comment.