From 8b82373ab42681b1458eae90903c7396ffdf3c78 Mon Sep 17 00:00:00 2001 From: Dustin Ray <40841027+drcapybara@users.noreply.github.com> Date: Tue, 9 Jul 2024 06:13:38 -0700 Subject: [PATCH] feat!: timestamp literal support (#28) # RFC 3339-compliant Timestamp Parsing To ensure our timestamp parsing aligns closely with RFC 3339 standards, the following tests have been proposed to verify each aspect of the timestamp formatting and parsing process: ## Current Capabilities - Date and Time with Timezone: - [x] `2009-01-03T18:15:05Z` (UTC timezone) - [x] `2009-01-03T18:15:05+02:30` (Positive timezone offset) - [x] `2009-01-03T18:15:05-07:00` (Negative timezone offset) - Date and Time with Fractional Seconds: - [x] `2009-01-03T18:15:05.123Z` (Milliseconds) - [x] `2009-01-03T18:15:05.123456Z` (Microseconds) - [x] `2009-01-03T18:15:05.123456789Z` (Nanoseconds) ## Unix Epoch Time Parsing Capability This module includes functionality for parsing timestamps represented as time units (e.g., seconds) since the Unix epoch (January 1, 1970, at 00:00:00 UTC). This allows for direct integration and manipulation of time data sourced from systems that utilize Unix time (POSIX time). ### Features: - **Unix Timestamp Parsing**: Capable of interpreting strings or numeric values representing seconds since the Unix epoch and converting them into a standard datetime format. - **UTC Alignment**: All parsed Unix timestamps are automatically aligned to UTC, ensuring consistency across different time-related operations. 
### Example Usage:

```rust
// Parsing an RFC 3339 timestamp with the UTC designator (`Z`):
let timestamp_str = "2009-01-03T18:15:05Z";
let intermediate_timestamp = PoSQLTimestamp::try_from(timestamp_str).unwrap();
assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::Utc);

// Parsing an RFC 3339 timestamp with a positive timezone offset:
let timestamp_str_with_tz = "2009-01-03T18:15:05+03:00";
let intermediate_timestamp = PoSQLTimestamp::try_from(timestamp_str_with_tz).unwrap();
assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::FixedOffset(10800)); // 3 hours in seconds

// Parsing a Unix epoch timestamp (an integer count of seconds, assumed to be UTC):
let unix_time = 1231006505;
let intermediate_timestamp = PoSQLTimestamp::to_timestamp(unix_time).unwrap();
assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::Utc);
```

# Tests for RFC 3339 Compliance

- [x] **Test UTC Timezone Parsing**
  Ensure proper parsing of timestamps with the UTC timezone designator (`Z`).
- [x] **Test Positive Timezone Offset**
  Ensure timestamps with positive timezone offsets are parsed correctly.
- [x] **Test Negative Timezone Offset**
  Ensure timestamps with negative timezone offsets are parsed correctly.
- [x] **Test Zero Timezone Offset**
  Validate parsing of timestamps where timezone is explicitly set to UTC with `+00:00`.
- [x] **Test Unix Epoch Time Timezone**
  Verify that Unix epoch timestamps are assumed to be in UTC.
- [x] **Test Unix Epoch Timestamp Parsing**
  Check parsing of Unix epoch timestamps given as integer seconds since the epoch.
- [x] **Test Basic RFC 3339 Timestamp**
  Confirm basic parsing of RFC 3339 compliant timestamps with no timezone offset specified.
- [x] **Test RFC 3339 Timestamp with Positive Offset**
  Test parsing of timestamps with positive timezone offsets.
- [x] **Test RFC 3339 Timestamp with Negative Offset**
  Test parsing of timestamps with negative timezone offsets.
- [x] **Test RFC 3339 Timestamp with UTC Designator** Confirm parsing of timestamps with the UTC designator (`Z`). - [x] **Test Invalid RFC 3339 Timestamp** Ensure that non-compliant strings are not parsed as valid timestamps. - [x] **Test Timestamp with Seconds Precision** Confirm that timestamps with seconds precision are parsed correctly. - [x] **Test RFC 3339 Timestamp with Milliseconds** Validate parsing of timestamps that include millisecond precision. - [x] **Test RFC 3339 Timestamp with Microseconds** Validate parsing of timestamps that include microsecond precision. - [x] **Test RFC 3339 Timestamp with Nanoseconds** Validate parsing of timestamps that include nanosecond precision. - [x] **Test General Parsing Error** Check handling of malformed timestamp inputs. - [x] **Test Basic Date-Time Support** Ensure basic RFC 3339 formatted date-times are parsed correctly. - [x] **Test Leap Seconds Handling** Verify that leap seconds are handled correctly in timestamps. - [x] **Test Rejection of Incorrect Formats** Ensure that incorrect timestamp formats are properly rejected. 
--- Cargo.toml | 2 +- crates/proof-of-sql-parser/Cargo.toml | 2 + crates/proof-of-sql-parser/src/error.rs | 41 +++ crates/proof-of-sql-parser/src/identifier.rs | 38 +- .../src/intermediate_ast.rs | 12 +- crates/proof-of-sql-parser/src/lib.rs | 5 +- .../proof-of-sql-parser/src/posql_time/mod.rs | 6 + .../src/posql_time/timestamp.rs | 251 +++++++++++++ .../src/posql_time/timezone.rs | 133 +++++++ .../src/posql_time/unit.rs | 85 +++++ crates/proof-of-sql-parser/src/sql.lalrpop | 24 +- crates/proof-of-sql/Cargo.toml | 2 +- .../src/base/commitment/column_bounds.rs | 10 +- .../commitment/column_commitment_metadata.rs | 32 +- .../src/base/commitment/committable_column.rs | 35 +- .../arrow_array_to_column_conversion.rs | 53 ++- .../proof-of-sql/src/base/database/column.rs | 19 +- .../src/base/database/literal_value.rs | 8 +- .../database/owned_and_arrow_conversions.rs | 56 ++- .../src/base/database/owned_column.rs | 8 +- .../src/base/database/owned_table_test.rs | 30 +- .../owned_table_test_accessor_test.rs | 6 +- .../src/base/database/owned_table_utility.rs | 18 +- .../src/base/database/record_batch_utility.rs | 12 +- .../base/database/test_accessor_utility.rs | 3 +- crates/proof-of-sql/src/base/mod.rs | 2 - crates/proof-of-sql/src/base/time/mod.rs | 4 - .../proof-of-sql/src/base/time/timestamp.rs | 106 ------ crates/proof-of-sql/src/base/time/timezone.rs | 142 -------- crates/proof-of-sql/src/sql/parse/error.rs | 6 +- .../sql/parse/provable_expr_plan_builder.rs | 3 + .../src/sql/parse/query_context_builder.rs | 3 + .../src/sql/transform/to_polars_expr.rs | 1 + .../tests/timestamp_integration_tests.rs | 344 ++++++++++++++++++ 34 files changed, 1096 insertions(+), 406 deletions(-) create mode 100644 crates/proof-of-sql-parser/src/posql_time/mod.rs create mode 100644 crates/proof-of-sql-parser/src/posql_time/timestamp.rs create mode 100644 crates/proof-of-sql-parser/src/posql_time/timezone.rs create mode 100644 crates/proof-of-sql-parser/src/posql_time/unit.rs delete mode 
100644 crates/proof-of-sql/src/base/time/mod.rs delete mode 100644 crates/proof-of-sql/src/base/time/timestamp.rs delete mode 100644 crates/proof-of-sql/src/base/time/timezone.rs create mode 100644 crates/proof-of-sql/tests/timestamp_integration_tests.rs diff --git a/Cargo.toml b/Cargo.toml index 1913112a2..b48b0e77f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ bytemuck = {version = "1.14.2" } byte-slice-cast = { version = "1.2.1" } clap = { version = "4.5.4" } criterion = { version = "0.5.1" } -chrono-tz = {version = "0.9.0", features = ["serde"]} +chrono = { version = "0.4.38" } curve25519-dalek = { version = "4", features = ["rand_core"] } derive_more = { version = "0.99" } dyn_partial_eq = { version = "0.1.2" } diff --git a/crates/proof-of-sql-parser/Cargo.toml b/crates/proof-of-sql-parser/Cargo.toml index 3ffbff9e3..745c2fa02 100644 --- a/crates/proof-of-sql-parser/Cargo.toml +++ b/crates/proof-of-sql-parser/Cargo.toml @@ -15,8 +15,10 @@ doctest = true test = true [dependencies] +arrow = { workspace = true } arrayvec = { workspace = true, features = ["serde"] } bigdecimal = { workspace = true } +chrono = { workspace = true, features = ["serde"] } lalrpop-util = { workspace = true, features = ["lexer", "unicode"] } serde = { workspace = true, features = ["serde_derive"] } thiserror = { workspace = true } diff --git a/crates/proof-of-sql-parser/src/error.rs b/crates/proof-of-sql-parser/src/error.rs index 3d032f864..fbe90148c 100644 --- a/crates/proof-of-sql-parser/src/error.rs +++ b/crates/proof-of-sql-parser/src/error.rs @@ -1,3 +1,4 @@ +use serde::{Deserialize, Serialize}; use thiserror::Error; /// Errors encountered during the parsing process @@ -14,4 +15,44 @@ pub enum ParseError { ResourceIdParseError(String), } +/// General parsing error that may occur, for example if the provided schema/object_name strings +/// aren't valid postgres-style identifiers (excluding dollar signs). 
pub type ParseResult = std::result::Result; + +/// Errors related to time operations, including timezone and timestamp conversions.s +#[derive(Error, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum PoSQLTimestampError { + /// Error when the timezone string provided cannot be parsed into a valid timezone. + #[error("invalid timezone string: {0}")] + InvalidTimezone(String), + + /// Error indicating an invalid timezone offset was provided. + #[error("invalid timezone offset")] + InvalidTimezoneOffset, + + /// Indicates a failure to convert between different representations of time units. + #[error("Invalid time unit")] + InvalidTimeUnit(String), + + /// The local time does not exist because there is a gap in the local time. + /// This variant may also be returned if there was an error while resolving the local time, + /// caused by for example missing time zone data files, an error in an OS API, or overflow. + #[error("Local time does not exist because there is a gap in the local time")] + LocalTimeDoesNotExist, + + /// The local time is ambiguous because there is a fold in the local time. + /// This variant contains the two possible results, in the order (earliest, latest). + #[error("Unix timestamp is ambiguous because there is a fold in the local time.")] + Ambiguous(String), + + /// Represents a catch-all for parsing errors not specifically covered by other variants. 
+ #[error("Timestamp parsing error: {0}")] + ParsingError(String), +} + +// This exists because TryFrom for ColumnType error is String +impl From for String { + fn from(error: PoSQLTimestampError) -> Self { + error.to_string() + } +} diff --git a/crates/proof-of-sql-parser/src/identifier.rs b/crates/proof-of-sql-parser/src/identifier.rs index 1d85590ef..d8ee5ddae 100644 --- a/crates/proof-of-sql-parser/src/identifier.rs +++ b/crates/proof-of-sql-parser/src/identifier.rs @@ -44,7 +44,8 @@ impl FromStr for Identifier { fn from_str(string: &str) -> ParseResult { let name = IdentifierParser::new() .parse(string) - .map_err(|e| ParseError::IdentifierParseError(format!("{:?}", e)))?; + .map_err(|e| ParseError::IdentifierParseError( + format!("failed to parse identifier, (you may have used a reserved keyword as an ID, i.e. 'timestamp') {:?}", e)))?; Ok(Identifier::new(name)) } @@ -152,6 +153,41 @@ mod tests { assert!(Identifier::from_str("GOOD_IDENTIFIER.").is_err()); assert!(Identifier::from_str(".GOOD_IDENTIFIER").is_err()); assert!(Identifier::from_str(&"LONG_IDENTIFIER_OVER_64_CHARACTERS".repeat(12)).is_err()); + + // Test for reserved keywords + let keywords = [ + "all", + "asc", + "desc", + "as", + "and", + "from", + "not", + "or", + "select", + "where", + "order", + "by", + "limit", + "offset", + "group", + "min", + "max", + "count", + "sum", + "true", + "false", + "timestamp", + "to_timestamp", + ]; + + for keyword in keywords.iter() { + assert!( + Identifier::from_str(keyword).is_err(), + "Should not parse keyword as identifier: {}", + keyword + ); + } } #[test] diff --git a/crates/proof-of-sql-parser/src/intermediate_ast.rs b/crates/proof-of-sql-parser/src/intermediate_ast.rs index ddda7e1c4..776b5955c 100644 --- a/crates/proof-of-sql-parser/src/intermediate_ast.rs +++ b/crates/proof-of-sql-parser/src/intermediate_ast.rs @@ -4,7 +4,9 @@ * https://docs.rs/vervolg/latest/vervolg/ast/enum.Statement.html ***/ -use crate::{intermediate_decimal::IntermediateDecimal, 
Identifier}; +use crate::{ + intermediate_decimal::IntermediateDecimal, posql_time::timestamp::PoSQLTimestamp, Identifier, +}; use serde::{Deserialize, Serialize}; /// Representation of a SetExpression, a collection of rows, each having one or more columns. @@ -328,6 +330,8 @@ pub enum Literal { VarChar(String), /// Decimal Literal Decimal(IntermediateDecimal), + /// Timestamp Literal + Timestamp(PoSQLTimestamp), } impl From for Literal { @@ -379,6 +383,12 @@ impl From for Literal { } } +impl From for Literal { + fn from(time: PoSQLTimestamp) -> Self { + Literal::Timestamp(time) + } +} + /// Helper function to append an item to a vector pub(crate) fn append(list: Vec, item: T) -> Vec { let mut result = list; diff --git a/crates/proof-of-sql-parser/src/lib.rs b/crates/proof-of-sql-parser/src/lib.rs index 1bd38217a..c53a282a5 100644 --- a/crates/proof-of-sql-parser/src/lib.rs +++ b/crates/proof-of-sql-parser/src/lib.rs @@ -2,6 +2,8 @@ /// Module for handling an intermediate decimal type received from the lexer. pub mod intermediate_decimal; +/// Module for handling an intermediate timestamp type received from the lexer. 
+pub mod posql_time; #[macro_use] extern crate lalrpop_util; @@ -16,7 +18,8 @@ pub(crate) mod test_utility; pub(crate) mod select_statement; pub use select_statement::SelectStatement; -pub(crate) mod error; +/// Error definitions for proof-of-sql-parser +pub mod error; pub use error::ParseError; pub(crate) use error::ParseResult; diff --git a/crates/proof-of-sql-parser/src/posql_time/mod.rs b/crates/proof-of-sql-parser/src/posql_time/mod.rs new file mode 100644 index 000000000..44731fc58 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/mod.rs @@ -0,0 +1,6 @@ +/// Defines an RFC3339-formatted timestamp +pub mod timestamp; +/// Defines a timezone as count of seconds offset from UTC +pub mod timezone; +/// Defines the precision of the timestamp +pub mod unit; diff --git a/crates/proof-of-sql-parser/src/posql_time/timestamp.rs b/crates/proof-of-sql-parser/src/posql_time/timestamp.rs new file mode 100644 index 000000000..336dd5350 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/timestamp.rs @@ -0,0 +1,251 @@ +use super::{timezone, unit::PoSQLTimeUnit}; +use crate::error::PoSQLTimestampError; +use chrono::{offset::LocalResult, DateTime, TimeZone, Utc}; +use serde::{Deserialize, Serialize}; + +/// Represents a fully parsed timestamp with detailed time unit and timezone information +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PoSQLTimestamp { + /// The datetime representation in UTC. + pub timestamp: DateTime, + + /// The precision of the datetime value, e.g., seconds, milliseconds. + pub timeunit: PoSQLTimeUnit, + + /// The timezone of the datetime, either UTC or a fixed offset from UTC. + pub timezone: timezone::PoSQLTimeZone, +} + +impl PoSQLTimestamp { + /// Attempts to parse a timestamp string into an [PoSQLTimestamp] structure. + /// This function supports two primary formats: + /// + /// 1. **RFC 3339 Parsing**: + /// - Parses the timestamp along with its timezone. 
+ /// - If parsing succeeds, it extracts the timezone offset using `dt.offset().local_minus_utc()` + /// and then uses this to construct the appropriate `PoSQLTimeZone`. + /// + /// 2. **Timezone Parsing and Conversion**: + /// - The `from_offset` method is used to determine whether the timezone should be represented + /// as `Utc` or `FixedOffset`. This function simplifies the decision based on the offset value. + /// + /// # Examples + /// ``` + /// use chrono::{DateTime, Utc}; + /// use proof_of_sql_parser::posql_time::{timestamp::PoSQLTimestamp, timezone::PoSQLTimeZone}; + /// + /// // Parsing an RFC 3339 timestamp without a timezone: + /// let timestamp_str = "2009-01-03T18:15:05Z"; + /// let intermediate_timestamp = PoSQLTimestamp::try_from(timestamp_str).unwrap(); + /// assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::Utc); + /// + /// // Parsing an RFC 3339 timestamp with a positive timezone offset: + /// let timestamp_str_with_tz = "2009-01-03T18:15:05+03:00"; + /// let intermediate_timestamp = PoSQLTimestamp::try_from(timestamp_str_with_tz).unwrap(); + /// assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::FixedOffset(10800)); // 3 hours in seconds + /// ``` + pub fn try_from(timestamp_str: &str) -> Result { + let dt = DateTime::parse_from_rfc3339(timestamp_str) + .map_err(|e| PoSQLTimestampError::ParsingError(e.to_string()))?; + + let offset_seconds = dt.offset().local_minus_utc(); + let timezone = timezone::PoSQLTimeZone::from_offset(offset_seconds); + let nanoseconds = dt.timestamp_subsec_nanos(); + let timeunit = if nanoseconds % 1_000 != 0 { + PoSQLTimeUnit::Nanosecond + } else if nanoseconds % 1_000_000 != 0 { + PoSQLTimeUnit::Microsecond + } else if nanoseconds % 1_000_000_000 != 0 { + PoSQLTimeUnit::Millisecond + } else { + PoSQLTimeUnit::Second + }; + + Ok(PoSQLTimestamp { + timestamp: dt.with_timezone(&Utc), + timeunit, + timezone, + }) + } + + /// Attempts to parse a timestamp string into an `PoSQLTimestamp` structure. 
+ /// This function supports two primary formats: + /// + /// **Unix Epoch Time Parsing**: + /// - Since Unix epoch timestamps don't inherently carry timezone information, + /// any Unix time parsed directly from an integer is assumed to be in UTC. + /// + /// # Examples + /// ``` + /// use chrono::{DateTime, Utc}; + /// use proof_of_sql_parser::posql_time::{timestamp::PoSQLTimestamp, timezone::PoSQLTimeZone}; + /// + /// // Parsing a Unix epoch timestamp (assumed to be seconds and UTC): + /// let unix_time = 1231006505; + /// let intermediate_timestamp = PoSQLTimestamp::to_timestamp(unix_time).unwrap(); + /// assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::Utc); + /// ``` + pub fn to_timestamp(epoch: i64) -> Result { + match Utc.timestamp_opt(epoch, 0) { + LocalResult::Single(timestamp) => Ok(PoSQLTimestamp { + timestamp, + timeunit: PoSQLTimeUnit::Second, + timezone: timezone::PoSQLTimeZone::Utc, + }), + LocalResult::Ambiguous(earliest, latest) => Err(PoSQLTimestampError::Ambiguous( + format!("The local time is ambiguous because there is a fold in the local time: earliest: {} latest: {} ", earliest, latest), + )), + LocalResult::None => Err(PoSQLTimestampError::LocalTimeDoesNotExist), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_unix_epoch_time_timezone() { + let unix_time = 1231006505; // Unix time as string + let expected_timezone = timezone::PoSQLTimeZone::Utc; // Unix time should always be UTC + let result = PoSQLTimestamp::to_timestamp(unix_time).unwrap(); + assert_eq!(result.timezone, expected_timezone); + } + + #[test] + fn test_unix_epoch_timestamp_parsing() { + let unix_time = 1231006505; // Example Unix timestamp (seconds since epoch) + let expected_datetime = Utc.timestamp_opt(unix_time, 0).unwrap(); + let expected_unit = PoSQLTimeUnit::Second; // Assuming basic second precision for Unix timestamp + let input = unix_time; // Simulate input as string since Unix times are often transmitted as strings + let 
result = PoSQLTimestamp::to_timestamp(input).unwrap(); + + assert_eq!(result.timestamp, expected_datetime); + assert_eq!(result.timeunit, expected_unit); + } + + #[test] + fn test_basic_rfc3339_timestamp() { + let input = "2023-06-26T12:34:56Z"; + let expected = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap(); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_positive_offset() { + let input = "2023-06-26T08:00:00+04:30"; + let expected = Utc.with_ymd_and_hms(2023, 6, 26, 3, 30, 0).unwrap(); // Adjusted to UTC + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_negative_offset() { + let input = "2023-06-26T20:00:00-05:00"; + let expected = Utc.with_ymd_and_hms(2023, 6, 27, 1, 0, 0).unwrap(); // Adjusted to UTC + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_utc_designator() { + let input = "2023-06-26T12:34:56Z"; + let expected = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap(); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_invalid_rfc3339_timestamp() { + let input = "not-a-timestamp"; + assert_eq!( + PoSQLTimestamp::try_from(input), + Err(PoSQLTimestampError::ParsingError( + "input contains invalid characters".into() + )) + ); + } + + #[test] + fn test_timestamp_with_seconds() { + let input = "2023-06-26T12:34:56Z"; + let expected_time = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap(); + let expected_unit = PoSQLTimeUnit::Second; + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected_time); + assert_eq!(result.timeunit, expected_unit); + } + + #[test] + fn test_general_parsing_error() { + // This test assumes that there's a catch-all 
parsing error case that isn't covered by the more specific errors. + let malformed_input = "2009-01-03T::00Z"; // Intentionally malformed timestamp + let result = PoSQLTimestamp::try_from(malformed_input); + assert!(matches!(result, Err(PoSQLTimestampError::ParsingError(_)))); + } + + #[test] + fn test_basic_date_time_support() { + let inputs = ["2009-01-03T18:15:05Z", "2009-01-03T18:15:05+02:00"]; + for input in inputs { + assert!( + DateTime::parse_from_rfc3339(input).is_ok(), + "Should parse correctly: {}", + input + ); + } + } + + #[test] + fn test_leap_seconds() { + let input = "1998-12-31T23:59:60Z"; // fyi the 59:-->60<-- is the leap second + assert!(PoSQLTimestamp::try_from(input).is_ok()); + } + + #[test] + fn test_leap_seconds_ranges() { + // Timestamp just before the leap second + let before_leap_second = "1998-12-31T23:59:59Z"; + // Timestamp during the leap second + let leap_second = "1998-12-31T23:59:60Z"; + // Timestamp just after the leap second + let after_leap_second = "1999-01-01T00:00:00Z"; + + // Parse timestamps + let before_leap_dt = PoSQLTimestamp::try_from(before_leap_second).unwrap(); + let leap_second_dt = PoSQLTimestamp::try_from(leap_second).unwrap(); + dbg!(&leap_second_dt.timestamp.timestamp()); + let after_leap_dt = PoSQLTimestamp::try_from(after_leap_second).unwrap(); + + // Ensure that "23:59:60Z" - 1 second is considered equivalent to "23:59:59Z" + assert_eq!( + before_leap_dt.timestamp, + leap_second_dt.timestamp - chrono::Duration::seconds(1) + ); + + // Ensure that "23:59:60Z" + 1 second is "1999-01-01T00:00:00Z" + assert_eq!( + after_leap_dt.timestamp, + leap_second_dt.timestamp + chrono::Duration::seconds(1) + ); + } + + #[test] + fn test_rejecting_incorrect_formats() { + let incorrect_formats = [ + "2009-January-03", + "25:61:61", + "20090103", + "181505", + "18:15:05", + ]; + for input in incorrect_formats { + assert!( + DateTime::parse_from_rfc3339(input).is_err(), + "Should reject incorrect format: {}", + input + ); + } + 
} +} diff --git a/crates/proof-of-sql-parser/src/posql_time/timezone.rs b/crates/proof-of-sql-parser/src/posql_time/timezone.rs new file mode 100644 index 000000000..7f9a21b17 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/timezone.rs @@ -0,0 +1,133 @@ +use crate::error::PoSQLTimestampError; +use core::fmt; +use serde::{Deserialize, Serialize}; +use std::sync::Arc; + +/// Captures a timezone from a timestamp query +#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq)] +pub enum PoSQLTimeZone { + /// Default variant for UTC timezone + Utc, + /// TImezone offset in seconds + FixedOffset(i32), +} + +impl PoSQLTimeZone { + /// Parse a timezone from a count of seconds + pub fn from_offset(offset: i32) -> Self { + if offset == 0 { + PoSQLTimeZone::Utc + } else { + PoSQLTimeZone::FixedOffset(offset) + } + } +} + +impl TryFrom<&Option>> for PoSQLTimeZone { + type Error = PoSQLTimestampError; + + fn try_from(value: &Option>) -> Result { + match value { + Some(tz_str) => { + let tz = Arc::as_ref(tz_str).to_uppercase(); + match tz.as_str() { + "Z" | "UTC" | "00:00" | "+00:00" | "0:00" | "+0:00" => Ok(PoSQLTimeZone::Utc), + tz if tz.chars().count() == 6 + && (tz.starts_with('+') || tz.starts_with('-')) => + { + let sign = if tz.starts_with('-') { -1 } else { 1 }; + let hours = tz[1..3] + .parse::() + .map_err(|_| PoSQLTimestampError::InvalidTimezoneOffset)?; + let minutes = tz[4..6] + .parse::() + .map_err(|_| PoSQLTimestampError::InvalidTimezoneOffset)?; + let total_seconds = sign * ((hours * 3600) + (minutes * 60)); + Ok(PoSQLTimeZone::FixedOffset(total_seconds)) + } + _ => Err(PoSQLTimestampError::InvalidTimezone(tz.to_string())), + } + } + None => Ok(PoSQLTimeZone::Utc), + } + } +} + +impl fmt::Display for PoSQLTimeZone { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match *self { + PoSQLTimeZone::Utc => { + write!(f, "00:00") + } + PoSQLTimeZone::FixedOffset(seconds) => { + let hours = seconds / 3600; + let minutes = 
(seconds.abs() % 3600) / 60; + if seconds < 0 { + write!(f, "-{:02}:{:02}", hours.abs(), minutes) + } else { + write!(f, "+{:02}:{:02}", hours, minutes) + } + } + } + } +} + +#[cfg(test)] +mod timezone_parsing_tests { + use crate::posql_time::timezone; + + #[test] + fn test_display_fixed_offset_positive() { + let timezone = timezone::PoSQLTimeZone::FixedOffset(4500); // +01:15 + assert_eq!(format!("{}", timezone), "+01:15"); + } + + #[test] + fn test_display_fixed_offset_negative() { + let timezone = timezone::PoSQLTimeZone::FixedOffset(-3780); // -01:03 + assert_eq!(format!("{}", timezone), "-01:03"); + } + + #[test] + fn test_display_utc() { + let timezone = timezone::PoSQLTimeZone::Utc; + assert_eq!(format!("{}", timezone), "00:00"); + } +} + +#[cfg(test)] +mod timezone_offset_tests { + use crate::posql_time::{timestamp::PoSQLTimestamp, timezone}; + + #[test] + fn test_utc_timezone() { + let input = "2023-06-26T12:34:56Z"; + let expected_timezone = timezone::PoSQLTimeZone::Utc; + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timezone, expected_timezone); + } + + #[test] + fn test_positive_offset_timezone() { + let input = "2023-06-26T12:34:56+03:30"; + let expected_timezone = timezone::PoSQLTimeZone::from_offset(12600); // 3 hours and 30 minutes in seconds + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timezone, expected_timezone); + } + + #[test] + fn test_negative_offset_timezone() { + let input = "2023-06-26T12:34:56-04:00"; + let expected_timezone = timezone::PoSQLTimeZone::from_offset(-14400); // -4 hours in seconds + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timezone, expected_timezone); + } + + #[test] + fn test_zero_offset_timezone() { + let input = "2023-06-26T12:34:56+00:00"; + let expected_timezone = timezone::PoSQLTimeZone::Utc; // Zero offset defaults to UTC + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timezone, 
expected_timezone); + } +} diff --git a/crates/proof-of-sql-parser/src/posql_time/unit.rs b/crates/proof-of-sql-parser/src/posql_time/unit.rs new file mode 100644 index 000000000..c97919f90 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/unit.rs @@ -0,0 +1,85 @@ +use arrow::datatypes::TimeUnit as ArrowTimeUnit; +use core::fmt; +use serde::{Deserialize, Serialize}; + +/// An intermediate type representing the time units from a parsed query +#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq)] +pub enum PoSQLTimeUnit { + /// Represents seconds with precision 0: ex "2024-06-20 12:34:56" + Second, + /// Represents milliseconds with precision 3: ex "2024-06-20 12:34:56.123" + Millisecond, + /// Represents microseconds with precision 6: ex "2024-06-20 12:34:56.123456" + Microsecond, + /// Represents nanoseconds with precision 9: ex "2024-06-20 12:34:56.123456789" + Nanosecond, +} + +impl From for ArrowTimeUnit { + fn from(unit: PoSQLTimeUnit) -> Self { + match unit { + PoSQLTimeUnit::Second => ArrowTimeUnit::Second, + PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + } + } +} + +impl From for PoSQLTimeUnit { + fn from(unit: ArrowTimeUnit) -> Self { + match unit { + ArrowTimeUnit::Second => PoSQLTimeUnit::Second, + ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, + } + } +} + +impl fmt::Display for PoSQLTimeUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PoSQLTimeUnit::Second => write!(f, "seconds (precision: 0)"), + PoSQLTimeUnit::Millisecond => write!(f, "milliseconds (precision: 3)"), + PoSQLTimeUnit::Microsecond => write!(f, "microseconds (precision: 6)"), + PoSQLTimeUnit::Nanosecond => write!(f, "nanoseconds (precision: 9)"), + } + } +} + +// 
allow(deprecated) for the sole purpose of testing that +// timestamp precision is parsed correctly. +#[cfg(test)] +#[allow(deprecated)] +mod time_unit_tests { + + use crate::posql_time::{timestamp::PoSQLTimestamp, unit::PoSQLTimeUnit}; + use chrono::{TimeZone, Utc}; + + #[test] + fn test_rfc3339_timestamp_with_milliseconds() { + let input = "2023-06-26T12:34:56.123Z"; + let expected = Utc.ymd(2023, 6, 26).and_hms_milli(12, 34, 56, 123); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timeunit, PoSQLTimeUnit::Millisecond); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_microseconds() { + let input = "2023-06-26T12:34:56.123456Z"; + let expected = Utc.ymd(2023, 6, 26).and_hms_micro(12, 34, 56, 123456); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timeunit, PoSQLTimeUnit::Microsecond); + assert_eq!(result.timestamp, expected); + } + #[test] + fn test_rfc3339_timestamp_with_nanoseconds() { + let input = "2023-06-26T12:34:56.123456789Z"; + let expected = Utc.ymd(2023, 6, 26).and_hms_nano(12, 34, 56, 123456789); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timeunit, PoSQLTimeUnit::Nanosecond); + assert_eq!(result.timestamp, expected); + } +} diff --git a/crates/proof-of-sql-parser/src/sql.lalrpop b/crates/proof-of-sql-parser/src/sql.lalrpop index dec34a067..ae86ca4b3 100644 --- a/crates/proof-of-sql-parser/src/sql.lalrpop +++ b/crates/proof-of-sql-parser/src/sql.lalrpop @@ -2,7 +2,7 @@ use crate::intermediate_ast; use crate::select_statement; use crate::identifier; use lalrpop_util::ParseError::User; -use crate::intermediate_decimal::IntermediateDecimal; +use crate::{intermediate_decimal::IntermediateDecimal, posql_time::timestamp::PoSQLTimestamp}; grammar; @@ -337,6 +337,10 @@ LiteralValue: Box = { }, => Box::new(intermediate_ast::Literal::Decimal(value)), + + => Box::new(intermediate_ast::Literal::Timestamp(value)), + + => 
Box::new(intermediate_ast::Literal::Timestamp(value)), }; Int128UnaryNumericLiteral: i128 = { @@ -373,6 +377,20 @@ pub BooleanLiteral: bool = { "false" => false, }; +TimestampLiteral: PoSQLTimestamp = { + "timestamp" =>? { + PoSQLTimestamp::try_from(content.trim_matches('\'').trim()) + .map_err(|_| User { error: "unable to parse timestamp from query" }) + }, +}; + +UnixTimestampLiteral: PoSQLTimestamp = { + // Handling the to_timestamp function with numeric input + "to_timestamp" "(" ")" =>? { + PoSQLTimestamp::to_timestamp(epoch).map_err(|_| User { error: "unable to parse timestamp from query" }) + }, +}; + //////////////////////////////////////////////////////////////////////////////////////////////// // Tokens //////////////////////////////////////////////////////////////////////////////////////////////// @@ -413,7 +431,9 @@ match { r"[sS][uU][mM]" => "sum", r"[tT][rR][uU][eE]" => "true", r"[fF][aA][lL][sS][eE]" => "false", - + r"[tT][iI][mM][eE][sS][tT][aA][mM][pP]" => "timestamp", + r"[tT][oO]_[tT][iI][mM][eE][sS][tT][aA][mM][pP]" => "to_timestamp", + "," => ",", "." 
=> ".", "(" => "(", diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index d6a0736e9..bd9a6808f 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -30,7 +30,7 @@ bumpalo = { workspace = true, features = ["collections"] } bytemuck = { workspace = true } byte-slice-cast = { workspace = true } curve25519-dalek = { workspace = true, features = ["serde"] } -chrono-tz = {workspace = true, features = ["serde"]} +chrono = {workspace = true, features = ["serde"]} derive_more = { workspace = true } dyn_partial_eq = { workspace = true } hashbrown = { workspace = true } diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 0badc567f..2ee627088 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -288,13 +288,9 @@ impl ColumnBounds { #[cfg(test)] mod tests { use super::*; - use crate::base::{ - database::OwnedColumn, - math::decimal::Precision, - scalar::Curve25519Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, - }; + use crate::base::{database::OwnedColumn, math::decimal::Precision, scalar::Curve25519Scalar}; use itertools::Itertools; + use proof_of_sql_parser::posql_time::{timezone, unit::PoSQLTimeUnit}; #[test] fn we_can_construct_bounds_by_method() { @@ -537,7 +533,7 @@ mod tests { let timestamp_column = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + timezone::PoSQLTimeZone::Utc, vec![1_i64, 2, 3, 4], ); let committable_timestamp_column = CommittableColumn::from(&timestamp_column); diff --git a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs index 2e0d9876d..42b8e7fa9 100644 --- a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs +++
b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs @@ -163,13 +163,15 @@ impl ColumnCommitmentMetadata { #[cfg(test)] mod tests { + use super::*; use crate::base::{ - commitment::column_bounds::Bounds, - database::OwnedColumn, - math::decimal::Precision, + commitment::column_bounds::Bounds, database::OwnedColumn, math::decimal::Precision, scalar::Curve25519Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, + }; + use proof_of_sql_parser::posql_time::{ + timezone::{self, PoSQLTimeZone}, + unit::PoSQLTimeUnit, }; #[test] @@ -229,12 +231,12 @@ mod tests { assert_eq!( ColumnCommitmentMetadata::try_new( - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc), ColumnBounds::TimestampTZ(Bounds::Empty), ) .unwrap(), ColumnCommitmentMetadata { - column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc), bounds: ColumnBounds::TimestampTZ(Bounds::Empty), } ); @@ -372,7 +374,7 @@ mod tests { let timestamp_column: OwnedColumn = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + timezone::PoSQLTimeZone::Utc, [1i64, 2, 3, 4, 5].to_vec(), ); let committable_timestamp_column = CommittableColumn::from(&timestamp_column); @@ -380,7 +382,7 @@ mod tests { ColumnCommitmentMetadata::from_column(&committable_timestamp_column); assert_eq!( timestamp_metadata.column_type(), - &ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + &ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc) ); if let ColumnBounds::TimestampTZ(Bounds::Sharp(bounds)) = timestamp_metadata.bounds() { assert_eq!(bounds.min(), &1); @@ -534,7 +536,7 @@ mod tests { 1_625_072_400, 1_625_065_000, ]; - let timezone = PoSQLTimeZone::UTC; + let timezone = timezone::PoSQLTimeZone::Utc; let timeunit = PoSQLTimeUnit::Second; let timestamp_column_a =
CommittableColumn::TimestampTZ(timeunit, timezone, &times[..2]); let timestamp_metadata_a = ColumnCommitmentMetadata::from_column(&timestamp_column_a); @@ -560,7 +562,7 @@ mod tests { 1_625_072_400, 1_625_065_000, ]; - let timezone = PoSQLTimeZone::UTC; + let timezone = timezone::PoSQLTimeZone::Utc; let timeunit = PoSQLTimeUnit::Second; let timestamp_column_a = CommittableColumn::TimestampTZ(timeunit, timezone, &times[..2]); @@ -857,12 +859,18 @@ mod tests { .is_err()); let timestamp_tz_metadata_a = ColumnCommitmentMetadata { - column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + column_type: ColumnType::TimestampTZ( + PoSQLTimeUnit::Second, + timezone::PoSQLTimeZone::Utc, + ), bounds: ColumnBounds::TimestampTZ(Bounds::Empty), }; let timestamp_tz_metadata_b = ColumnCommitmentMetadata { - column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Millisecond, PoSQLTimeZone::UTC), + column_type: ColumnType::TimestampTZ( + PoSQLTimeUnit::Millisecond, + timezone::PoSQLTimeZone::Utc, + ), bounds: ColumnBounds::TimestampTZ(Bounds::Empty), }; diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index e8fc9dc16..e7a006dae 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -3,10 +3,10 @@ use crate::base::{ math::decimal::Precision, ref_into::RefInto, scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; #[cfg(feature = "blitzar")] use blitzar::sequence::Sequence; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; /// Column data in "committable form".
/// @@ -194,10 +194,7 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { #[cfg(all(test, feature = "blitzar"))] mod tests { use super::*; - use crate::{ - base::{scalar::Curve25519Scalar, time::timezone::PoSQLTimeZone}, - proof_primitive::dory::DoryScalar, - }; + use crate::{base::scalar::Curve25519Scalar, proof_primitive::dory::DoryScalar}; use blitzar::compute::compute_curve25519_commitments; use curve25519_dalek::ristretto::CompressedRistretto; @@ -227,24 +224,24 @@ mod tests { fn we_can_get_type_and_length_of_timestamp_column() { // empty case let committable_column = - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]); + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]); assert_eq!(committable_column.len(), 0); assert!(committable_column.is_empty()); assert_eq!( committable_column.column_type(), - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc) ); let committable_column = CommittableColumn::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, &[12, 34, 56], ); assert_eq!(committable_column.len(), 3); assert!(!committable_column.is_empty()); assert_eq!( committable_column.column_type(), - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc) ); } @@ -393,12 +390,12 @@ mod tests { let from_borrowed_column = CommittableColumn::from(&Column::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, &[], )); assert_eq!( from_borrowed_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); // non-empty case @@ -406,12 +403,12 @@ mod tests { let from_borrowed_column = CommittableColumn::from(&Column::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + 
PoSQLTimeZone::Utc, &timestamps, )); assert_eq!( from_borrowed_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &timestamps) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &timestamps) ); } @@ -574,26 +571,26 @@ mod tests { // empty case let owned_column = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, Vec::new(), ); let from_owned_column = CommittableColumn::from(&owned_column); assert_eq!( from_owned_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); // non-empty case let timestamps = vec![1625072400, 1625076000, 1625083200]; let owned_column = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, timestamps.clone(), ); let from_owned_column = CommittableColumn::from(&owned_column); assert_eq!( from_owned_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &timestamps) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &timestamps) ); } @@ -880,7 +877,7 @@ mod tests { fn we_can_commit_to_timestamp_column_through_committable_column() { // Empty case let committable_column = - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]); + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]); let sequence = Sequence::from(&committable_column); let mut commitment_buffer = [CompressedRistretto::default()]; compute_curve25519_commitments(&mut commitment_buffer, &[sequence], 0); @@ -889,7 +886,7 @@ mod tests { // Non-empty case let timestamps = [1625072400, 1625076000, 1625083200]; let committable_column = - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &timestamps); + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &timestamps); let sequence_actual =
Sequence::from(&committable_column); let sequence_expected = Sequence::from(timestamps.as_slice()); diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index cc3b78cb8..8b4c1f712 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -1,11 +1,6 @@ use super::scalar_and_i256_conversions::convert_i256_to_scalar; use crate::{ - base::{ - database::Column, - math::decimal::Precision, - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, - }, + base::{database::Column, math::decimal::Precision, scalar::Scalar}, sql::parse::ConversionError, }; use arrow::{ @@ -17,6 +12,10 @@ use arrow::{ datatypes::{i256, DataType, TimeUnit as ArrowTimeUnit}, }; use bumpalo::Bump; +use proof_of_sql_parser::{ + error::PoSQLTimestampError, + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, +}; use std::ops::Range; use thiserror::Error; @@ -38,9 +37,9 @@ pub enum ArrowArrayToColumnConversionError { /// Variant for conversion errors #[error("conversion error: {0}")] ConversionError(#[from] ConversionError), - /// Variant for timezone conversion errors, i.e. invalid timezone - #[error("Timezone conversion failed: {0}")] - TimezoneConversionError(String), + /// Using TimeError to handle all time-related errors + #[error(transparent)] + TimestampConversionError(#[from] PoSQLTimestampError), } /// This trait is used to provide utility functions to convert ArrayRefs into proof types (Column, Scalars, etc.) 
@@ -280,7 +279,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -293,7 +292,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Millisecond, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -306,7 +305,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Microsecond, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -319,7 +318,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Nanosecond, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -371,13 +370,13 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.clone().into(), - Some("UTC"), + Some("Z"), )); let result = array.to_column::(&alloc, &(1..3), None); assert_eq!( result.unwrap(), - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[1..3]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &data[1..3]) ); } @@ -387,7 +386,7 @@ mod tests { let data = vec![1625072400, 1625076000]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("+00:00"), )); let result = array @@ -395,7 +394,7 @@ mod tests { .unwrap(); assert_eq!( result, - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + 
Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); } @@ -405,13 +404,13 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("+0:00"), )); let result = array.to_column::(&alloc, &(1..1), None); assert_eq!( result.unwrap(), - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); } @@ -421,7 +420,7 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("Utc"), )); let result = array.to_column::(&alloc, &(3..5), None); @@ -437,7 +436,7 @@ mod tests { let data = vec![Some(1625072400), None, Some(1625083200)]; let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("00:00"), )); let result = array.to_column::(&alloc, &(0..3), None); @@ -1004,7 +1003,7 @@ mod tests { .unwrap(); assert_eq!( result, - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[..]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &data[..]) ); } @@ -1058,7 +1057,7 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.clone().into(), - Some("UTC"), + Some("Utc"), )); // Test using a range smaller than the array size @@ -1066,7 +1065,7 @@ mod tests { array .to_column::(&alloc, &(1..3), None) .unwrap(), - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[1..3]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &data[1..3]) ); } @@ -1117,14 +1116,14 @@ mod tests { let data = vec![1625072400, 1625076000]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( 
data.clone().into(), - Some("UTC"), + Some("Utc"), )); let result = array .to_column::(&alloc, &(0..0), None) .unwrap(); assert_eq!( result, - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); } @@ -1146,7 +1145,7 @@ mod tests { let data = vec![1625072400, 1625076000]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.clone().into(), - Some("UTC"), + Some("Utc"), )); assert_eq!( diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index fb5dd908c..caea8938c 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -2,11 +2,13 @@ use super::{LiteralValue, TableRef}; use crate::base::{ math::decimal::{scale_scalar, Precision}, scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; use bumpalo::Bump; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{ + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, + Identifier, +}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -334,9 +336,10 @@ impl From<&ColumnType> for DataType { } ColumnType::VarChar => DataType::Utf8, ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::TimestampTZ(timeunit, timezone) => { - DataType::Timestamp(ArrowTimeUnit::from(*timeunit), Some(Arc::from(timezone))) - } + ColumnType::TimestampTZ(timeunit, timezone) => DataType::Timestamp( + ArrowTimeUnit::from(*timeunit), + Some(Arc::from(timezone.to_string())), + ), } } } @@ -357,7 +360,7 @@ impl TryFrom for ColumnType { } DataType::Timestamp(time_unit, timezone_option) => Ok(ColumnType::TimestampTZ( PoSQLTimeUnit::from(time_unit), - 
PoSQLTimeZone::try_from(timezone_option)?, + PoSQLTimeZone::try_from(&timezone_option)?, )), DataType::Utf8 => Ok(ColumnType::VarChar), _ => Err(format!("Unsupported arrow data type {:?}", data_type)), @@ -471,9 +474,9 @@ mod tests { #[test] fn column_type_serializes_to_string() { - let column_type = ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC); + let column_type = ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc); let serialized = serde_json::to_string(&column_type).unwrap(); - assert_eq!(serialized, r#"{"TimestampTZ":["Second","UTC"]}"#); + assert_eq!(serialized, r#"{"TimestampTZ":["Second","Utc"]}"#); let column_type = ColumnType::Boolean; let serialized = serde_json::to_string(&column_type).unwrap(); diff --git a/crates/proof-of-sql/src/base/database/literal_value.rs b/crates/proof-of-sql/src/base/database/literal_value.rs index e7ced4b93..e75148b9f 100644 --- a/crates/proof-of-sql/src/base/database/literal_value.rs +++ b/crates/proof-of-sql/src/base/database/literal_value.rs @@ -1,9 +1,5 @@ -use crate::base::{ - database::ColumnType, - math::decimal::Precision, - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, -}; +use crate::base::{database::ColumnType, math::decimal::Precision, scalar::Scalar}; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; use serde::{Deserialize, Serialize}; /// Represents a literal value. 
diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index e0710e0ed..808e075ed 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -20,7 +20,6 @@ use crate::base::{ }, math::decimal::Precision, scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; use arrow::{ array::{ @@ -33,7 +32,11 @@ use arrow::{ record_batch::RecordBatch, }; use indexmap::IndexMap; -use proof_of_sql_parser::{Identifier, ParseError}; +use proof_of_sql_parser::{ + error::PoSQLTimestampError, + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, + Identifier, ParseError, +}; use std::sync::Arc; use thiserror::Error; @@ -56,12 +59,9 @@ pub enum OwnedArrowConversionError { /// This error occurs when trying to convert from an Arrow array with nulls. #[error("null values are not supported in OwnedColumn yet")] NullNotSupportedYet, - /// This error occurs when trying to convert from an unsupported timestamp unit. - #[error("unsupported timestamp unit: {0}")] - UnsupportedTimestampUnit(String), - /// This error occurs when trying to convert from an invalid timezone string. 
- #[error("invalid timezone string: {0}")] - InvalidTimezone(String), // New error variant for timezone strings + /// Using TimeError to handle all time-related errors + #[error(transparent)] + TimestampConversionError(#[from] PoSQLTimestampError), } impl From> for ArrayRef { @@ -193,15 +193,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - OwnedArrowConversionError::UnsupportedTimestampUnit( - "Second".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } @@ -209,15 +207,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - OwnedArrowConversionError::UnsupportedTimestampUnit( - "Millisecond".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Millisecond, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } @@ -225,15 +221,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - OwnedArrowConversionError::UnsupportedTimestampUnit( - "Microsecond".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Microsecond, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } @@ -241,15 +235,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - 
OwnedArrowConversionError::UnsupportedTimestampUnit( - "Nanosecond".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Nanosecond, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index ce1a3e321..14eb9ba6a 100644 --- a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -3,11 +3,9 @@ /// converting to the final result in either Arrow format or JSON. /// This is the analog of an arrow Array. use super::ColumnType; -use crate::base::{ - math::decimal::Precision, - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, -}; +use crate::base::{math::decimal::Precision, scalar::Scalar}; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; + #[derive(Debug, PartialEq, Clone, Eq)] #[non_exhaustive] /// Supported types for OwnedColumn diff --git a/crates/proof-of-sql/src/base/database/owned_table_test.rs b/crates/proof-of-sql/src/base/database/owned_table_test.rs index adbec791c..2abdf869a 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test.rs @@ -2,12 +2,14 @@ use crate::{ base::{ database::{owned_table_utility::*, OwnedColumn, OwnedTable, OwnedTableError}, scalar::Curve25519Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }, proof_primitive::dory::DoryScalar, }; use indexmap::IndexMap; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{ + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, + Identifier, +}; #[test] fn we_can_create_an_owned_table_with_no_columns() { @@ -58,18 +60,18 @@ fn 
we_can_create_an_owned_table_with_data() { [true, false, true, false, true, false, true, false, true], ), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX], ), ]); let mut table = IndexMap::new(); table.insert( - Identifier::try_new("timestamp").unwrap(), + Identifier::try_new("time_stamp").unwrap(), OwnedColumn::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX].into(), ), ); @@ -125,9 +127,9 @@ fn we_get_inequality_between_tables_with_differing_column_order() { varchar("c", ["0"; 0]), boolean("d", [false; 0]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0; 0], ), ]); @@ -137,9 +139,9 @@ fn we_get_inequality_between_tables_with_differing_column_order() { bigint("a", [0; 0]), varchar("c", ["0"; 0]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0; 0], ), ]); @@ -153,9 +155,9 @@ fn we_get_inequality_between_tables_with_differing_data() { varchar("c", ["0"]), boolean("d", [true]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [1625072400], ), ]); @@ -165,9 +167,9 @@ fn we_get_inequality_between_tables_with_differing_data() { varchar("c", ["0"]), boolean("d", [true]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [1625076000], ), ]); diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs index 5364cbb99..7c211eb94 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs @@ -5,9 +5,9 @@ use super::{ use crate::base::{ 
database::owned_table_utility::*, scalar::{compute_commitment_for_testing, Curve25519Scalar}, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; use blitzar::proof::InnerProductProof; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; #[test] fn we_can_query_the_length_of_a_table() { @@ -52,7 +52,7 @@ fn we_can_access_the_columns_of_a_table() { timestamptz( "time", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [4, 5, 6, 5], ), ]); @@ -110,7 +110,7 @@ fn we_can_access_the_columns_of_a_table() { let column = ColumnRef::new( table_ref_2, "time".parse().unwrap(), - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc), ); match accessor.get_column(column) { Column::TimestampTZ(_, _, col) => assert_eq!(col.to_vec(), vec![4, 5, 6, 5]), diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index 7867e918e..4f77f933d 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -14,12 +14,12 @@ //! ]); //! ``` use super::{OwnedColumn, OwnedTable}; -use crate::base::{ - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, -}; +use crate::base::scalar::Scalar; use core::ops::Deref; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{ + posql_time::{timezone, unit::PoSQLTimeUnit}, + Identifier, +}; /// Creates an OwnedTable from a list of (Identifier, OwnedColumn) pairs. 
/// This is a convenience wrapper around OwnedTable::try_from_iter primarily for use in tests and @@ -212,18 +212,18 @@ pub fn decimal75( /// ``` /// use proof_of_sql::base::{database::owned_table_utility::*, /// scalar::Curve25519Scalar, -/// time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone} /// }; -/// use chrono_tz::Europe::London; +/// use proof_of_sql_parser::{ +/// posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}}; /// /// let result = owned_table::([ -/// timestamptz("event_time", PoSQLTimeUnit::Second, PoSQLTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), +/// timestamptz("event_time", PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, vec![1625072400, 1625076000, 1625079600]), /// ]); /// ``` pub fn timestamptz( name: impl Deref, time_unit: PoSQLTimeUnit, - timezone: PoSQLTimeZone, + timezone: timezone::PoSQLTimeZone, data: impl IntoIterator, ) -> (Identifier, OwnedColumn) { ( diff --git a/crates/proof-of-sql/src/base/database/record_batch_utility.rs b/crates/proof-of-sql/src/base/database/record_batch_utility.rs index 7c67c8f7c..7cccd6424 100644 --- a/crates/proof-of-sql/src/base/database/record_batch_utility.rs +++ b/crates/proof-of-sql/src/base/database/record_batch_utility.rs @@ -1,8 +1,8 @@ -use crate::base::time::timestamp::{PoSQLTimeUnit, Time}; use arrow::array::{ TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }; +use proof_of_sql_parser::posql_time::unit::PoSQLTimeUnit; use std::sync::Arc; /// Extension trait for Vec to convert it to an Arrow array @@ -23,6 +23,16 @@ impl ToArrow for Vec { } } +/// A wrapper around i64 to mitigate conflicting From +/// implementations +#[derive(Clone)] +pub struct Time { + /// i64 count of timeunits since unix epoch + pub timestamp: i64, + /// Timeunit of this time + pub unit: PoSQLTimeUnit, +} + impl ToArrow for Vec