diff --git a/.gitignore b/.gitignore index d41369207..4c6821d47 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,5 @@ Cargo.lock .vscode *.swp + +.DS_store \ No newline at end of file diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index ff2a3ad04..f3ebd16da 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -302,7 +302,7 @@ pub enum DataType { /// /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type - Struct(Vec), + Struct(Vec, StructBracketKind), /// Union /// /// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html @@ -517,9 +517,16 @@ impl fmt::Display for DataType { } write!(f, ")") } - DataType::Struct(fields) => { + DataType::Struct(fields, bracket) => { if !fields.is_empty() { - write!(f, "STRUCT<{}>", display_comma_separated(fields)) + match bracket { + StructBracketKind::Parentheses => { + write!(f, "STRUCT({})", display_comma_separated(fields)) + } + StructBracketKind::AngleBrackets => { + write!(f, "STRUCT<{}>", display_comma_separated(fields)) + } + } } else { write!(f, "STRUCT") } @@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone( Ok(()) } +/// Type of brackets used for `STRUCT` literals. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum StructBracketKind { + /// Example: `STRUCT(a INT, b STRING)` + Parentheses, + /// Example: `STRUCT` + AngleBrackets, +} + /// Timestamp and Time data types information about TimeZone formatting. /// /// This is more related to a display information than real differences between each variant. To diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e3e9a5371..c4533ef57 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; pub use self::data_type::{ - ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, + ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, + StructBracketKind, TimezoneInfo, }; pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; pub use self::ddl::{ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b6ec150f5..900ad9081 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> { )) } + /// Duckdb Struct Data Type + fn parse_duckdb_struct_type_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::STRUCT)?; + self.expect_token(&Token::LParen)?; + let struct_body = self.parse_comma_separated(|parser| { + let field_name = parser.parse_identifier(false)?; + let field_type = parser.parse_data_type()?; + + Ok(StructField { + field_name: Some(field_name), + field_type, + }) + }); + self.expect_token(&Token::RParen)?; + struct_body + } + /// Parse a field definition in a [struct] or [tuple]. /// Syntax: /// @@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> { )))) } } + Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => { + self.prev_token(); + let field_defs = self.parse_duckdb_struct_type_def()?; + Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) + } Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { self.prev_token(); let (field_defs, _trailing_bracket) = self.parse_struct_type_def(Self::parse_struct_field_def)?; trailing_bracket = _trailing_bracket; - Ok(DataType::Struct(field_defs)) + Ok(DataType::Struct( + field_defs, + StructBracketKind::AngleBrackets, + )) } Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { self.prev_token(); diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 134c8ddad..57cf9d7fd 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -489,28 +489,34 @@ fn parse_nested_data_types() { vec![ ColumnDef { name: Ident::new("x"), - data_type: DataType::Struct(vec![ - StructField { - field_name: Some("a".into()), - field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( - Box::new(DataType::Int64,) - )) - }, - StructField { - field_name: Some("b".into()), - field_type: DataType::Bytes(Some(42)) - }, - ]), + data_type: DataType::Struct( + vec![ + StructField { + field_name: Some("a".into()), + field_type: DataType::Array(ArrayElemTypeDef::AngleBracket( + Box::new(DataType::Int64,) + )) + }, + StructField { + field_name: Some("b".into()), + field_type: DataType::Bytes(Some(42)) + }, + ], + StructBracketKind::AngleBrackets + ), collation: None, options: vec![], }, ColumnDef { name: Ident::new("y"), data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( - DataType::Struct(vec![StructField { - field_name: None, - field_type: DataType::Int64, - }]), + DataType::Struct( + vec![StructField { + field_name: None, + field_type: DataType::Int64, + }], + StructBracketKind::AngleBrackets + ), ))), collation: None, options: vec![], @@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() { }, StructField { field_name: Some("str".into()), - field_type: DataType::Struct(vec![StructField { - field_name: None, - field_type: DataType::Bool - }]) + field_type: DataType::Struct( + vec![StructField { + field_name: None, + field_type: DataType::Bool + }], + StructBracketKind::AngleBrackets + ) }, ] }, @@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() { fields: vec![ StructField { field_name: Some("x".into()), - field_type: DataType::Struct(Default::default()) + field_type: DataType::Struct( + Default::default(), + StructBracketKind::AngleBrackets + ) }, StructField { field_name: Some("y".into()), field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( - DataType::Struct(Default::default()) + DataType::Struct(Default::default(), StructBracketKind::AngleBrackets) ))) }, ] @@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { }, StructField { field_name: Some("str".into()), - field_type: DataType::Struct(vec![StructField { - field_name: None, - field_type: DataType::Bool - }]) + field_type: DataType::Struct( + vec![StructField { + field_name: None, + field_type: DataType::Bool + }], + StructBracketKind::AngleBrackets + ) }, ] }, @@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() { fields: vec![ StructField { field_name: Some("x".into()), - field_type: DataType::Struct(Default::default()) + field_type: DataType::Struct( + Default::default(), + StructBracketKind::AngleBrackets + ) }, StructField { field_name: Some("y".into()), field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( - DataType::Struct(Default::default()) + DataType::Struct(Default::default(), StructBracketKind::AngleBrackets) ))) }, ] diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 0e61b86c9..6e6c4e230 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects { } } +#[test] +fn test_struct() { + // s STRUCT(v VARCHAR, i INTEGER) + let struct_type1 = DataType::Struct( + vec![ + StructField { + field_name: Some(Ident::new("v")), + field_type: DataType::Varchar(None), + }, + StructField { + field_name: Some(Ident::new("i")), + field_type: DataType::Integer(None), + }, + ], + StructBracketKind::Parentheses, + ); + + // basic struct + let statement = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#); + assert_eq!( + column_defs(statement), + vec![ColumnDef { + name: "s".into(), + data_type: struct_type1.clone(), + collation: None, + options: vec![], + }] + ); + + // struct array + let statement = duckdb().verified_stmt(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#); + assert_eq!( + column_defs(statement), + vec![ColumnDef { + name: "s".into(), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(struct_type1), + None + )), + collation: None, + options: vec![], + }] + ); + + // s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR)) + let struct_type2 = DataType::Struct( + vec![ + StructField { + field_name: Some(Ident::new("v")), + field_type: DataType::Varchar(None), + }, + StructField { + field_name: Some(Ident::new("s")), + field_type: DataType::Struct( + vec![ + StructField { + field_name: Some(Ident::new("a1")), + field_type: DataType::Integer(None), + }, + StructField { + field_name: Some(Ident::new("a2")), + field_type: DataType::Varchar(None), + }, + ], + StructBracketKind::Parentheses, + ), + }, + ], + StructBracketKind::Parentheses, + ); + + // nested struct + let statement = duckdb().verified_stmt( + r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#, + ); + + assert_eq!( + column_defs(statement), + vec![ColumnDef { + name: "s".into(), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(struct_type2), + None + )), + collation: None, + options: vec![], + }] + ); + + // failing test (duckdb does not support bracket syntax) + let sql_list = vec![ + r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#, + r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#, + r#"CREATE TABLE t1 (s STRUCT)"#, + r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#, + r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#, + r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#, + ]; + + for sql in sql_list { + duckdb().parse_sql_statements(sql).unwrap_err(); + } +} + +/// Returns the ColumnDefinitions from a CreateTable statement +fn column_defs(statement: Statement) -> Vec { + match statement { + Statement::CreateTable(CreateTable { columns, .. }) => columns, + _ => panic!("Expected CreateTable"), + } +} + #[test] fn test_select_wildcard_with_exclude() { let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");