Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support DuckDB struct syntax and support list of struct syntax #1372

Merged
merged 14 commits into from
Aug 15, 2024
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ Cargo.lock
.vscode

*.swp

.DS_Store
24 changes: 21 additions & 3 deletions src/ast/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ pub enum DataType {
///
/// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Struct(Vec<StructField>),
Struct(Vec<StructField>, StructBracketKind),
/// Union
///
/// [duckdb]: https://duckdb.org/docs/sql/data_types/union.html
Expand Down Expand Up @@ -517,9 +517,16 @@ impl fmt::Display for DataType {
}
write!(f, ")")
}
DataType::Struct(fields) => {
DataType::Struct(fields, bracket) => {
if !fields.is_empty() {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
match bracket {
StructBracketKind::Parentheses => {
write!(f, "STRUCT({})", display_comma_separated(fields))
}
StructBracketKind::AngleBrackets => {
write!(f, "STRUCT<{}>", display_comma_separated(fields))
}
}
} else {
write!(f, "STRUCT")
}
Expand Down Expand Up @@ -618,6 +625,17 @@ fn format_clickhouse_datetime_precision_and_timezone(
Ok(())
}

/// Type of brackets used for `STRUCT` literals.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also added some comments on this struct

/// Bracket style that delimited the field list of a `STRUCT` data type.
///
/// Stored alongside the struct fields so that formatting a parsed type can
/// reproduce the spelling that was used in the original SQL text.
///
/// The enum carries no data, so it is `Copy` and can be passed by value.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum StructBracketKind {
    /// Fields enclosed in parentheses.
    ///
    /// Example: `STRUCT(a INT, b STRING)`
    Parentheses,
    /// Fields enclosed in angle brackets.
    ///
    /// Example: `STRUCT<a INT, b STRING>`
    AngleBrackets,
}

/// Timestamp and Time data types information about TimeZone formatting.
///
/// This is more related to a display information than real differences between each variant. To
Expand Down
3 changes: 2 additions & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ use serde::{Deserialize, Serialize};
use sqlparser_derive::{Visit, VisitMut};

pub use self::data_type::{
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue};
pub use self::ddl::{
Expand Down
27 changes: 26 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2266,6 +2266,23 @@ impl<'a> Parser<'a> {
))
}

/// Duckdb Struct Data Type <https://duckdb.org/docs/sql/data_types/struct.html#retrieving-from-structs>
fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> {
jayzhan211 marked this conversation as resolved.
Show resolved Hide resolved
self.expect_keyword(Keyword::STRUCT)?;
self.expect_token(&Token::LParen)?;
let struct_body = self.parse_comma_separated(|parser| {
let field_name = parser.parse_identifier(false)?;
let field_type = parser.parse_data_type()?;

Ok(StructField {
field_name: Some(field_name),
field_type,
})
});
self.expect_token(&Token::RParen)?;
struct_body
}

/// Parse a field definition in a [struct] or [tuple].
/// Syntax:
///
Expand Down Expand Up @@ -7495,12 +7512,20 @@ impl<'a> Parser<'a> {
))))
}
}
Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => {
self.prev_token();
let field_defs = self.parse_duckdb_struct_type_def()?;
Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses))
}
Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
self.prev_token();
let (field_defs, _trailing_bracket) =
self.parse_struct_type_def(Self::parse_struct_field_def)?;
trailing_bracket = _trailing_bracket;
Ok(DataType::Struct(field_defs))
Ok(DataType::Struct(
field_defs,
StructBracketKind::AngleBrackets,
))
}
Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => {
self.prev_token();
Expand Down
74 changes: 46 additions & 28 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -489,28 +489,34 @@ fn parse_nested_data_types() {
vec![
ColumnDef {
name: Ident::new("x"),
data_type: DataType::Struct(vec![
StructField {
field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
Box::new(DataType::Int64,)
))
},
StructField {
field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42))
},
]),
data_type: DataType::Struct(
vec![
StructField {
field_name: Some("a".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(
Box::new(DataType::Int64,)
))
},
StructField {
field_name: Some("b".into()),
field_type: DataType::Bytes(Some(42))
},
],
StructBracketKind::AngleBrackets
),
collation: None,
options: vec![],
},
ColumnDef {
name: Ident::new("y"),
data_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Int64,
}]),
DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Int64,
}],
StructBracketKind::AngleBrackets
),
))),
collation: None,
options: vec![],
Expand Down Expand Up @@ -708,10 +714,13 @@ fn parse_typed_struct_syntax_bigquery() {
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
field_type: DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Bool
}],
StructBracketKind::AngleBrackets
)
},
]
},
Expand All @@ -730,12 +739,15 @@ fn parse_typed_struct_syntax_bigquery() {
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
)))
},
]
Expand Down Expand Up @@ -1013,10 +1025,13 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
},
StructField {
field_name: Some("str".into()),
field_type: DataType::Struct(vec![StructField {
field_name: None,
field_type: DataType::Bool
}])
field_type: DataType::Struct(
vec![StructField {
field_name: None,
field_type: DataType::Bool
}],
StructBracketKind::AngleBrackets
)
},
]
},
Expand All @@ -1035,12 +1050,15 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
fields: vec![
StructField {
field_name: Some("x".into()),
field_type: DataType::Struct(Default::default())
field_type: DataType::Struct(
Default::default(),
StructBracketKind::AngleBrackets
)
},
StructField {
field_name: Some("y".into()),
field_type: DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new(
DataType::Struct(Default::default())
DataType::Struct(Default::default(), StructBracketKind::AngleBrackets)
)))
},
]
Expand Down
112 changes: 112 additions & 0 deletions tests/sqlparser_duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,118 @@ fn duckdb_and_generic() -> TestedDialects {
}
}

/// Checks that DuckDB's parenthesised `STRUCT(...)` type round-trips and parses into
/// the expected AST for a plain struct, a list of structs, and a nested struct, and
/// that malformed or non-DuckDB spellings are rejected.
#[test]
fn test_struct() {
    // Small builders so each expected type below reads like the SQL it mirrors.
    let named = |name: &str, field_type: DataType| StructField {
        field_name: Some(Ident::new(name)),
        field_type,
    };
    let paren_struct =
        |fields: Vec<StructField>| DataType::Struct(fields, StructBracketKind::Parentheses);
    let list_of = |element: DataType| {
        DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(element), None))
    };
    // Every statement under test declares exactly one column named `s`.
    let single_column_s = |data_type: DataType| {
        vec![ColumnDef {
            name: "s".into(),
            data_type,
            collation: None,
            options: vec![],
        }]
    };
    let parsed_columns = |sql: &str| column_defs(duckdb().verified_stmt(sql));

    // STRUCT(v VARCHAR, i INTEGER)
    let flat = paren_struct(vec![
        named("v", DataType::Varchar(None)),
        named("i", DataType::Integer(None)),
    ]);

    // Plain struct column.
    assert_eq!(
        parsed_columns(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))"#),
        single_column_s(flat.clone())
    );

    // List of structs.
    assert_eq!(
        parsed_columns(r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)[])"#),
        single_column_s(list_of(flat))
    );

    // STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))
    let nested = paren_struct(vec![
        named("v", DataType::Varchar(None)),
        named(
            "s",
            paren_struct(vec![
                named("a1", DataType::Integer(None)),
                named("a2", DataType::Varchar(None)),
            ]),
        ),
    ]);

    // List of nested structs.
    assert_eq!(
        parsed_columns(
            r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, s STRUCT(a1 INTEGER, a2 VARCHAR))[])"#,
        ),
        single_column_s(list_of(nested))
    );

    // Spellings that must fail under the DuckDB dialect: unbalanced or mismatched
    // brackets, angle brackets, a missing field list delimiter, and unnamed fields.
    let rejected = [
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER)))"#,
        r#"CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT<v VARCHAR, i INTEGER>)"#,
        r#"CREATE TABLE t1 (s STRUCT v VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT VARCHAR, i INTEGER )"#,
        r#"CREATE TABLE t1 (s STRUCT (VARCHAR, INTEGER))"#,
    ];

    for &sql in &rejected {
        duckdb().parse_sql_statements(sql).unwrap_err();
    }
}

/// Extracts the column definitions from a `CREATE TABLE` statement.
///
/// # Panics
///
/// Panics with "Expected CreateTable" when `statement` is any other kind of statement.
fn column_defs(statement: Statement) -> Vec<ColumnDef> {
    if let Statement::CreateTable(CreateTable { columns, .. }) = statement {
        return columns;
    }
    panic!("Expected CreateTable")
}
jayzhan211 marked this conversation as resolved.
Show resolved Hide resolved

#[test]
fn test_select_wildcard_with_exclude() {
let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data");
Expand Down
Loading