From b9ce75aec9024ec5070ec83b23944fb88736a1c5 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Thu, 23 May 2024 14:23:00 -0700 Subject: [PATCH 1/5] subscript --- src/ast/mod.rs | 66 ++++++++++++--- src/parser/mod.rs | 43 ++++++---- tests/sqlparser_duckdb.rs | 8 +- tests/sqlparser_postgres.rs | 159 ++++++++++++++++++++++++++++-------- 4 files changed, 216 insertions(+), 60 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d937b7275..ff5b57357 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -745,10 +745,10 @@ pub enum Expr { /// ``` /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs Dictionary(Vec), - /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]` - ArrayIndex { - obj: Box, - indexes: Vec, + /// An access of nested data using subscript syntax, for example `array[2]`. + Subscript { + expr: Box, + subscript: Box, }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), @@ -804,6 +804,53 @@ pub enum Expr { Lambda(LambdaFunction), } +/// The contents inside the `[` and `]` in a subscript expression. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum Subscript { + /// Accesses the element of the array at the given index. + Index { index: Expr }, + + /// Accesses a slice of an array on PostgreSQL, e.g. + /// + /// ```plaintext + /// => select (array[1,2,3,4,5,6])[2:5]; + /// ----------- + /// {2,3,4,5} + /// ``` + /// + /// The lower and/or upper bound can be omitted to slice from the start or + /// end of the array respectively. + /// + /// See . + Slice { + lower_bound: Option, + upper_bound: Option, + }, +} + +impl fmt::Display for Subscript { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Subscript::Index { index } => write!(f, "{index}"), + Subscript::Slice { + lower_bound, + upper_bound, + } => { + if let Some(lower) = lower_bound { + write!(f, "{lower}")?; + } + write!(f, ":")?; + if let Some(upper) = upper_bound { + write!(f, "{upper}")?; + } + Ok(()) + } + } + } +} + /// A lambda function. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1250,12 +1297,11 @@ impl fmt::Display for Expr { Expr::Dictionary(fields) => { write!(f, "{{{}}}", display_comma_separated(fields)) } - Expr::ArrayIndex { obj, indexes } => { - write!(f, "{obj}")?; - for i in indexes { - write!(f, "[{i}]")?; - } - Ok(()) + Expr::Subscript { + expr, + subscript: key, + } => { + write!(f, "{expr}[{key}]") } Expr::Array(set) => { write!(f, "{set}") diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8132921f1..5e0d78e61 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2582,8 +2582,7 @@ impl<'a> Parser<'a> { }) } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - // parse index - self.parse_array_index(expr) + self.parse_subscript(expr) } else if dialect_of!(self is SnowflakeDialect) { self.prev_token(); self.parse_json_access(expr) @@ -2611,18 +2610,34 @@ impl<'a> Parser<'a> { } } - pub fn parse_array_index(&mut self, expr: Expr) -> Result { - let index = self.parse_expr()?; + pub fn parse_subscript(&mut self, expr: Expr) -> Result { + let parse_upper_bound = |p: &mut Parser<'a>| { + if let Token::RBracket = p.peek_token().token { + Ok(None) + } else { + p.parse_expr().map(Some) + } + }; + let subscript = if self.consume_token(&Token::Colon) { + Subscript::Slice { + lower_bound: None, + upper_bound: parse_upper_bound(self)?, + } + } else { + let expr = self.parse_expr()?; + if self.consume_token(&Token::Colon) { + Subscript::Slice { + lower_bound: Some(expr), + upper_bound: parse_upper_bound(self)?, + } + } else { + Subscript::Index { index: expr } + } + }; self.expect_token(&Token::RBracket)?; - let mut indexes: Vec = vec![index]; - while self.consume_token(&Token::LBracket) { - let index = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - indexes.push(index); - } - Ok(Expr::ArrayIndex { - obj: Box::new(expr), - indexes, + Ok(Expr::Subscript { + expr: Box::new(expr), + subscript: Box::new(subscript), }) } @@ -2872,7 +2887,7 @@ impl<'a> Parser<'a> { Ok(Self::MUL_DIV_MOD_OP_PREC) } Token::DoubleColon => Ok(50), - Token::Colon => Ok(50), + Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50), Token::ExclamationMark => Ok(50), Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), Token::Arrow diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a84da5378..8d12945dd 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -528,8 +528,8 @@ fn test_array_index() { _ => panic!("Expected an expression with alias"), }; assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Array(Array { + &Expr::Subscript { + expr: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_owned())), Expr::Value(Value::SingleQuotedString("b".to_owned())), @@ -537,7 +537,9 @@ fn test_array_index() { ], named: false })), - indexes: vec![Expr::Value(number("3"))] + subscript: Box::new(Subscript::Index { + index: Expr::Value(number("3")) + }) }, expr ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d68ebd556..a83c99c7f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1873,9 +1873,11 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexes: vec![num[0].clone()], + &Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("foo"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1883,9 +1885,16 @@ fn parse_array_index_expr() { let sql = "SELECT foo[0][0] FROM foos"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("foo"))), - indexes: vec![num[0].clone(), num[0].clone()], + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("foo"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), + }), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1893,19 +1902,27 @@ fn parse_array_index_expr() { let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Identifier(Ident::new("bar"))), - indexes: vec![ - num[0].clone(), - Expr::Identifier(Ident { - value: "baz".to_string(), - quote_style: Some('"') + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Identifier(Ident::new("bar"))), + subscript: Box::new(Subscript::Index { + index: num[0].clone() + }) }), - Expr::Identifier(Ident { + subscript: Box::new(Subscript::Index { + index: Expr::Identifier(Ident { + value: "baz".to_string(), + quote_style: Some('"') + }) + }) + }), + subscript: Box::new(Subscript::Index { + index: Expr::Identifier(Ident { value: "fooz".to_string(), quote_style: Some('"') }) - ], + }) }, expr_from_projection(only(&select.projection)), ); @@ -1913,26 +1930,33 @@ fn parse_array_index_expr() { let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]"; let select = pg_and_generic().verified_only_select(sql); assert_eq!( - &Expr::ArrayIndex { - obj: Box::new(Expr::Nested(Box::new(Expr::Cast { - kind: CastKind::Cast, - expr: Box::new(Expr::Array(Array { - elem: vec![Expr::Array(Array { - elem: vec![num[2].clone(), num[3].clone(),], + &Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::Array(Array { + elem: vec![Expr::Array(Array { + elem: vec![num[2].clone(), num[3].clone(),], + named: true, + })], named: true, - })], - named: true, - })), - data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( - Box::new(DataType::Int(None)), + })), + data_type: DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket( + Box::new(DataType::Int(None)), + None + ))), None - ))), - None - )), - format: None, - }))), - indexes: vec![num[1].clone(), num[2].clone()], + )), + format: None, + }))), + subscript: Box::new(Subscript::Index { + index: num[1].clone() + }), + }), + subscript: Box::new(Subscript::Index { + index: num[2].clone() + }), }, expr_from_projection(only(&select.projection)), ); @@ -1948,6 +1972,75 @@ fn parse_array_index_expr() { ); } +#[test] +fn parse_array_subscript() { + let tests = [ + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2]", + Subscript::Index { + index: Expr::Value(number("2")), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[foo]", + Subscript::Index { + index: Expr::Identifier(Ident::new("foo")), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:5]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: Some(Expr::Value(number("5"))), + }, + ), + ( + "arr[array_length(arr) - 3:array_length(arr) - 1]", + Subscript::Slice { + lower_bound: Some(Expr::BinaryOp { + left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("3"))), + }), + upper_bound: Some(Expr::BinaryOp { + left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("1"))), + }), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[:5]", + Subscript::Slice { + lower_bound: None, + upper_bound: Some(Expr::Value(number("5"))), + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[:]", + Subscript::Slice { + lower_bound: None, + upper_bound: None, + }, + ), + ]; + for (sql, expect) in tests { + let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else { + panic!("expected subscript expr"); + }; + assert_eq!(expect, *subscript); + } + + // pg_and_generic().verified_expr("schedule[:2][2:]"); +} + #[test] fn parse_create_index() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)"; From bd573b2b0ba4f55523bcb8682dcfb7e21914a792 Mon Sep 17 00:00:00 2001 From: Joey Hain Date: Mon, 27 May 2024 13:22:54 -0700 Subject: [PATCH 2/5] add tests for similar looking snowflake syntax --- src/ast/mod.rs | 2 +- tests/sqlparser_postgres.rs | 2 +- tests/sqlparser_snowflake.rs | 30 ++++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ff5b57357..190f9bf8f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -678,7 +678,7 @@ pub enum Expr { }, /// Access a map-like object by field (e.g. `column['field']` or `column[4]` /// Note that depending on the dialect, struct like accesses may be - /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess) + /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess) /// MapAccess { column: Box, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index a83c99c7f..ad4165e79 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2038,7 +2038,7 @@ fn parse_array_subscript() { assert_eq!(expect, *subscript); } - // pg_and_generic().verified_expr("schedule[:2][2:]"); + pg_and_generic().verified_expr("schedule[:2][2:]"); } #[test] diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 30f2cc601..6c2228a5e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -344,6 +344,36 @@ fn parse_semi_structured_data_traversal() { })], select.projection ); + + // a json access used as a key to another json access + assert_eq!( + snowflake().verified_expr("a[b:c]"), + Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Bracket { + key: Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("b"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "c".to_owned(), + quoted: false + }] + } + } + }] + } + } + ); + + // unquoted object keys cannot start with a digit + assert_eq!( + snowflake() + .parse_sql_statements("SELECT a:42") + .unwrap_err() + .to_string(), + "sql parser error: Expected variant object key name, found: 42" + ); } #[test] From 4df6dc8ef343a7bace9d0cb43996d32ebc44f75c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 30 May 2024 11:10:55 -0400 Subject: [PATCH 3/5] Support optional stride in array access --- src/ast/mod.rs | 14 ++++++ src/parser/mod.rs | 97 ++++++++++++++++++++++++++++--------- tests/sqlparser_postgres.rs | 13 +++++ 3 files changed, 102 insertions(+), 22 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 190f9bf8f..796a046a9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -820,6 +820,14 @@ pub enum Subscript { /// {2,3,4,5} /// ``` /// + /// Stride notation is also supported + /// + /// ```plaintext + /// => select (array[1,2,3,4,5,6])[1:6:2]; + /// ----------- + /// {1,3,5} + /// ``` + /// /// The lower and/or upper bound can be omitted to slice from the start or /// end of the array respectively. /// @@ -827,6 +835,7 @@ pub enum Subscript { Slice { lower_bound: Option, upper_bound: Option, + stride: Option, }, } @@ -837,6 +846,7 @@ impl fmt::Display for Subscript { Subscript::Slice { lower_bound, upper_bound, + stride, } => { if let Some(lower) = lower_bound { write!(f, "{lower}")?; @@ -845,6 +855,10 @@ impl fmt::Display for Subscript { if let Some(upper) = upper_bound { write!(f, "{upper}")?; } + if let Some(stride) = stride { + write!(f, ":")?; + write!(f, "{stride}")?; + } Ok(()) } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5e0d78e61..3986f81ed 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2610,31 +2610,84 @@ impl<'a> Parser<'a> { } } - pub fn parse_subscript(&mut self, expr: Expr) -> Result { - let parse_upper_bound = |p: &mut Parser<'a>| { - if let Token::RBracket = p.peek_token().token { - Ok(None) - } else { - p.parse_expr().map(Some) - } + /// Parses an array subscript like + /// * `[:]` + /// * `[l]` + /// * `[l:]` + /// * `[:u]` + /// * `[l:u]` + /// * `[l:u:s]` + /// + /// Parser is right after `[` + fn parse_subscript_inner(&mut self) -> Result { + // at either `:(rest)` or `:(rest)]` + let lower_bound = if self.consume_token(&Token::Colon) { + None + } else { + Some(self.parse_expr()?) }; - let subscript = if self.consume_token(&Token::Colon) { - Subscript::Slice { - lower_bound: None, - upper_bound: parse_upper_bound(self)?, - } + + // check for end + if self.consume_token(&Token::RBracket) { + if let Some(lower_bound) = lower_bound { + return Ok(Subscript::Index { index: lower_bound }); + }; + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } + + // consume the `:` + if lower_bound.is_some() { + self.expect_token(&Token::Colon)?; + } + + // we are now at either `]`, `(rest)]` + let upper_bound = if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); } else { - let expr = self.parse_expr()?; - if self.consume_token(&Token::Colon) { - Subscript::Slice { - lower_bound: Some(expr), - upper_bound: parse_upper_bound(self)?, - } - } else { - Subscript::Index { index: expr } - } + Some(self.parse_expr()?) }; - self.expect_token(&Token::RBracket)?; + + // check for end + if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride: None, + }); + } + + // we are now at `:]` or `:stride]` + self.expect_token(&Token::Colon)?; + let stride = if self.consume_token(&Token::RBracket) { + None + } else { + Some(self.parse_expr()?) + }; + + if stride.is_some() { + self.expect_token(&Token::RBracket)?; + } + + Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride, + }) + } + + /// Parses an array subscript like `[1:3]` + /// + /// Parser is right after `[` + pub fn parse_subscript(&mut self, expr: Expr) -> Result { + let subscript = self.parse_subscript_inner()?; Ok(Expr::Subscript { expr: Box::new(expr), subscript: Box::new(subscript), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ad4165e79..43a5d2530 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1992,6 +1992,15 @@ fn parse_array_subscript() { Subscript::Slice { lower_bound: Some(Expr::Value(number("2"))), upper_bound: Some(Expr::Value(number("5"))), + stride: None, + }, + ), + ( + "(ARRAY[1, 2, 3, 4, 5, 6])[2:5:3]", + Subscript::Slice { + lower_bound: Some(Expr::Value(number("2"))), + upper_bound: Some(Expr::Value(number("5"))), + stride: Some(Expr::Value(number("3"))), }, ), ( @@ -2007,6 +2016,7 @@ fn parse_array_subscript() { op: BinaryOperator::Minus, right: Box::new(Expr::Value(number("1"))), }), + stride: None, }, ), ( @@ -2014,6 +2024,7 @@ fn parse_array_subscript() { Subscript::Slice { lower_bound: None, upper_bound: Some(Expr::Value(number("5"))), + stride: None, }, ), ( @@ -2021,6 +2032,7 @@ fn parse_array_subscript() { Subscript::Slice { lower_bound: Some(Expr::Value(number("2"))), upper_bound: None, + stride: None, }, ), ( @@ -2028,6 +2040,7 @@ fn parse_array_subscript() { Subscript::Slice { lower_bound: None, upper_bound: None, + stride: None, }, ), ]; From 1ac89af0f888762cebf80f1ca401dbf59dd9457f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 30 May 2024 13:44:56 -0400 Subject: [PATCH 4/5] clarify comments --- src/ast/mod.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 796a046a9..05c93197a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -820,18 +820,19 @@ pub enum Subscript { /// {2,3,4,5} /// ``` /// - /// Stride notation is also supported + /// The lower and/or upper bound can be omitted to slice from the start or + /// end of the array respectively. + /// + /// See . + /// + /// Also supports an optional "stride" as the last element (this is not + /// supported by postgres), e.g. /// /// ```plaintext /// => select (array[1,2,3,4,5,6])[1:6:2]; /// ----------- /// {1,3,5} /// ``` - /// - /// The lower and/or upper bound can be omitted to slice from the start or - /// end of the array respectively. - /// - /// See . Slice { lower_bound: Option, upper_bound: Option, From def916c01bf7a76ee0a7a238c7200691d8600105 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 31 May 2024 17:20:33 -0400 Subject: [PATCH 5/5] Add test for multiple array access --- tests/sqlparser_postgres.rs | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 43a5d2530..cf77d9643 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2054,6 +2054,34 @@ fn parse_array_subscript() { pg_and_generic().verified_expr("schedule[:2][2:]"); } +#[test] +fn parse_array_multi_subscript() { + let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]"); + assert_eq!( + Expr::Subscript { + expr: Box::new(Expr::Subscript { + expr: Box::new(call( + "make_array", + vec![ + Expr::Value(number("1")), + Expr::Value(number("2")), + Expr::Value(number("3")) + ] + )), + subscript: Box::new(Subscript::Slice { + lower_bound: Some(Expr::Value(number("1"))), + upper_bound: Some(Expr::Value(number("2"))), + stride: None, + }), + }), + subscript: Box::new(Subscript::Index { + index: Expr::Value(number("2")), + }), + }, + expr, + ); +} + #[test] fn parse_create_index() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)";