diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index d937b7275..05c93197a 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -678,7 +678,7 @@ pub enum Expr {
     },
     /// Access a map-like object by field (e.g. `column['field']` or `column[4]`
     /// Note that depending on the dialect, struct like accesses may be
-    /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
+    /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
     /// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
     MapAccess {
         column: Box<Expr>,
@@ -745,10 +745,10 @@ pub enum Expr {
     /// ```
     /// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
     Dictionary(Vec<DictionaryField>),
-    /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
-    ArrayIndex {
-        obj: Box<Expr>,
-        indexes: Vec<Expr>,
+    /// An access of nested data using subscript syntax, for example `array[2]`.
+    Subscript {
+        expr: Box<Expr>,
+        subscript: Box<Subscript>,
     },
     /// An array expression e.g. `ARRAY[1, 2]`
     Array(Array),
@@ -804,6 +804,68 @@ pub enum Expr {
     Lambda(LambdaFunction),
 }
 
+/// The contents inside the `[` and `]` in a subscript expression.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum Subscript {
+    /// Accesses the element of the array at the given index.
+    Index { index: Expr },
+
+    /// Accesses a slice of an array on PostgreSQL, e.g.
+    ///
+    /// ```plaintext
+    /// => select (array[1,2,3,4,5,6])[2:5];
+    /// -----------
+    /// {2,3,4,5}
+    /// ```
+    ///
+    /// The lower and/or upper bound can be omitted to slice from the start or
+    /// end of the array respectively.
+    ///
+    /// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
+    ///
+    /// Also supports an optional "stride" as the last element (this is not
+    /// supported by postgres), e.g.
+    ///
+    /// ```plaintext
+    /// => select (array[1,2,3,4,5,6])[1:6:2];
+    /// -----------
+    /// {1,3,5}
+    /// ```
+    Slice {
+        lower_bound: Option<Expr>,
+        upper_bound: Option<Expr>,
+        stride: Option<Expr>,
+    },
+}
+
+impl fmt::Display for Subscript {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Subscript::Index { index } => write!(f, "{index}"),
+            Subscript::Slice {
+                lower_bound,
+                upper_bound,
+                stride,
+            } => {
+                if let Some(lower) = lower_bound {
+                    write!(f, "{lower}")?;
+                }
+                write!(f, ":")?;
+                if let Some(upper) = upper_bound {
+                    write!(f, "{upper}")?;
+                }
+                if let Some(stride) = stride {
+                    write!(f, ":")?;
+                    write!(f, "{stride}")?;
+                }
+                Ok(())
+            }
+        }
+    }
+}
+
 /// A lambda function.
 #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -1250,12 +1312,11 @@ impl fmt::Display for Expr {
             Expr::Dictionary(fields) => {
                 write!(f, "{{{}}}", display_comma_separated(fields))
             }
-            Expr::ArrayIndex { obj, indexes } => {
-                write!(f, "{obj}")?;
-                for i in indexes {
-                    write!(f, "[{i}]")?;
-                }
-                Ok(())
+            Expr::Subscript {
+                expr,
+                subscript: key,
+            } => {
+                write!(f, "{expr}[{key}]")
             }
             Expr::Array(set) => {
                 write!(f, "{set}")
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 8132921f1..3986f81ed 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2582,8 +2582,7 @@ impl<'a> Parser<'a> {
             })
         } else if Token::LBracket == tok {
             if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
-                // parse index
-                self.parse_array_index(expr)
+                self.parse_subscript(expr)
             } else if dialect_of!(self is SnowflakeDialect) {
                 self.prev_token();
                 self.parse_json_access(expr)
@@ -2611,18 +2610,87 @@ impl<'a> Parser<'a> {
         }
     }
 
-    pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> {
-        let index = self.parse_expr()?;
-        self.expect_token(&Token::RBracket)?;
-        let mut indexes: Vec<Expr> = vec![index];
-        while self.consume_token(&Token::LBracket) {
-            let index = self.parse_expr()?;
+    /// Parses an array subscript like
+    /// * `[:]`
+    /// * `[l]`
+    /// * `[l:]`
+    /// * `[:u]`
+    /// * `[l:u]`
+    /// * `[l:u:s]`
+    ///
+    /// Parser is right after `[`
+    fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> {
+        // at either `<lower>:(rest)` or `:(rest)]`
+        let lower_bound = if self.consume_token(&Token::Colon) {
+            None
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        // check for end
+        if self.consume_token(&Token::RBracket) {
+            if let Some(lower_bound) = lower_bound {
+                return Ok(Subscript::Index { index: lower_bound });
+            };
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound: None,
+                stride: None,
+            });
+        }
+
+        // consume the `:`
+        if lower_bound.is_some() {
+            self.expect_token(&Token::Colon)?;
+        }
+
+        // we are now at either `]`, `<upper>(rest)]`
+        let upper_bound = if self.consume_token(&Token::RBracket) {
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound: None,
+                stride: None,
+            });
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        // check for end
+        if self.consume_token(&Token::RBracket) {
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound,
+                stride: None,
+            });
+        }
+
+        // we are now at `:]` or `:stride]`
+        self.expect_token(&Token::Colon)?;
+        let stride = if self.consume_token(&Token::RBracket) {
+            None
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        if stride.is_some() {
             self.expect_token(&Token::RBracket)?;
-            indexes.push(index);
         }
-        Ok(Expr::ArrayIndex {
-            obj: Box::new(expr),
-            indexes,
+
+        Ok(Subscript::Slice {
+            lower_bound,
+            upper_bound,
+            stride,
+        })
+    }
+
+    /// Parses an array subscript like `[1:3]`
+    ///
+    /// Parser is right after `[`
+    pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> {
+        let subscript = self.parse_subscript_inner()?;
+        Ok(Expr::Subscript {
+            expr: Box::new(expr),
+            subscript: Box::new(subscript),
         })
     }
 
@@ -2872,7 +2940,7 @@ impl<'a> Parser<'a> {
                 Ok(Self::MUL_DIV_MOD_OP_PREC)
             }
             Token::DoubleColon => Ok(50),
-            Token::Colon => Ok(50),
+            Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
             Token::ExclamationMark => Ok(50),
             Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
             Token::Arrow
diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs
index a84da5378..8d12945dd 100644
--- a/tests/sqlparser_duckdb.rs
+++ b/tests/sqlparser_duckdb.rs
@@ -528,8 +528,8 @@ fn test_array_index() {
         _ => panic!("Expected an expression with alias"),
     };
     assert_eq!(
-        &Expr::ArrayIndex {
-            obj: Box::new(Expr::Array(Array {
+        &Expr::Subscript {
+            expr: Box::new(Expr::Array(Array {
                 elem: vec![
                     Expr::Value(Value::SingleQuotedString("a".to_owned())),
                     Expr::Value(Value::SingleQuotedString("b".to_owned())),
@@ -537,7 +537,9 @@ fn test_array_index() {
                 ],
                 named: false
             })),
-            indexes: vec![Expr::Value(number("3"))]
+            subscript: Box::new(Subscript::Index {
+                index: Expr::Value(number("3"))
+            })
         },
         expr
     );
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index d68ebd556..cf77d9643 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1873,9 +1873,11 @@ fn parse_array_index_expr() {
     let sql = "SELECT foo[0] FROM foos";
     let select = pg_and_generic().verified_only_select(sql);
     assert_eq!(
-        &Expr::ArrayIndex {
-            obj: Box::new(Expr::Identifier(Ident::new("foo"))),
-            indexes: vec![num[0].clone()],
+        &Expr::Subscript {
+            expr: Box::new(Expr::Identifier(Ident::new("foo"))),
+            subscript: Box::new(Subscript::Index {
+                index: num[0].clone()
+            }),
         },
         expr_from_projection(only(&select.projection)),
     );
@@ -1883,9 +1885,16 @@ fn parse_array_index_expr() {
     let sql = "SELECT foo[0][0] FROM foos";
     let select = pg_and_generic().verified_only_select(sql);
     assert_eq!(
-        &Expr::ArrayIndex {
-            obj: Box::new(Expr::Identifier(Ident::new("foo"))),
-            indexes: vec![num[0].clone(), num[0].clone()],
+        &Expr::Subscript {
+            expr: Box::new(Expr::Subscript {
+                expr: Box::new(Expr::Identifier(Ident::new("foo"))),
+                subscript: Box::new(Subscript::Index {
+                    index: num[0].clone()
+                }),
+            }),
+            subscript: Box::new(Subscript::Index {
+                index: num[0].clone()
+            }),
         },
         expr_from_projection(only(&select.projection)),
     );
@@ -1893,19 +1902,27 @@ fn parse_array_index_expr() {
     let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#;
     let select = pg_and_generic().verified_only_select(sql);
     assert_eq!(
-        &Expr::ArrayIndex {
-            obj: Box::new(Expr::Identifier(Ident::new("bar"))),
-            indexes: vec![
-                num[0].clone(),
-                Expr::Identifier(Ident {
-                    value: "baz".to_string(),
-                    quote_style: Some('"')
+        &Expr::Subscript {
+            expr: Box::new(Expr::Subscript {
+                expr: Box::new(Expr::Subscript {
+                    expr: Box::new(Expr::Identifier(Ident::new("bar"))),
+                    subscript: Box::new(Subscript::Index {
+                        index: num[0].clone()
+                    })
                 }),
-                Expr::Identifier(Ident {
+                subscript: Box::new(Subscript::Index {
+                    index: Expr::Identifier(Ident {
+                        value: "baz".to_string(),
+                        quote_style: Some('"')
+                    })
+                })
+            }),
+            subscript: Box::new(Subscript::Index {
+                index: Expr::Identifier(Ident {
                     value: "fooz".to_string(),
                     quote_style: Some('"')
                 })
-            ],
+            })
         },
         expr_from_projection(only(&select.projection)),
     );
@@ -1913,26 +1930,33 @@ fn parse_array_index_expr() {
     let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]";
     let select = pg_and_generic().verified_only_select(sql);
     assert_eq!(
-        &Expr::ArrayIndex {
-            obj: Box::new(Expr::Nested(Box::new(Expr::Cast {
-                kind: CastKind::Cast,
-                expr: Box::new(Expr::Array(Array {
-                    elem: vec![Expr::Array(Array {
-                        elem: vec![num[2].clone(), num[3].clone(),],
+        &Expr::Subscript {
+            expr: Box::new(Expr::Subscript {
+                expr: Box::new(Expr::Nested(Box::new(Expr::Cast {
+                    kind: CastKind::Cast,
+                    expr: Box::new(Expr::Array(Array {
+                        elem: vec![Expr::Array(Array {
+                            elem: vec![num[2].clone(), num[3].clone(),],
+                            named: true,
+                        })],
                         named: true,
-                    })],
-                    named: true,
-                })),
-                data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
-                    Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
-                        Box::new(DataType::Int(None)),
+                    })),
+                    data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
+                        Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
+                            Box::new(DataType::Int(None)),
+                            None
+                        ))),
                         None
-                    ))),
-                    None
-                )),
-                format: None,
-            }))),
-            indexes: vec![num[1].clone(), num[2].clone()],
+                    )),
+                    format: None,
+                }))),
+                subscript: Box::new(Subscript::Index {
+                    index: num[1].clone()
+                }),
+            }),
+            subscript: Box::new(Subscript::Index {
+                index: num[2].clone()
+            }),
         },
         expr_from_projection(only(&select.projection)),
     );
@@ -1948,6 +1972,116 @@ fn parse_array_index_expr() {
     );
 }
 
+#[test]
+fn parse_array_subscript() {
+    let tests = [
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2]",
+            Subscript::Index {
+                index: Expr::Value(number("2")),
+            },
+        ),
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[foo]",
+            Subscript::Index {
+                index: Expr::Identifier(Ident::new("foo")),
+            },
+        ),
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2:5]",
+            Subscript::Slice {
+                lower_bound: Some(Expr::Value(number("2"))),
+                upper_bound: Some(Expr::Value(number("5"))),
+                stride: None,
+            },
+        ),
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2:5:3]",
+            Subscript::Slice {
+                lower_bound: Some(Expr::Value(number("2"))),
+                upper_bound: Some(Expr::Value(number("5"))),
+                stride: Some(Expr::Value(number("3"))),
+            },
+        ),
+        (
+            "arr[array_length(arr) - 3:array_length(arr) - 1]",
+            Subscript::Slice {
+                lower_bound: Some(Expr::BinaryOp {
+                    left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])),
+                    op: BinaryOperator::Minus,
+                    right: Box::new(Expr::Value(number("3"))),
+                }),
+                upper_bound: Some(Expr::BinaryOp {
+                    left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])),
+                    op: BinaryOperator::Minus,
+                    right: Box::new(Expr::Value(number("1"))),
+                }),
+                stride: None,
+            },
+        ),
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[:5]",
+            Subscript::Slice {
+                lower_bound: None,
+                upper_bound: Some(Expr::Value(number("5"))),
+                stride: None,
+            },
+        ),
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2:]",
+            Subscript::Slice {
+                lower_bound: Some(Expr::Value(number("2"))),
+                upper_bound: None,
+                stride: None,
+            },
+        ),
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[:]",
+            Subscript::Slice {
+                lower_bound: None,
+                upper_bound: None,
+                stride: None,
+            },
+        ),
+    ];
+    for (sql, expect) in tests {
+        let Expr::Subscript { subscript, .. } = pg_and_generic().verified_expr(sql) else {
+            panic!("expected subscript expr");
+        };
+        assert_eq!(expect, *subscript);
+    }
+
+    pg_and_generic().verified_expr("schedule[:2][2:]");
+}
+
+#[test]
+fn parse_array_multi_subscript() {
+    let expr = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]");
+    assert_eq!(
+        Expr::Subscript {
+            expr: Box::new(Expr::Subscript {
+                expr: Box::new(call(
+                    "make_array",
+                    vec![
+                        Expr::Value(number("1")),
+                        Expr::Value(number("2")),
+                        Expr::Value(number("3"))
+                    ]
+                )),
+                subscript: Box::new(Subscript::Slice {
+                    lower_bound: Some(Expr::Value(number("1"))),
+                    upper_bound: Some(Expr::Value(number("2"))),
+                    stride: None,
+                }),
+            }),
+            subscript: Box::new(Subscript::Index {
+                index: Expr::Value(number("2")),
+            }),
+        },
+        expr,
+    );
+}
+
 #[test]
 fn parse_create_index() {
     let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)";
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 30f2cc601..6c2228a5e 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -344,6 +344,36 @@ fn parse_semi_structured_data_traversal() {
         })],
         select.projection
     );
+
+    // a json access used as a key to another json access
+    assert_eq!(
+        snowflake().verified_expr("a[b:c]"),
+        Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("a"))),
+            path: JsonPath {
+                path: vec![JsonPathElem::Bracket {
+                    key: Expr::JsonAccess {
+                        value: Box::new(Expr::Identifier(Ident::new("b"))),
+                        path: JsonPath {
+                            path: vec![JsonPathElem::Dot {
+                                key: "c".to_owned(),
+                                quoted: false
+                            }]
+                        }
+                    }
+                }]
+            }
+        }
+    );
+
+    // unquoted object keys cannot start with a digit
+    assert_eq!(
+        snowflake()
+            .parse_sql_statements("SELECT a:42")
+            .unwrap_err()
+            .to_string(),
+        "sql parser error: Expected variant object key name, found: 42"
+    );
 }
 
 #[test]