diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index ee39294a3..320dfc60e 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -679,7 +679,7 @@ pub enum Expr {
},
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`
/// Note that depending on the dialect, struct like accesses may be
- /// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
+ /// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
///
MapAccess {
column: Box,
@@ -746,10 +746,10 @@ pub enum Expr {
/// ```
/// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
Dictionary(Vec),
- /// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
- ArrayIndex {
- obj: Box,
- indexes: Vec,
+ /// An access of nested data using subscript syntax, for example `array[2]`.
+ Subscript {
+ expr: Box,
+ subscript: Box,
},
/// An array expression e.g. `ARRAY[1, 2]`
Array(Array),
@@ -805,6 +805,68 @@ pub enum Expr {
Lambda(LambdaFunction),
}
+/// The contents inside the `[` and `]` in a subscript expression.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum Subscript {
+    /// Accesses the element of the array at the given index.
+    Index { index: Expr },
+
+    /// Accesses a slice of an array on PostgreSQL, e.g.
+    ///
+    /// ```plaintext
+    /// => select (array[1,2,3,4,5,6])[2:5];
+    /// -----------
+    /// {2,3,4,5}
+    /// ```
+    ///
+    /// The lower and/or upper bound can be omitted to slice from the start or
+    /// end of the array respectively.
+    ///
+    /// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
+    ///
+    /// Also supports an optional "stride" as the last element (this is not
+    /// supported by postgres), e.g.
+    ///
+    /// ```plaintext
+    /// => select (array[1,2,3,4,5,6])[1:6:2];
+    /// -----------
+    /// {1,3,5}
+    /// ```
+    Slice {
+        lower_bound: Option<Expr>,
+        upper_bound: Option<Expr>,
+        stride: Option<Expr>,
+    },
+}
+
+impl fmt::Display for Subscript {
+    /// Writes the text that sits between `[` and `]`; no brackets are written here.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let (lo, hi, step) = match self {
+            Self::Index { index } => return write!(f, "{index}"),
+            Self::Slice {
+                lower_bound,
+                upper_bound,
+                stride,
+            } => (lower_bound, upper_bound, stride),
+        };
+        if let Some(lo) = lo {
+            write!(f, "{lo}")?;
+        }
+        // The first `:` is written for every slice, even when both bounds are omitted.
+        f.write_str(":")?;
+        if let Some(hi) = hi {
+            write!(f, "{hi}")?;
+        }
+        match step {
+            Some(step) => write!(f, ":{step}"),
+            None => Ok(()),
+        }
+    }
+}
+
/// A lambda function.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -1251,12 +1313,11 @@ impl fmt::Display for Expr {
Expr::Dictionary(fields) => {
write!(f, "{{{}}}", display_comma_separated(fields))
}
- Expr::ArrayIndex { obj, indexes } => {
- write!(f, "{obj}")?;
- for i in indexes {
- write!(f, "[{i}]")?;
- }
- Ok(())
+ Expr::Subscript {
+ expr,
+ subscript: key,
+ } => {
+ write!(f, "{expr}[{key}]")
}
Expr::Array(set) => {
write!(f, "{set}")
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 123af045a..c6750644c 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -2544,8 +2544,7 @@ impl<'a> Parser<'a> {
})
} else if Token::LBracket == tok {
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
- // parse index
- self.parse_array_index(expr)
+ self.parse_subscript(expr)
} else if dialect_of!(self is SnowflakeDialect) {
self.prev_token();
self.parse_json_access(expr)
@@ -2573,18 +2572,87 @@ impl<'a> Parser<'a> {
}
}
-    pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> {
-        let index = self.parse_expr()?;
-        self.expect_token(&Token::RBracket)?;
-        let mut indexes: Vec<Expr> = vec![index];
-        while self.consume_token(&Token::LBracket) {
-            let index = self.parse_expr()?;
+    /// Parses an array subscript like
+    /// * `[:]`
+    /// * `[l]`
+    /// * `[l:]`
+    /// * `[:u]`
+    /// * `[l:u]`
+    /// * `[l:u:s]`
+    ///
+    /// Expects the parser to be right after `[`; consumes through the closing `]`.
+    fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> {
+        // at either `<lower>:(rest)` or `:(rest)]`
+        let lower_bound = if self.consume_token(&Token::Colon) {
+            None
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        // `]` already: `[l]` is a plain index, `[:]` is a slice with no bounds
+        if self.consume_token(&Token::RBracket) {
+            if let Some(lower_bound) = lower_bound {
+                return Ok(Subscript::Index { index: lower_bound });
+            };
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound: None,
+                stride: None,
+            });
+        }
+
+        // consume the `:` after the lower bound (without one, it was consumed above)
+        if lower_bound.is_some() {
+            self.expect_token(&Token::Colon)?;
+        }
+
+        // we are now at either `]`, `<upper>(rest)]`
+        let upper_bound = if self.consume_token(&Token::RBracket) {
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound: None,
+                stride: None,
+            });
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        // `]` here ends a slice without a stride: `[l:u]` or `[:u]`
+        if self.consume_token(&Token::RBracket) {
+            return Ok(Subscript::Slice {
+                lower_bound,
+                upper_bound,
+                stride: None,
+            });
+        }
+
+        // we are now at `:]` or `:stride]` (a trailing `:` leaves the stride unset)
+        self.expect_token(&Token::Colon)?;
+        let stride = if self.consume_token(&Token::RBracket) {
+            None
+        } else {
+            Some(self.parse_expr()?)
+        };
+
+        if stride.is_some() {
             self.expect_token(&Token::RBracket)?;
-            indexes.push(index);
         }
-        Ok(Expr::ArrayIndex {
-            obj: Box::new(expr),
-            indexes,
+
+        Ok(Subscript::Slice {
+            lower_bound,
+            upper_bound,
+            stride,
+        })
+    }
+
+    /// Parses a subscript such as `[2]` or `[1:3]` and applies it to `expr`.
+    ///
+    /// Expects the parser to be right after `[`.
+    pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> {
+        let subscript = self.parse_subscript_inner()?;
+        Ok(Expr::Subscript {
+            expr: Box::new(expr),
+            subscript: Box::new(subscript),
         })
     }
@@ -2838,7 +2906,7 @@ impl<'a> Parser<'a> {
Ok(Self::MUL_DIV_MOD_OP_PREC)
}
Token::DoubleColon => Ok(50),
- Token::Colon => Ok(50),
+ Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
Token::ExclamationMark => Ok(50),
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
Token::Arrow
diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs
index a84da5378..8d12945dd 100644
--- a/tests/sqlparser_duckdb.rs
+++ b/tests/sqlparser_duckdb.rs
@@ -528,8 +528,8 @@ fn test_array_index() {
_ => panic!("Expected an expression with alias"),
};
assert_eq!(
- &Expr::ArrayIndex {
- obj: Box::new(Expr::Array(Array {
+ &Expr::Subscript {
+ expr: Box::new(Expr::Array(Array {
elem: vec![
Expr::Value(Value::SingleQuotedString("a".to_owned())),
Expr::Value(Value::SingleQuotedString("b".to_owned())),
@@ -537,7 +537,9 @@ fn test_array_index() {
],
named: false
})),
- indexes: vec![Expr::Value(number("3"))]
+ subscript: Box::new(Subscript::Index {
+ index: Expr::Value(number("3"))
+ })
},
expr
);
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index ffcd783f0..677246a51 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1873,9 +1873,11 @@ fn parse_array_index_expr() {
let sql = "SELECT foo[0] FROM foos";
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::ArrayIndex {
- obj: Box::new(Expr::Identifier(Ident::new("foo"))),
- indexes: vec![num[0].clone()],
+ &Expr::Subscript {
+ expr: Box::new(Expr::Identifier(Ident::new("foo"))),
+ subscript: Box::new(Subscript::Index {
+ index: num[0].clone()
+ }),
},
expr_from_projection(only(&select.projection)),
);
@@ -1883,9 +1885,16 @@ fn parse_array_index_expr() {
let sql = "SELECT foo[0][0] FROM foos";
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::ArrayIndex {
- obj: Box::new(Expr::Identifier(Ident::new("foo"))),
- indexes: vec![num[0].clone(), num[0].clone()],
+ &Expr::Subscript {
+ expr: Box::new(Expr::Subscript {
+ expr: Box::new(Expr::Identifier(Ident::new("foo"))),
+ subscript: Box::new(Subscript::Index {
+ index: num[0].clone()
+ }),
+ }),
+ subscript: Box::new(Subscript::Index {
+ index: num[0].clone()
+ }),
},
expr_from_projection(only(&select.projection)),
);
@@ -1893,19 +1902,27 @@ fn parse_array_index_expr() {
let sql = r#"SELECT bar[0]["baz"]["fooz"] FROM foos"#;
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::ArrayIndex {
- obj: Box::new(Expr::Identifier(Ident::new("bar"))),
- indexes: vec![
- num[0].clone(),
- Expr::Identifier(Ident {
- value: "baz".to_string(),
- quote_style: Some('"')
+ &Expr::Subscript {
+ expr: Box::new(Expr::Subscript {
+ expr: Box::new(Expr::Subscript {
+ expr: Box::new(Expr::Identifier(Ident::new("bar"))),
+ subscript: Box::new(Subscript::Index {
+ index: num[0].clone()
+ })
}),
- Expr::Identifier(Ident {
+ subscript: Box::new(Subscript::Index {
+ index: Expr::Identifier(Ident {
+ value: "baz".to_string(),
+ quote_style: Some('"')
+ })
+ })
+ }),
+ subscript: Box::new(Subscript::Index {
+ index: Expr::Identifier(Ident {
value: "fooz".to_string(),
quote_style: Some('"')
})
- ],
+ })
},
expr_from_projection(only(&select.projection)),
);
@@ -1913,26 +1930,33 @@ fn parse_array_index_expr() {
let sql = "SELECT (CAST(ARRAY[ARRAY[2, 3]] AS INT[][]))[1][2]";
let select = pg_and_generic().verified_only_select(sql);
assert_eq!(
- &Expr::ArrayIndex {
- obj: Box::new(Expr::Nested(Box::new(Expr::Cast {
- kind: CastKind::Cast,
- expr: Box::new(Expr::Array(Array {
- elem: vec![Expr::Array(Array {
- elem: vec![num[2].clone(), num[3].clone(),],
+ &Expr::Subscript {
+ expr: Box::new(Expr::Subscript {
+ expr: Box::new(Expr::Nested(Box::new(Expr::Cast {
+ kind: CastKind::Cast,
+ expr: Box::new(Expr::Array(Array {
+ elem: vec![Expr::Array(Array {
+ elem: vec![num[2].clone(), num[3].clone(),],
+ named: true,
+ })],
named: true,
- })],
- named: true,
- })),
- data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
- Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
- Box::new(DataType::Int(None)),
+ })),
+ data_type: DataType::Array(ArrayElemTypeDef::SquareBracket(
+ Box::new(DataType::Array(ArrayElemTypeDef::SquareBracket(
+ Box::new(DataType::Int(None)),
+ None
+ ))),
None
- ))),
- None
- )),
- format: None,
- }))),
- indexes: vec![num[1].clone(), num[2].clone()],
+ )),
+ format: None,
+ }))),
+ subscript: Box::new(Subscript::Index {
+ index: num[1].clone()
+ }),
+ }),
+ subscript: Box::new(Subscript::Index {
+ index: num[2].clone()
+ }),
},
expr_from_projection(only(&select.projection)),
);
@@ -1948,6 +1972,116 @@ fn parse_array_index_expr() {
);
}
+#[test]
+fn parse_array_subscript() {
+    let int = |digits: &str| Expr::Value(number(digits));
+    let array_length_minus = |n: &str| Expr::BinaryOp {
+        left: Box::new(call("array_length", [Expr::Identifier(Ident::new("arr"))])),
+        op: BinaryOperator::Minus,
+        right: Box::new(int(n)),
+    };
+    let cases = [
+        // plain numeric index
+        ("(ARRAY[1, 2, 3, 4, 5, 6])[2]", Subscript::Index { index: int("2") }),
+        // identifier used as the index
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[foo]",
+            Subscript::Index {
+                index: Expr::Identifier(Ident::new("foo")),
+            },
+        ),
+        // lower and upper bound
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2:5]",
+            Subscript::Slice {
+                lower_bound: Some(int("2")),
+                upper_bound: Some(int("5")),
+                stride: None,
+            },
+        ),
+        // lower bound, upper bound and stride
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2:5:3]",
+            Subscript::Slice {
+                lower_bound: Some(int("2")),
+                upper_bound: Some(int("5")),
+                stride: Some(int("3")),
+            },
+        ),
+        // bounds that are arithmetic expressions
+        (
+            "arr[array_length(arr) - 3:array_length(arr) - 1]",
+            Subscript::Slice {
+                lower_bound: Some(array_length_minus("3")),
+                upper_bound: Some(array_length_minus("1")),
+                stride: None,
+            },
+        ),
+        // upper bound only
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[:5]",
+            Subscript::Slice {
+                lower_bound: None,
+                upper_bound: Some(int("5")),
+                stride: None,
+            },
+        ),
+        // lower bound only
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[2:]",
+            Subscript::Slice {
+                lower_bound: Some(int("2")),
+                upper_bound: None,
+                stride: None,
+            },
+        ),
+        // neither bound
+        (
+            "(ARRAY[1, 2, 3, 4, 5, 6])[:]",
+            Subscript::Slice {
+                lower_bound: None,
+                upper_bound: None,
+                stride: None,
+            },
+        ),
+    ];
+    for (sql, expected) in cases {
+        match pg_and_generic().verified_expr(sql) {
+            Expr::Subscript { subscript, .. } => assert_eq!(expected, *subscript),
+            _ => panic!("expected subscript expr"),
+        }
+    }
+    pg_and_generic().verified_expr("schedule[:2][2:]");
+}
+
+#[test]
+fn parse_array_multi_subscript() {
+    // Chained subscripts nest left to right: the slice `[1:2]` wraps the call,
+    // and the index `[2]` wraps that slice.
+    let int = |digits: &str| Expr::Value(number(digits));
+
+    let sliced = Expr::Subscript {
+        expr: Box::new(call(
+            "make_array",
+            vec![int("1"), int("2"), int("3")],
+        )),
+        subscript: Box::new(Subscript::Slice {
+            lower_bound: Some(int("1")),
+            upper_bound: Some(int("2")),
+            stride: None,
+        }),
+    };
+    let expected = Expr::Subscript {
+        expr: Box::new(sliced),
+        subscript: Box::new(Subscript::Index {
+            index: int("2"),
+        }),
+    };
+
+    let parsed = pg_and_generic().verified_expr("make_array(1, 2, 3)[1:2][2]");
+    assert_eq!(expected, parsed);
+}
+
#[test]
fn parse_create_index() {
let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2)";
diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs
index 7492802c7..d213efd7b 100644
--- a/tests/sqlparser_snowflake.rs
+++ b/tests/sqlparser_snowflake.rs
@@ -394,6 +394,36 @@ fn parse_semi_structured_data_traversal() {
})],
select.projection
);
+
+ // a json access used as a key to another json access
+ assert_eq!(
+ snowflake().verified_expr("a[b:c]"),
+ Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("a"))),
+ path: JsonPath {
+ path: vec![JsonPathElem::Bracket {
+ key: Expr::JsonAccess {
+ value: Box::new(Expr::Identifier(Ident::new("b"))),
+ path: JsonPath {
+ path: vec![JsonPathElem::Dot {
+ key: "c".to_owned(),
+ quoted: false
+ }]
+ }
+ }
+ }]
+ }
+ }
+ );
+
+ // unquoted object keys cannot start with a digit
+ assert_eq!(
+ snowflake()
+ .parse_sql_statements("SELECT a:42")
+ .unwrap_err()
+ .to_string(),
+ "sql parser error: Expected variant object key name, found: 42"
+ );
}
#[test]