Skip to content

Commit

Permalink
Support for Postgres array slice syntax (#1290)
Browse files Browse the repository at this point in the history
Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
jmhain and alamb authored May 31, 2024
1 parent 80c03f5 commit afa5f08
Show file tree
Hide file tree
Showing 5 changed files with 355 additions and 60 deletions.
83 changes: 72 additions & 11 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -679,7 +679,7 @@ pub enum Expr {
},
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`).
/// Note that depending on the dialect, struct like accesses may be
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
/// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
MapAccess {
column: Box<Expr>,
Expand Down Expand Up @@ -746,10 +746,10 @@ pub enum Expr {
/// ```
/// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
Dictionary(Vec<DictionaryField>),
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
ArrayIndex {
obj: Box<Expr>,
indexes: Vec<Expr>,
/// An access of nested data using subscript syntax, for example `array[2]`.
Subscript {
expr: Box<Expr>,
subscript: Box<Subscript>,
},
/// An array expression e.g. `ARRAY[1, 2]`
Array(Array),
Expand Down Expand Up @@ -805,6 +805,68 @@ pub enum Expr {
Lambda(LambdaFunction),
}

/// The contents inside the `[` and `]` in a subscript expression.
///
/// Rendered by its `Display` impl without the surrounding brackets: either a
/// single index expression, or `lower:upper` optionally followed by `:stride`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum Subscript {
/// Accesses the element of the array at the given index, e.g. the `2` in
/// `array[2]`.
Index { index: Expr },

/// Accesses a slice of an array on PostgreSQL, e.g.
///
/// ```plaintext
/// => select (array[1,2,3,4,5,6])[2:5];
/// -----------
/// {2,3,4,5}
/// ```
///
/// The lower and/or upper bound can be omitted to slice from the start or
/// end of the array respectively.
///
/// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
///
/// Also supports an optional "stride" as the last element (this is not
/// supported by postgres), e.g.
///
/// ```plaintext
/// => select (array[1,2,3,4,5,6])[1:6:2];
/// -----------
/// {1,3,5}
/// ```
Slice {
/// Expression written before the first `:`; `None` when omitted.
lower_bound: Option<Expr>,
/// Expression written after the first `:`; `None` when omitted.
upper_bound: Option<Expr>,
/// Expression written after the second `:`; `None` when there is no
/// stride, in which case the second `:` is not displayed either.
stride: Option<Expr>,
},
}

impl fmt::Display for Subscript {
    /// Writes the subscript contents without the enclosing `[` and `]`.
    ///
    /// * `Index` is written as the bare index expression.
    /// * `Slice` is written as `lower:upper`, where either bound may be
    ///   absent, followed by `:stride` only when a stride is present.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Handle the simple variant first so the rest of the body only has to
        // deal with the three optional parts of a slice.
        let (low, high, step) = match self {
            Subscript::Index { index } => return write!(f, "{index}"),
            Subscript::Slice {
                lower_bound,
                upper_bound,
                stride,
            } => (lower_bound, upper_bound, stride),
        };

        if let Some(low) = low {
            write!(f, "{low}")?;
        }
        // The first separator is always emitted, even when both bounds are absent.
        f.write_str(":")?;
        if let Some(high) = high {
            write!(f, "{high}")?;
        }
        match step {
            Some(step) => write!(f, ":{step}"),
            None => Ok(()),
        }
    }
}

/// A lambda function.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
Expand Down Expand Up @@ -1251,12 +1313,11 @@ impl fmt::Display for Expr {
Expr::Dictionary(fields) => {
write!(f, "{{{}}}", display_comma_separated(fields))
}
Expr::ArrayIndex { obj, indexes } => {
write!(f, "{obj}")?;
for i in indexes {
write!(f, "[{i}]")?;
}
Ok(())
Expr::Subscript {
expr,
subscript: key,
} => {
write!(f, "{expr}[{key}]")
}
Expr::Array(set) => {
write!(f, "{set}")
Expand Down
94 changes: 81 additions & 13 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2544,8 +2544,7 @@ impl<'a> Parser<'a> {
})
} else if Token::LBracket == tok {
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
// parse index
self.parse_array_index(expr)
self.parse_subscript(expr)
} else if dialect_of!(self is SnowflakeDialect) {
self.prev_token();
self.parse_json_access(expr)
Expand Down Expand Up @@ -2573,18 +2572,87 @@ impl<'a> Parser<'a> {
}
}

pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> {
let index = self.parse_expr()?;
self.expect_token(&Token::RBracket)?;
let mut indexes: Vec<Expr> = vec![index];
while self.consume_token(&Token::LBracket) {
let index = self.parse_expr()?;
/// Parses the contents of an array subscript, up to and including the
/// closing `]`. Accepted forms include:
/// * `[:]`
/// * `[l]`
/// * `[l:]`
/// * `[:u]`
/// * `[l:u]`
/// * `[l:u:s]`
///
/// A lone expression (`[l]`) produces [`Subscript::Index`]; every other form
/// produces [`Subscript::Slice`]. A trailing `:` with no stride after the
/// upper bound is also accepted and yields `stride: None`.
///
/// The parser must be positioned immediately after the opening `[`.
fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> {
// At either `<lower>:(rest)` or `:(rest)]`. A leading `:` means the lower
// bound was omitted; that `:` is consumed right here.
let lower_bound = if self.consume_token(&Token::Colon) {
None
} else {
Some(self.parse_expr()?)
};

// Check for end: `[l]` is a plain index, `[:]` is a slice with no bounds.
if self.consume_token(&Token::RBracket) {
if let Some(lower_bound) = lower_bound {
return Ok(Subscript::Index { index: lower_bound });
};
return Ok(Subscript::Slice {
lower_bound,
upper_bound: None,
stride: None,
});
}

// Consume the `:` separating the bounds. When the lower bound was omitted
// the `:` has already been consumed above, so it is only expected here
// after an explicit lower bound.
if lower_bound.is_some() {
self.expect_token(&Token::Colon)?;
}

// We are now at either `]` (upper bound omitted) or `<upper>(rest)]`.
let upper_bound = if self.consume_token(&Token::RBracket) {
return Ok(Subscript::Slice {
lower_bound,
upper_bound: None,
stride: None,
});
} else {
Some(self.parse_expr()?)
};

// Check for end: a slice without a stride.
if self.consume_token(&Token::RBracket) {
return Ok(Subscript::Slice {
lower_bound,
upper_bound,
stride: None,
});
}

// We are now at `:]` or `:stride]`.
self.expect_token(&Token::Colon)?;
let stride = if self.consume_token(&Token::RBracket) {
None
} else {
Some(self.parse_expr()?)
};

// In the `None` case the closing `]` was already consumed just above.
if stride.is_some() {
self.expect_token(&Token::RBracket)?;
indexes.push(index);
}
Ok(Expr::ArrayIndex {
obj: Box::new(expr),
indexes,

Ok(Subscript::Slice {
lower_bound,
upper_bound,
stride,
})
}

/// Parses a subscript applied to `expr`, such as the `[1:3]` in `arr[1:3]`,
/// and wraps both in an [`Expr::Subscript`].
///
/// The parser must be positioned immediately after the opening `[`. Any
/// error from parsing the subscript contents is returned unchanged.
pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> {
    self.parse_subscript_inner().map(|parsed| Expr::Subscript {
        expr: Box::new(expr),
        subscript: Box::new(parsed),
    })
}

Expand Down Expand Up @@ -2838,7 +2906,7 @@ impl<'a> Parser<'a> {
Ok(Self::MUL_DIV_MOD_OP_PREC)
}
Token::DoubleColon => Ok(50),
Token::Colon => Ok(50),
Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
Token::ExclamationMark => Ok(50),
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
Token::Arrow
Expand Down
8 changes: 5 additions & 3 deletions tests/sqlparser_duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -528,16 +528,18 @@ fn test_array_index() {
_ => panic!("Expected an expression with alias"),
};
assert_eq!(
&Expr::ArrayIndex {
obj: Box::new(Expr::Array(Array {
&Expr::Subscript {
expr: Box::new(Expr::Array(Array {
elem: vec![
Expr::Value(Value::SingleQuotedString("a".to_owned())),
Expr::Value(Value::SingleQuotedString("b".to_owned())),
Expr::Value(Value::SingleQuotedString("c".to_owned()))
],
named: false
})),
indexes: vec![Expr::Value(number("3"))]
subscript: Box::new(Subscript::Index {
index: Expr::Value(number("3"))
})
},
expr
);
Expand Down
Loading

0 comments on commit afa5f08

Please sign in to comment.