Skip to content

Commit

Permalink
feat: support raw string literal of BigQuery (#812)
Browse files Browse the repository at this point in the history
* add tests

* feat: parse raw literal of bq

* merge double quoted & single quoted to raw string literal

* Update src/ast/value.rs

---------

Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
togami2864 and alamb authored Mar 1, 2023
1 parent 70917a5 commit 58de3c1
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pub enum Value {
SingleQuotedByteStringLiteral(String),
/// B"string value"
DoubleQuotedByteStringLiteral(String),
/// R'string value' or r'string value' or r"string value"
/// <https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals>
RawStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// X'hex value'
Expand Down Expand Up @@ -74,6 +77,7 @@ impl fmt::Display for Value {
Value::Boolean(v) => write!(f, "{v}"),
Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
Value::DoubleQuotedByteStringLiteral(v) => write!(f, "B\"{v}\""),
Value::RawStringLiteral(v) => write!(f, "R'{v}'"),
Value::Null => write!(f, "NULL"),
Value::Placeholder(v) => write!(f, "{v}"),
Value::UnQuotedString(v) => write!(f, "{v}"),
Expand Down
2 changes: 2 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,7 @@ impl<'a> Parser<'a> {
| Token::DollarQuotedString(_)
| Token::SingleQuotedByteStringLiteral(_)
| Token::DoubleQuotedByteStringLiteral(_)
| Token::RawStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Expand Down Expand Up @@ -4133,6 +4134,7 @@ impl<'a> Parser<'a> {
Token::DoubleQuotedByteStringLiteral(ref s) => {
Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
}
Token::RawStringLiteral(ref s) => Ok(Value::RawStringLiteral(s.clone())),
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
Expand Down
22 changes: 22 additions & 0 deletions src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ pub enum Token {
SingleQuotedByteStringLiteral(String),
/// Byte string literal: i.e: b"string" or B"string"
DoubleQuotedByteStringLiteral(String),
/// Raw string literal: i.e: r'string' or R'string' or r"string" or R"string"
RawStringLiteral(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
Expand Down Expand Up @@ -195,6 +197,7 @@ impl fmt::Display for Token {
Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"),
Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""),
Token::RawStringLiteral(ref s) => write!(f, "R'{s}'"),
Token::Comma => f.write_str(","),
Token::Whitespace(ws) => write!(f, "{ws}"),
Token::DoubleEq => f.write_str("=="),
Expand Down Expand Up @@ -518,6 +521,25 @@ impl<'a> Tokenizer<'a> {
}
}
}
// BigQuery uses r or R for raw string literal
b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
chars.next(); // consume
match chars.peek() {
Some('\'') => {
let s = self.tokenize_quoted_string(chars, '\'')?;
Ok(Some(Token::RawStringLiteral(s)))
}
Some('\"') => {
let s = self.tokenize_quoted_string(chars, '\"')?;
Ok(Some(Token::RawStringLiteral(s)))
}
_ => {
// regular identifier starting with an "r" or "R"
let s = self.tokenize_word(b, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// Redshift uses lower case n for national string literal
n @ 'N' | n @ 'n' => {
chars.next(); // consume, to check the next char
Expand Down
32 changes: 32 additions & 0 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,38 @@ fn parse_byte_literal() {
bigquery().one_statement_parses_to(sql, r#"SELECT B'abc', B"abc""#);
}

#[test]
fn parse_raw_literal() {
let sql = r#"SELECT R'abc', R"abc", R'f\(abc,(.*),def\)', R"f\(abc,(.*),def\)""#;
let stmt = bigquery().one_statement_parses_to(
sql,
r#"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'"#,
);
if let Statement::Query(query) = stmt {
if let SetExpr::Select(select) = *query.body {
assert_eq!(4, select.projection.len());
assert_eq!(
&Expr::Value(Value::RawStringLiteral("abc".to_string())),
expr_from_projection(&select.projection[0])
);
assert_eq!(
&Expr::Value(Value::RawStringLiteral("abc".to_string())),
expr_from_projection(&select.projection[1])
);
assert_eq!(
&Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())),
expr_from_projection(&select.projection[2])
);
assert_eq!(
&Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())),
expr_from_projection(&select.projection[3])
);
return;
}
}
panic!("invalid query")
}

#[test]
fn parse_table_identifiers() {
fn test_table_ident(ident: &str, expected: Vec<Ident>) {
Expand Down

0 comments on commit 58de3c1

Please sign in to comment.