diff --git a/src/ast/value.rs b/src/ast/value.rs
index 154aafc76..95ea978d0 100644
--- a/src/ast/value.rs
+++ b/src/ast/value.rs
@@ -45,6 +45,9 @@ pub enum Value {
     SingleQuotedByteStringLiteral(String),
     /// B"string value"
     DoubleQuotedByteStringLiteral(String),
+    /// R'string value' or r'string value' or r"string value"
+    ///
+    RawStringLiteral(String),
     /// N'string value'
     NationalStringLiteral(String),
     /// X'hex value'
@@ -74,6 +77,7 @@ impl fmt::Display for Value {
             Value::Boolean(v) => write!(f, "{v}"),
             Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"),
             Value::DoubleQuotedByteStringLiteral(v) => write!(f, "B\"{v}\""),
+            Value::RawStringLiteral(v) => write!(f, "R'{v}'"),
             Value::Null => write!(f, "NULL"),
             Value::Placeholder(v) => write!(f, "{v}"),
             Value::UnQuotedString(v) => write!(f, "{v}"),
diff --git a/src/parser.rs b/src/parser.rs
index 0473b5181..4da20e15e 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -792,6 +792,7 @@ impl<'a> Parser<'a> {
             | Token::DollarQuotedString(_)
             | Token::SingleQuotedByteStringLiteral(_)
             | Token::DoubleQuotedByteStringLiteral(_)
+            | Token::RawStringLiteral(_)
             | Token::NationalStringLiteral(_)
             | Token::HexStringLiteral(_) => {
                 self.prev_token();
@@ -4133,6 +4134,7 @@ impl<'a> Parser<'a> {
             Token::DoubleQuotedByteStringLiteral(ref s) => {
                 Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
             }
+            Token::RawStringLiteral(ref s) => Ok(Value::RawStringLiteral(s.clone())),
             Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
             Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
             Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b05667c2b..8134947df 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -62,6 +62,8 @@ pub enum Token {
     SingleQuotedByteStringLiteral(String),
     /// Byte string literal: i.e: b"string" or B"string"
     DoubleQuotedByteStringLiteral(String),
+    /// Raw string literal: i.e: r'string' or R'string' or r"string" or R"string"
+    RawStringLiteral(String),
     /// "National" string literal: i.e: N'string'
     NationalStringLiteral(String),
     /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
@@ -195,6 +197,7 @@ impl fmt::Display for Token {
             Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"),
             Token::SingleQuotedByteStringLiteral(ref s) => write!(f, "B'{s}'"),
             Token::DoubleQuotedByteStringLiteral(ref s) => write!(f, "B\"{s}\""),
+            Token::RawStringLiteral(ref s) => write!(f, "R'{s}'"),
             Token::Comma => f.write_str(","),
             Token::Whitespace(ws) => write!(f, "{ws}"),
             Token::DoubleEq => f.write_str("=="),
@@ -518,6 +521,25 @@ impl<'a> Tokenizer<'a> {
                         }
                     }
                 }
+                // BigQuery uses r or R for raw string literal
+                b @ 'R' | b @ 'r' if dialect_of!(self is BigQueryDialect | GenericDialect) => {
+                    chars.next(); // consume
+                    match chars.peek() {
+                        Some('\'') => {
+                            let s = self.tokenize_quoted_string(chars, '\'')?;
+                            Ok(Some(Token::RawStringLiteral(s)))
+                        }
+                        Some('\"') => {
+                            let s = self.tokenize_quoted_string(chars, '\"')?;
+                            Ok(Some(Token::RawStringLiteral(s)))
+                        }
+                        _ => {
+                            // regular identifier starting with an "r" or "R"
+                            let s = self.tokenize_word(b, chars);
+                            Ok(Some(Token::make_word(&s, None)))
+                        }
+                    }
+                }
                 // Redshift uses lower case n for national string literal
                 n @ 'N' | n @ 'n' => {
                     chars.next(); // consume, to check the next char
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 1a04bc8ba..85b540e35 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -55,6 +55,38 @@ fn parse_byte_literal() {
     bigquery().one_statement_parses_to(sql, r#"SELECT B'abc', B"abc""#);
 }
 
+#[test]
+fn parse_raw_literal() {
+    let sql = r#"SELECT R'abc', R"abc", R'f\(abc,(.*),def\)', R"f\(abc,(.*),def\)""#;
+    let stmt = bigquery().one_statement_parses_to(
+        sql,
+        r#"SELECT R'abc', R'abc', R'f\(abc,(.*),def\)', R'f\(abc,(.*),def\)'"#,
+    );
+    if let Statement::Query(query) = stmt {
+        if let SetExpr::Select(select) = *query.body {
+            assert_eq!(4, select.projection.len());
+            assert_eq!(
+                &Expr::Value(Value::RawStringLiteral("abc".to_string())),
+                expr_from_projection(&select.projection[0])
+            );
+            assert_eq!(
+                &Expr::Value(Value::RawStringLiteral("abc".to_string())),
+                expr_from_projection(&select.projection[1])
+            );
+            assert_eq!(
+                &Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())),
+                expr_from_projection(&select.projection[2])
+            );
+            assert_eq!(
+                &Expr::Value(Value::RawStringLiteral(r#"f\(abc,(.*),def\)"#.to_string())),
+                expr_from_projection(&select.projection[3])
+            );
+            return;
+        }
+    }
+    panic!("invalid query")
+}
+
 #[test]
 fn parse_table_identifiers() {
     fn test_table_ident(ident: &str, expected: Vec<Ident>) {
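
For reference, below is a minimal usage sketch (not part of the diff) showing how the new variant surfaces to a caller of the crate. It assumes the public API exercised by the tests above (Parser::parse_sql with BigQueryDialect); the SQL text, variable names, and assertion value are illustrative only.

// Illustrative sketch, not part of the change above.
use sqlparser::ast::{Expr, SelectItem, SetExpr, Statement, Value};
use sqlparser::dialect::BigQueryDialect;
use sqlparser::parser::Parser;

fn main() {
    // The backslash is kept verbatim, mirroring the parse_raw_literal test:
    // raw strings receive no escape processing at tokenization time.
    let sql = r"SELECT R'\d+'";
    let statements = Parser::parse_sql(&BigQueryDialect {}, sql).unwrap();

    if let Statement::Query(query) = &statements[0] {
        if let SetExpr::Select(select) = query.body.as_ref() {
            if let SelectItem::UnnamedExpr(Expr::Value(Value::RawStringLiteral(s))) =
                &select.projection[0]
            {
                assert_eq!(s.as_str(), r"\d+");
            }
        }
    }
}

Note that raw-ness only affects tokenization: the AST stores the literal body as a plain String, and the Display impl re-emits it with an uppercase R and single quotes, which is why the test above canonicalizes R"abc" to R'abc'.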