Skip to content

Commit

Permalink
fix(parser): fix lexing escaped and unicode characters in block strings
Browse files Browse the repository at this point in the history
fixes #341 fixes #342
  • Loading branch information
lrlna committed Nov 10, 2022
1 parent c5e2f95 commit c6e1339
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 18 deletions.
27 changes: 17 additions & 10 deletions crates/apollo-parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,6 @@ impl Cursor<'_> {
} else if is_line_terminator(c) {
self.add_err(Error::new("unexpected line terminator", c.to_string()));
}
was_backslash = c == '\\';
}

if !buf.ends_with('"') {
Expand Down Expand Up @@ -214,19 +213,27 @@ impl Cursor<'_> {

while !self.is_eof() {
let c = self.bump().unwrap();
if c == '"' {
buf.push(c);
let was_backslash = c == '\\';

if was_backslash && !is_escaped_char(c) && c != 'u' {
self.add_err(Error::new("unexpected escaped character", c.to_string()));
}

buf.push(c);

if was_backslash {
while self.first() == '"' {
buf.push(self.first());
self.bump();
}
} else if c == '"' {
if ('"', '"') == (self.first(), self.second()) {
buf.push(self.first());
buf.push(self.second());
self.bump();
self.bump();
break;
}
} else if is_source_char(c) {
buf.push(c);
} else {
break;
}
}
}
Expand Down Expand Up @@ -437,9 +444,9 @@ fn is_escaped_char(c: char) -> bool {

// SourceCharacter
//
// The GraphQL grammar historically spelled this production as
// /[\u0009\u000A\u000D\u0020-\uFFFF]/, where `\uFFFF` bounds a UTF-16 code
// unit. A Rust `char` is a full Unicode scalar value, so characters outside
// the Basic Multilingual Plane (emoji, for instance) arrive as one `char`
// greater than U+FFFF instead of as a surrogate pair. The upper bound is
// therefore `char::MAX`; capping it at U+FFFF would reject valid source text.
//
// Returns `true` for tab, carriage return, line feed, and every scalar value
// from U+0020 upwards. All other C0 control characters are rejected.
fn is_source_char(c: char) -> bool {
    matches!(c, '\t' | '\r' | '\n' | '\u{0020}'..=char::MAX)
}
// fn is_source_char(c: char) -> bool {
// matches!(c, '\t' | '\r' | '\n' | '\u{0020}'..='\u{FFFF}')
// }

#[cfg(test)]
mod test {
Expand Down
33 changes: 25 additions & 8 deletions crates/apollo-parser/test_data/lexer/ok/0008_block_string.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,28 @@ WHITESPACE@47:48 " "
NAME@48:54 "Filter"
WHITESPACE@54:55 " "
L_CURLY@55:56 "{"
WHITESPACE@56:61 "\n "
NAME@61:66 "title"
COLON@66:67 ":"
WHITESPACE@67:68 " "
NAME@68:74 "String"
WHITESPACE@74:75 "\n"
R_CURLY@75:76 "}"
EOF@76:76
WHITESPACE@56:59 "\n "
STRING_VALUE@59:93 "\"\"\"unicode in block string 🤷\"\"\""
WHITESPACE@93:98 "\n "
NAME@98:103 "title"
COLON@103:104 ":"
WHITESPACE@104:105 " "
NAME@105:111 "String"
WHITESPACE@111:112 "\n"
R_CURLY@112:113 "}"
WHITESPACE@113:115 "\n\n"
STRING_VALUE@115:136 "\"\"\"\n\\\"\"\" a/b \\\"\"\"\n\"\"\""
WHITESPACE@136:137 "\n"
NAME@137:142 "input"
WHITESPACE@142:143 " "
NAME@143:149 "Filter"
WHITESPACE@149:150 " "
L_CURLY@150:151 "{"
WHITESPACE@151:156 "\n "
NAME@156:161 "title"
COLON@161:162 ":"
WHITESPACE@162:163 " "
NAME@163:169 "String"
WHITESPACE@169:170 "\n"
R_CURLY@170:171 "}"
EOF@171:171

0 comments on commit c6e1339

Please sign in to comment.