Skip to content

Commit

Permalink
Merge pull request #395 from dtolnay/cstr
Browse files: browse the repository at this point in the history
Implement `c"…"` literal parsing
  • Loading branch information
dtolnay authored Jun 24, 2023
2 parents 3c20a59 + 96c97e5 commit 3f6d84b
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 13 deletions.
121 changes: 108 additions & 13 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,11 @@ fn leaf_token(input: Cursor) -> PResult<TokenTree> {
}

fn ident(input: Cursor) -> PResult<crate::Ident> {
if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"]
.iter()
.any(|prefix| input.starts_with(prefix))
if [
"r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
]
.iter()
.any(|prefix| input.starts_with(prefix))
{
Err(Reject)
} else {
Expand Down Expand Up @@ -337,6 +339,8 @@ fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
Ok(ok)
} else if let Ok(ok) = byte_string(input) {
Ok(ok)
} else if let Ok(ok) = c_string(input) {
Ok(ok)
} else if let Ok(ok) = byte(input) {
Ok(ok)
} else if let Ok(ok) = character(input) {
Expand Down Expand Up @@ -389,9 +393,7 @@ fn cooked_string(input: Cursor) -> Result<Cursor, Reject> {
Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
| Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
Some((_, 'u')) => {
if !backslash_u(&mut chars) {
break;
}
backslash_u(&mut chars)?;
}
Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => {
let mut last = ch;
Expand Down Expand Up @@ -539,6 +541,87 @@ fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
Err(Reject)
}

/// Parses a C string literal in either its cooked or raw form.
///
/// The cooked form is tried first by consuming a leading `c"`; if that prefix
/// is absent, a leading `cr` selects the raw form. Input starting with neither
/// prefix is rejected.
fn c_string(input: Cursor) -> Result<Cursor, Reject> {
    // Cooked literal: everything after the opening quote goes to the cooked parser.
    if let Ok(rest) = input.parse("c\"") {
        return cooked_c_string(rest);
    }

    // Raw literal: the `#`s and opening quote are left for the raw parser.
    match input.parse("cr") {
        Ok(rest) => raw_c_string(rest),
        Err(_) => Err(Reject),
    }
}

/// Scans the remainder of a raw C string literal once the `cr` prefix has
/// already been consumed by the caller.
///
/// The literal ends at the first `"` that is immediately followed by the
/// delimiter returned from `delimiter_of_raw_string`. The scan is rejected if
/// it meets a NUL byte, a carriage return not directly followed by a line
/// feed, or the end of input before the closing delimiter.
///
/// On success the result is whatever `literal_suffix` returns for the input
/// just past the closing delimiter.
// NOTE(review): `delimiter_of_raw_string` and `literal_suffix` are defined
// outside this view; presumably the former consumes the opening `#`s and
// quote and the latter skips an optional literal suffix -- confirm.
fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
    let (input, delimiter) = delimiter_of_raw_string(input)?;
    let mut bytes = input.bytes().enumerate();

    while let Some((offset, byte)) = bytes.next() {
        // A quote only terminates the literal when the delimiter follows it;
        // otherwise it is ordinary content.
        if byte == b'"' && input.rest[offset + 1..].starts_with(delimiter) {
            let after = input.advance(offset + 1 + delimiter.len());
            return Ok(literal_suffix(after));
        }

        // Interior NUL bytes are not allowed in a C string.
        if byte == b'\0' {
            return Err(Reject);
        }

        // A carriage return is only accepted as part of a CRLF pair.
        if byte == b'\r' {
            match bytes.next() {
                Some((_, b'\n')) => {}
                _ => return Err(Reject),
            }
        }
    }

    // Ran out of input without finding the closing delimiter.
    Err(Reject)
}

/// Scans the remainder of a cooked (escape-processing) C string literal once
/// the opening `c"` has already been consumed by the caller.
///
/// The literal ends at the first unescaped `"`; on success the result is
/// whatever `literal_suffix` returns for the input just past that quote.
///
/// The scan is rejected when it encounters:
/// - a literal NUL character, or an escape that denotes NUL (`\x00` via
///   `backslash_x_nonzero`, or a `\u{...}` whose value is `'\0'`);
/// - a carriage return that is not directly followed by a line feed;
/// - a backslash followed by anything other than `x`, `n`, `r`, `t`, `\`,
///   `'`, `"`, `u`, or a newline;
/// - the end of input before the closing quote.
///
/// Note that `\0` is deliberately absent from the accepted escapes.
fn cooked_c_string(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices().peekable();

    while let Some((offset, current)) = chars.next() {
        match current {
            '"' => return Ok(literal_suffix(input.advance(offset + 1))),
            '\0' => return Err(Reject),
            '\r' => {
                // Bare CR is invalid; it must be the first half of CRLF.
                if chars.next().map(|(_, c)| c) != Some('\n') {
                    return Err(Reject);
                }
            }
            '\\' => {
                let escape = match chars.next() {
                    Some((_, c)) => c,
                    None => return Err(Reject),
                };
                match escape {
                    'n' | 'r' | 't' | '\\' | '\'' | '"' => {}
                    'x' => {
                        if !backslash_x_nonzero(&mut chars) {
                            return Err(Reject);
                        }
                    }
                    'u' => {
                        if backslash_u(&mut chars)? == '\0' {
                            return Err(Reject);
                        }
                    }
                    '\n' | '\r' => {
                        // Backslash-newline continuation: swallow the newline
                        // and any run of spaces, tabs and newlines after it.
                        let mut last = escape;
                        loop {
                            // Every CR in the run must be completed by an LF.
                            if last == '\r' {
                                match chars.next() {
                                    Some((_, '\n')) => {}
                                    _ => return Err(Reject),
                                }
                            }
                            match chars.peek().map(|&(_, c)| c) {
                                Some(c) if c == ' ' || c == '\t' || c == '\n' || c == '\r' => {
                                    last = c;
                                    chars.next();
                                }
                                _ => break,
                            }
                        }
                    }
                    _ => return Err(Reject),
                }
            }
            _ => {}
        }
    }

    // Ran out of input without finding the closing quote.
    Err(Reject)
}

fn byte(input: Cursor) -> Result<Cursor, Reject> {
let input = input.parse("b'")?;
let mut bytes = input.bytes().enumerate();
Expand Down Expand Up @@ -568,7 +651,7 @@ fn character(input: Cursor) -> Result<Cursor, Reject> {
let ok = match chars.next().map(|(_, ch)| ch) {
Some('\\') => match chars.next().map(|(_, ch)| ch) {
Some('x') => backslash_x_char(&mut chars),
Some('u') => backslash_u(&mut chars),
Some('u') => backslash_u(&mut chars).is_ok(),
Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
true
}
Expand Down Expand Up @@ -614,11 +697,23 @@ where
true
}

fn backslash_u<I>(chars: &mut I) -> bool
/// Consumes the two hexadecimal digits of a `\x` escape and reports whether
/// they spell something other than `00`.
///
/// Returns `true` when at least one of the two digits is not `'0'`, and
/// `false` when both are `'0'`.
// NOTE(review): `next_ch!` is defined outside this view; presumably it makes
// this function return `false` early when the next character is missing or
// is not a hex digit -- confirm against the macro definition.
fn backslash_x_nonzero<I>(chars: &mut I) -> bool
where
    I: Iterator<Item = (usize, char)>,
{
    let high = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
    let low = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
    high != '0' || low != '0'
}

fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
where
I: Iterator<Item = (usize, char)>,
{
next_ch!(chars @ '{');
match chars.next() {
Some((_, '{')) => {}
_ => return Err(Reject),
}
let mut value = 0;
let mut len = 0;
for (_, ch) in chars {
Expand All @@ -627,17 +722,17 @@ where
'a'..='f' => 10 + ch as u8 - b'a',
'A'..='F' => 10 + ch as u8 - b'A',
'_' if len > 0 => continue,
'}' if len > 0 => return char::from_u32(value).is_some(),
_ => return false,
'}' if len > 0 => return char::from_u32(value).ok_or(Reject),
_ => break,
};
if len == 6 {
return false;
break;
}
value *= 0x10;
value += u32::from(digit);
len += 1;
}
false
Err(Reject)
}

fn float(input: Cursor) -> Result<Cursor, Reject> {
Expand Down
36 changes: 36 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,42 @@ fn literal_byte_string() {
"br\"\u{a0}\"".parse::<TokenStream>().unwrap_err();
}

#[test]
fn literal_c_string() {
    // Source text holding four C string literals, one per line. The trailing
    // `// from the RFC` is part of the parsed input, not a comment on this test.
    let strings = r###"
c"hello\x80我叫\u{1F980}" // from the RFC
cr"\"
cr##"Hello "world"!"##
c"\t\n\r\"\\"
"###;

    // Expected `to_string()` output of each literal token, in source order.
    let expected_literals = [
        r#"c"hello\x80我叫\u{1F980}""#,
        r#"cr"\""#,
        r###"cr##"Hello "world"!"##"###,
        r#"c"\t\n\r\"\\""#,
    ];

    // Inputs that must fail to parse: each one would put a NUL inside the
    // C string (via `\0`, `\x00`, `\u{0}`, or a literal NUL character).
    let rejected_inputs = [r#"c"\0""#, r#"c"\x00""#, r#"c"\u{0}""#, "c\"\0\""];

    let mut tokens = strings.parse::<TokenStream>().unwrap().into_iter();

    for expected in expected_literals.iter() {
        match tokens.next().unwrap() {
            TokenTree::Literal(lit) => assert_eq!(lit.to_string(), *expected),
            other => panic!("unexpected token: {:?}", other),
        }
    }

    // Nothing may remain after the four literals.
    if let Some(leftover) = tokens.next() {
        panic!("unexpected token: {:?}", leftover);
    }

    for invalid in rejected_inputs.iter() {
        if let Ok(parsed) = invalid.parse::<TokenStream>() {
            panic!("unexpected token: {:?}", parsed);
        }
    }
}

#[test]
fn literal_character() {
assert_eq!(Literal::character('x').to_string(), "'x'");
Expand Down

0 comments on commit 3f6d84b

Please sign in to comment.