From 6eb76ef3aada3b424abfb760f71b182a5988d55e Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Mon, 8 Apr 2024 19:22:27 -0700 Subject: [PATCH] Add Lit::CStr --- Cargo.toml | 2 +- src/gen/clone.rs | 1 + src/gen/debug.rs | 1 + src/gen/eq.rs | 3 + src/gen/fold.rs | 13 +++ src/gen/hash.rs | 16 ++-- src/gen/visit.rs | 10 +++ src/gen/visit_mut.rs | 10 +++ src/lib.rs | 4 +- src/lit.rs | 196 +++++++++++++++++++++++++++++++++++++++++-- syn.json | 11 +++ tests/debug/gen.rs | 6 ++ 12 files changed, 256 insertions(+), 17 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index def141422f..e3b83601b4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,7 @@ proc-macro = ["proc-macro2/proc-macro", "quote?/proc-macro"] test = ["syn-test-suite/all-features"] [dependencies] -proc-macro2 = { version = "1.0.75", default-features = false } +proc-macro2 = { version = "1.0.80", default-features = false } quote = { version = "1.0.35", optional = true, default-features = false } unicode-ident = "1" diff --git a/src/gen/clone.rs b/src/gen/clone.rs index 3313d4db21..c4007a7380 100644 --- a/src/gen/clone.rs +++ b/src/gen/clone.rs @@ -1338,6 +1338,7 @@ impl Clone for crate::Lit { match self { crate::Lit::Str(v0) => crate::Lit::Str(v0.clone()), crate::Lit::ByteStr(v0) => crate::Lit::ByteStr(v0.clone()), + crate::Lit::CStr(v0) => crate::Lit::CStr(v0.clone()), crate::Lit::Byte(v0) => crate::Lit::Byte(v0.clone()), crate::Lit::Char(v0) => crate::Lit::Char(v0.clone()), crate::Lit::Int(v0) => crate::Lit::Int(v0.clone()), diff --git a/src/gen/debug.rs b/src/gen/debug.rs index 2dc531ead3..9db6114843 100644 --- a/src/gen/debug.rs +++ b/src/gen/debug.rs @@ -1958,6 +1958,7 @@ impl Debug for crate::Lit { match self { crate::Lit::Str(v0) => v0.debug(formatter, "Str"), crate::Lit::ByteStr(v0) => v0.debug(formatter, "ByteStr"), + crate::Lit::CStr(v0) => v0.debug(formatter, "CStr"), crate::Lit::Byte(v0) => v0.debug(formatter, "Byte"), crate::Lit::Char(v0) => v0.debug(formatter, "Char"), crate::Lit::Int(v0) => v0.debug(formatter, "Int"), diff --git a/src/gen/eq.rs b/src/gen/eq.rs index 9bfce5f2ab..555c48b95a 100644 --- a/src/gen/eq.rs +++ b/src/gen/eq.rs @@ -1300,6 +1300,7 @@ impl PartialEq for crate::Lit { match (self, other) { (crate::Lit::Str(self0), crate::Lit::Str(other0)) => self0 == other0, (crate::Lit::ByteStr(self0), crate::Lit::ByteStr(other0)) => self0 == other0, + (crate::Lit::CStr(self0), crate::Lit::CStr(other0)) => self0 == other0, (crate::Lit::Byte(self0), crate::Lit::Byte(other0)) => self0 == other0, (crate::Lit::Char(self0), crate::Lit::Char(other0)) => self0 == other0, (crate::Lit::Int(self0), crate::Lit::Int(other0)) => self0 == other0, @@ -1325,6 +1326,8 @@ impl Eq for crate::LitByte {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Eq for crate::LitByteStr {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] +impl Eq for crate::LitCStr {} +#[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Eq for crate::LitChar {} #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Eq for crate::LitFloat {} diff --git a/src/gen/fold.rs b/src/gen/fold.rs index 872ffe10ed..ec2aed49db 100644 --- a/src/gen/fold.rs +++ b/src/gen/fold.rs @@ -581,6 +581,9 @@ pub trait Fold { fn fold_lit_byte_str(&mut self, i: crate::LitByteStr) -> crate::LitByteStr { fold_lit_byte_str(self, i) } + fn fold_lit_cstr(&mut self, i: crate::LitCStr) -> crate::LitCStr { + fold_lit_cstr(self, i) + } fn fold_lit_char(&mut self, i: crate::LitChar) -> crate::LitChar { fold_lit_char(self, i) } @@ -2628,6 +2631,7 @@ where crate::Lit::ByteStr(_binding_0) => { crate::Lit::ByteStr(f.fold_lit_byte_str(_binding_0)) } + crate::Lit::CStr(_binding_0) => crate::Lit::CStr(f.fold_lit_cstr(_binding_0)), crate::Lit::Byte(_binding_0) => crate::Lit::Byte(f.fold_lit_byte(_binding_0)), crate::Lit::Char(_binding_0) => crate::Lit::Char(f.fold_lit_char(_binding_0)), crate::Lit::Int(_binding_0) => crate::Lit::Int(f.fold_lit_int(_binding_0)), @@ -2663,6 +2667,15 @@ where node.set_span(span); node } +pub fn fold_lit_cstr(f: &mut F, node: crate::LitCStr) -> crate::LitCStr +where + F: Fold + ?Sized, +{ + let span = f.fold_span(node.span()); + let mut node = node; + node.set_span(span); + node +} pub fn fold_lit_char(f: &mut F, node: crate::LitChar) -> crate::LitChar where F: Fold + ?Sized, diff --git a/src/gen/hash.rs b/src/gen/hash.rs index 7ead139c3e..54d8fe25d4 100644 --- a/src/gen/hash.rs +++ b/src/gen/hash.rs @@ -1682,28 +1682,32 @@ impl Hash for crate::Lit { state.write_u8(1u8); v0.hash(state); } - crate::Lit::Byte(v0) => { + crate::Lit::CStr(v0) => { state.write_u8(2u8); v0.hash(state); } - crate::Lit::Char(v0) => { + crate::Lit::Byte(v0) => { state.write_u8(3u8); v0.hash(state); } - crate::Lit::Int(v0) => { + crate::Lit::Char(v0) => { state.write_u8(4u8); v0.hash(state); } - crate::Lit::Float(v0) => { + crate::Lit::Int(v0) => { state.write_u8(5u8); v0.hash(state); } - crate::Lit::Bool(v0) => { + crate::Lit::Float(v0) => { state.write_u8(6u8); v0.hash(state); } - crate::Lit::Verbatim(v0) => { + crate::Lit::Bool(v0) => { state.write_u8(7u8); + v0.hash(state); + } + crate::Lit::Verbatim(v0) => { + state.write_u8(8u8); v0.to_string().hash(state); } } diff --git a/src/gen/visit.rs b/src/gen/visit.rs index 5d87e63f74..8da25c87c1 100644 --- a/src/gen/visit.rs +++ b/src/gen/visit.rs @@ -547,6 +547,9 @@ pub trait Visit<'ast> { fn visit_lit_byte_str(&mut self, i: &'ast crate::LitByteStr) { visit_lit_byte_str(self, i); } + fn visit_lit_cstr(&mut self, i: &'ast crate::LitCStr) { + visit_lit_cstr(self, i); + } fn visit_lit_char(&mut self, i: &'ast crate::LitChar) { visit_lit_char(self, i); } @@ -2694,6 +2697,9 @@ where crate::Lit::ByteStr(_binding_0) => { v.visit_lit_byte_str(_binding_0); } + crate::Lit::CStr(_binding_0) => { + v.visit_lit_cstr(_binding_0); + } crate::Lit::Byte(_binding_0) => { v.visit_lit_byte(_binding_0); } @@ -2729,6 +2735,10 @@ pub fn visit_lit_byte_str<'ast, V>(v: &mut V, node: &'ast crate::LitByteStr) where V: Visit<'ast> + ?Sized, {} +pub fn visit_lit_cstr<'ast, V>(v: &mut V, node: &'ast crate::LitCStr) +where + V: Visit<'ast> + ?Sized, +{} pub fn visit_lit_char<'ast, V>(v: &mut V, node: &'ast crate::LitChar) where V: Visit<'ast> + ?Sized, diff --git a/src/gen/visit_mut.rs b/src/gen/visit_mut.rs index f35fc099af..06345a53c1 100644 --- a/src/gen/visit_mut.rs +++ b/src/gen/visit_mut.rs @@ -548,6 +548,9 @@ pub trait VisitMut { fn visit_lit_byte_str_mut(&mut self, i: &mut crate::LitByteStr) { visit_lit_byte_str_mut(self, i); } + fn visit_lit_cstr_mut(&mut self, i: &mut crate::LitCStr) { + visit_lit_cstr_mut(self, i); + } fn visit_lit_char_mut(&mut self, i: &mut crate::LitChar) { visit_lit_char_mut(self, i); } @@ -2694,6 +2697,9 @@ where crate::Lit::ByteStr(_binding_0) => { v.visit_lit_byte_str_mut(_binding_0); } + crate::Lit::CStr(_binding_0) => { + v.visit_lit_cstr_mut(_binding_0); + } crate::Lit::Byte(_binding_0) => { v.visit_lit_byte_mut(_binding_0); } @@ -2729,6 +2735,10 @@ pub fn visit_lit_byte_str_mut(v: &mut V, node: &mut crate::LitByteStr) where V: VisitMut + ?Sized, {} +pub fn visit_lit_cstr_mut(v: &mut V, node: &mut crate::LitCStr) +where + V: VisitMut + ?Sized, +{} pub fn visit_lit_char_mut(v: &mut V, node: &mut crate::LitChar) where V: VisitMut + ?Sized, diff --git a/src/lib.rs b/src/lib.rs index af04ffc1c4..5be5b0a9cd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -418,7 +418,9 @@ mod lit; #[doc(hidden)] // https://github.com/dtolnay/syn/issues/1566 pub use crate::lit::StrStyle; #[doc(inline)] -pub use crate::lit::{Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; +pub use crate::lit::{ + Lit, LitBool, LitByte, LitByteStr, LitCStr, LitChar, LitFloat, LitInt, LitStr, +}; #[cfg(feature = "parsing")] mod lookahead; diff --git a/src/lit.rs b/src/lit.rs index bb88016acc..aa95020dce 100644 --- a/src/lit.rs +++ b/src/lit.rs @@ -6,6 +6,7 @@ use crate::{Error, Result}; use proc_macro2::{Ident, Literal, Span}; #[cfg(feature = "parsing")] use proc_macro2::{TokenStream, TokenTree}; +use std::ffi::{CStr, CString}; use std::fmt::{self, Display}; #[cfg(feature = "extra-traits")] use std::hash::{Hash, Hasher}; @@ -27,6 +28,9 @@ ast_enum_of_structs! { /// A byte string literal: `b"foo"`. ByteStr(LitByteStr), + /// A nul-terminated C-string literal: `c"foo"`. + CStr(LitCStr), + /// A byte literal: `b'f'`. Byte(LitByte), @@ -63,6 +67,13 @@ ast_struct! { } } +ast_struct! { + /// A nul-terminated C-string literal: `c"foo"`. + pub struct LitCStr { + repr: Box, + } +} + ast_struct! { /// A byte literal: `b'f'`. pub struct LitByte { @@ -294,6 +305,41 @@ impl LitByteStr { } } +impl LitCStr { + pub fn new(value: &CStr, span: Span) -> Self { + let mut token = Literal::c_string(value); + token.set_span(span); + LitCStr { + repr: Box::new(LitRepr { + token, + suffix: Box::::default(), + }), + } + } + + pub fn value(&self) -> CString { + let repr = self.repr.token.to_string(); + let (value, _suffix) = value::parse_lit_c_str(&repr); + value + } + + pub fn span(&self) -> Span { + self.repr.token.span() + } + + pub fn set_span(&mut self, span: Span) { + self.repr.token.set_span(span); + } + + pub fn suffix(&self) -> &str { + &self.repr.suffix + } + + pub fn token(&self) -> Literal { + self.repr.token.clone() + } +} + impl LitByte { pub fn new(value: u8, span: Span) -> Self { let mut token = Literal::u8_suffixed(value); @@ -555,7 +601,7 @@ impl LitBool { #[cfg(feature = "extra-traits")] mod debug_impls { - use crate::lit::{LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; + use crate::lit::{LitBool, LitByte, LitByteStr, LitCStr, LitChar, LitFloat, LitInt, LitStr}; use std::fmt::{self, Debug}; #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] @@ -590,6 +636,22 @@ mod debug_impls { } } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] + impl Debug for LitCStr { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + self.debug(formatter, "LitCStr") + } + } + + impl LitCStr { + pub(crate) fn debug(&self, formatter: &mut fmt::Formatter, name: &str) -> fmt::Result { + formatter + .debug_struct(name) + .field("token", &format_args!("{}", self.repr.token)) + .finish() + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "extra-traits")))] impl Debug for LitByte { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { @@ -750,6 +812,7 @@ macro_rules! lit_extra_traits { lit_extra_traits!(LitStr); lit_extra_traits!(LitByteStr); +lit_extra_traits!(LitCStr); lit_extra_traits!(LitByte); lit_extra_traits!(LitChar); lit_extra_traits!(LitInt); @@ -790,7 +853,7 @@ pub(crate) mod parsing { use crate::buffer::Cursor; use crate::error::Result; use crate::lit::{ - value, Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitFloatRepr, LitInt, + value, Lit, LitBool, LitByte, LitByteStr, LitCStr, LitChar, LitFloat, LitFloatRepr, LitInt, LitIntRepr, LitStr, }; use crate::parse::{Parse, ParseStream}; @@ -889,6 +952,17 @@ pub(crate) mod parsing { } } + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] + impl Parse for LitCStr { + fn parse(input: ParseStream) -> Result { + let head = input.fork(); + match input.parse() { + Ok(Lit::CStr(lit)) => Ok(lit), + _ => Err(head.error("expected C string literal")), + } + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "parsing")))] impl Parse for LitByte { fn parse(input: ParseStream) -> Result { @@ -947,7 +1021,7 @@ pub(crate) mod parsing { #[cfg(feature = "printing")] mod printing { - use crate::lit::{LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitInt, LitStr}; + use crate::lit::{LitBool, LitByte, LitByteStr, LitCStr, LitChar, LitFloat, LitInt, LitStr}; use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; @@ -965,6 +1039,13 @@ mod printing { } } + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] + impl ToTokens for LitCStr { + fn to_tokens(&self, tokens: &mut TokenStream) { + self.repr.token.to_tokens(tokens); + } + } + #[cfg_attr(doc_cfg, doc(cfg(feature = "printing")))] impl ToTokens for LitByte { fn to_tokens(&self, tokens: &mut TokenStream) { @@ -1004,12 +1085,13 @@ mod printing { mod value { use crate::bigint::BigInt; use crate::lit::{ - Lit, LitBool, LitByte, LitByteStr, LitChar, LitFloat, LitFloatRepr, LitInt, LitIntRepr, - LitRepr, LitStr, + Lit, LitBool, LitByte, LitByteStr, LitCStr, LitChar, LitFloat, LitFloatRepr, LitInt, + LitIntRepr, LitRepr, LitStr, }; use proc_macro2::{Literal, Span}; use std::ascii; use std::char; + use std::ffi::CString; use std::ops::{Index, RangeFrom}; impl Lit { @@ -1042,6 +1124,13 @@ mod value { } _ => {} }, + // c"...", cr"...", cr#"..."# + b'c' => { + let (_, suffix) = parse_lit_c_str(&repr); + return Lit::CStr(LitCStr { + repr: Box::new(LitRepr { token, suffix }), + }); + } // '...' b'\'' => { let (_, suffix) = parse_lit_char(&repr); @@ -1080,9 +1169,6 @@ mod value { }); } } - // c"...", cr"...", cr#"..."# - // TODO: add a Lit::CStr variant? - b'c' => return Lit::Verbatim(token), b'(' if repr == "(/*ERROR*/)" => return Lit::Verbatim(token), _ => {} } @@ -1094,6 +1180,7 @@ mod value { match self { Lit::Str(lit) => lit.suffix(), Lit::ByteStr(lit) => lit.suffix(), + Lit::CStr(lit) => lit.suffix(), Lit::Byte(lit) => lit.suffix(), Lit::Char(lit) => lit.suffix(), Lit::Int(lit) => lit.suffix(), @@ -1106,6 +1193,7 @@ mod value { match self { Lit::Str(lit) => lit.span(), Lit::ByteStr(lit) => lit.span(), + Lit::CStr(lit) => lit.span(), Lit::Byte(lit) => lit.span(), Lit::Char(lit) => lit.span(), Lit::Int(lit) => lit.span(), @@ -1119,6 +1207,7 @@ mod value { match self { Lit::Str(lit) => lit.set_span(span), Lit::ByteStr(lit) => lit.set_span(span), + Lit::CStr(lit) => lit.set_span(span), Lit::Byte(lit) => lit.set_span(span), Lit::Char(lit) => lit.set_span(span), Lit::Int(lit) => lit.set_span(span), @@ -1317,6 +1406,92 @@ mod value { (String::from(value).into_bytes(), suffix) } + // Returns (content, suffix). + pub(crate) fn parse_lit_c_str(s: &str) -> (CString, Box) { + assert_eq!(byte(s, 0), b'c'); + match byte(s, 1) { + b'"' => parse_lit_c_str_cooked(s), + b'r' => parse_lit_c_str_raw(s), + _ => unreachable!(), + } + } + + // Clippy false positive + // https://github.com/rust-lang-nursery/rust-clippy/issues/2329 + #[allow(clippy::needless_continue)] + fn parse_lit_c_str_cooked(mut s: &str) -> (CString, Box) { + assert_eq!(byte(s, 0), b'c'); + assert_eq!(byte(s, 1), b'"'); + s = &s[2..]; + + // We're going to want to have slices which don't respect codepoint boundaries. + let mut v = s.as_bytes(); + + let mut out = Vec::new(); + 'outer: loop { + let byte = match byte(v, 0) { + b'"' => break, + b'\\' => { + let b = byte(v, 1); + v = &v[2..]; + match b { + b'x' => { + let (b, rest) = backslash_x(v); + assert!(b != 0, "\\x00 is not allowed in C-string literal"); + v = rest; + b + } + b'u' => { + let (ch, rest) = backslash_u(v); + assert!(ch != '\0', "\\u{{0}} is not allowed in C-string literal"); + v = rest; + out.extend_from_slice(ch.encode_utf8(&mut [0u8; 4]).as_bytes()); + continue 'outer; + } + b'n' => b'\n', + b'r' => b'\r', + b't' => b'\t', + b'\\' => b'\\', + b'\'' => b'\'', + b'"' => b'"', + b'\r' | b'\n' => loop { + let byte = byte(v, 0); + if matches!(byte, b' ' | b'\t' | b'\n' | b'\r') { + v = &v[1..]; + } else { + continue 'outer; + } + }, + b => panic!( + "unexpected byte '{}' after \\ character in byte literal", + ascii::escape_default(b), + ), + } + } + b'\r' => { + assert_eq!(byte(v, 1), b'\n', "bare CR not allowed in string"); + v = &v[2..]; + b'\n' + } + b => { + v = &v[1..]; + b + } + }; + out.push(byte); + } + + assert_eq!(byte(v, 0), b'"'); + let suffix = s[s.len() - v.len() + 1..].to_owned().into_boxed_str(); + (CString::new(out).unwrap(), suffix) + } + + fn parse_lit_c_str_raw(s: &str) -> (CString, Box) { + assert_eq!(byte(s, 0), b'c'); + let (value, suffix) = parse_lit_str_raw(&s[1..]); + (CString::new(String::from(value)).unwrap(), suffix) + } + // Returns (value, suffix). pub(crate) fn parse_lit_byte(s: &str) -> (u8, Box) { assert_eq!(byte(s, 0), b'b'); @@ -1427,7 +1602,10 @@ mod value { (ch, &s[2..]) } - fn backslash_u(mut s: &str) -> (char, &str) { + fn backslash_u(mut s: &S) -> (char, &S) + where + S: Index, Output = S> + AsRef<[u8]> + ?Sized, + { if byte(s, 0) != b'{' { panic!("{}", "expected { after \\u"); } diff --git a/syn.json b/syn.json index c8918179c3..db31b02920 100644 --- a/syn.json +++ b/syn.json @@ -3366,6 +3366,11 @@ "syn": "LitByteStr" } ], + "CStr": [ + { + "syn": "LitCStr" + } + ], "Byte": [ { "syn": "LitByte" @@ -3425,6 +3430,12 @@ "any": [] } }, + { + "ident": "LitCStr", + "features": { + "any": [] + } + }, { "ident": "LitChar", "features": { diff --git a/tests/debug/gen.rs b/tests/debug/gen.rs index 1548d8b6ba..9f726683a2 100644 --- a/tests/debug/gen.rs +++ b/tests/debug/gen.rs @@ -2864,6 +2864,7 @@ impl Debug for Lite { match &self.value { syn::Lit::Str(_val) => write!(formatter, "{:?}", _val.value()), syn::Lit::ByteStr(_val) => write!(formatter, "{:?}", _val.value()), + syn::Lit::CStr(_val) => write!(formatter, "{:?}", _val.value()), syn::Lit::Byte(_val) => write!(formatter, "{:?}", _val.value()), syn::Lit::Char(_val) => write!(formatter, "{:?}", _val.value()), syn::Lit::Int(_val) => write!(formatter, "{}", _val), @@ -2901,6 +2902,11 @@ impl Debug for Lite { write!(formatter, "{:?}", self.value.value()) } } +impl Debug for Lite { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + write!(formatter, "{:?}", self.value.value()) + } +} impl Debug for Lite { fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { write!(formatter, "{:?}", self.value.value())