diff --git a/compiler/rustc_macros/src/lib.rs b/compiler/rustc_macros/src/lib.rs index 3589860eb0eac..1bb77b18afc95 100644 --- a/compiler/rustc_macros/src/lib.rs +++ b/compiler/rustc_macros/src/lib.rs @@ -13,7 +13,6 @@ mod newtype; mod query; mod serialize; mod session_diagnostic; -mod symbols; mod type_foldable; #[proc_macro] @@ -21,11 +20,6 @@ pub fn rustc_queries(input: TokenStream) -> TokenStream { query::rustc_queries(input) } -#[proc_macro] -pub fn symbols(input: TokenStream) -> TokenStream { - symbols::symbols(input.into()).into() -} - /// Creates a struct type `S` that can be used as an index with /// `IndexVec` and so on. /// diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs deleted file mode 100644 index 1b245f2a75060..0000000000000 --- a/compiler/rustc_macros/src/symbols.rs +++ /dev/null @@ -1,236 +0,0 @@ -//! Proc macro which builds the Symbol table -//! -//! # Debugging -//! -//! Since this proc-macro does some non-trivial work, debugging it is important. -//! This proc-macro can be invoked as an ordinary unit test, like so: -//! -//! ```bash -//! cd compiler/rustc_macros -//! cargo test symbols::test_symbols -- --nocapture -//! ``` -//! -//! This unit test finds the `symbols!` invocation in `compiler/rustc_span/src/symbol.rs` -//! and runs it. It verifies that the output token stream can be parsed as valid module -//! items and that no errors were produced. -//! -//! You can also view the generated code by using `cargo expand`: -//! -//! ```bash -//! cargo install cargo-expand # this is necessary only once -//! cd compiler/rustc_span -//! cargo expand > /tmp/rustc_span.rs # it's a big file -//! 
``` - -use proc_macro2::{Span, TokenStream}; -use quote::quote; -use std::collections::HashMap; -use syn::parse::{Parse, ParseStream, Result}; -use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token}; - -#[cfg(test)] -mod tests; - -mod kw { - syn::custom_keyword!(Keywords); - syn::custom_keyword!(Symbols); -} - -struct Keyword { - name: Ident, - value: LitStr, -} - -impl Parse for Keyword { - fn parse(input: ParseStream<'_>) -> Result { - let name = input.parse()?; - input.parse::()?; - let value = input.parse()?; - - Ok(Keyword { name, value }) - } -} - -struct Symbol { - name: Ident, - value: Option, -} - -impl Parse for Symbol { - fn parse(input: ParseStream<'_>) -> Result { - let name = input.parse()?; - let value = match input.parse::() { - Ok(_) => Some(input.parse()?), - Err(_) => None, - }; - - Ok(Symbol { name, value }) - } -} - -struct Input { - keywords: Punctuated, - symbols: Punctuated, -} - -impl Parse for Input { - fn parse(input: ParseStream<'_>) -> Result { - input.parse::()?; - let content; - braced!(content in input); - let keywords = Punctuated::parse_terminated(&content)?; - - input.parse::()?; - let content; - braced!(content in input); - let symbols = Punctuated::parse_terminated(&content)?; - - Ok(Input { keywords, symbols }) - } -} - -#[derive(Default)] -struct Errors { - list: Vec, -} - -impl Errors { - fn error(&mut self, span: Span, message: String) { - self.list.push(syn::Error::new(span, message)); - } -} - -pub fn symbols(input: TokenStream) -> TokenStream { - let (mut output, errors) = symbols_with_errors(input); - - // If we generated any errors, then report them as compiler_error!() macro calls. - // This lets the errors point back to the most relevant span. It also allows us - // to report as many errors as we can during a single run. 
- output.extend(errors.into_iter().map(|e| e.to_compile_error())); - - output -} - -fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { - let mut errors = Errors::default(); - - let input: Input = match syn::parse2(input) { - Ok(input) => input, - Err(e) => { - // This allows us to display errors at the proper span, while minimizing - // unrelated errors caused by bailing out (and not generating code). - errors.list.push(e); - Input { keywords: Default::default(), symbols: Default::default() } - } - }; - - let mut keyword_stream = quote! {}; - let mut symbols_stream = quote! {}; - let mut prefill_stream = quote! {}; - let mut counter = 0u32; - let mut keys = - HashMap::::with_capacity(input.keywords.len() + input.symbols.len() + 10); - let mut prev_key: Option<(Span, String)> = None; - - let mut check_dup = |span: Span, str: &str, errors: &mut Errors| { - if let Some(prev_span) = keys.get(str) { - errors.error(span, format!("Symbol `{}` is duplicated", str)); - errors.error(*prev_span, "location of previous definition".to_string()); - } else { - keys.insert(str.to_string(), span); - } - }; - - let mut check_order = |span: Span, str: &str, errors: &mut Errors| { - if let Some((prev_span, ref prev_str)) = prev_key { - if str < prev_str { - errors.error(span, format!("Symbol `{}` must precede `{}`", str, prev_str)); - errors.error(prev_span, format!("location of previous symbol `{}`", prev_str)); - } - } - prev_key = Some((span, str.to_string())); - }; - - // Generate the listed keywords. - for keyword in input.keywords.iter() { - let name = &keyword.name; - let value = &keyword.value; - let value_string = value.value(); - check_dup(keyword.name.span(), &value_string, &mut errors); - prefill_stream.extend(quote! { - #value, - }); - keyword_stream.extend(quote! { - pub const #name: Symbol = Symbol::new(#counter); - }); - counter += 1; - } - - // Generate the listed symbols. 
- for symbol in input.symbols.iter() { - let name = &symbol.name; - let value = match &symbol.value { - Some(value) => value.value(), - None => name.to_string(), - }; - check_dup(symbol.name.span(), &value, &mut errors); - check_order(symbol.name.span(), &name.to_string(), &mut errors); - - prefill_stream.extend(quote! { - #value, - }); - symbols_stream.extend(quote! { - pub const #name: Symbol = Symbol::new(#counter); - }); - counter += 1; - } - - // Generate symbols for the strings "0", "1", ..., "9". - let digits_base = counter; - counter += 10; - for n in 0..10 { - let n = n.to_string(); - check_dup(Span::call_site(), &n, &mut errors); - prefill_stream.extend(quote! { - #n, - }); - } - let _ = counter; // for future use - - let output = quote! { - const SYMBOL_DIGITS_BASE: u32 = #digits_base; - - #[doc(hidden)] - #[allow(non_upper_case_globals)] - mod kw_generated { - use super::Symbol; - #keyword_stream - } - - #[allow(non_upper_case_globals)] - #[doc(hidden)] - pub mod sym_generated { - use super::Symbol; - #symbols_stream - } - - impl Interner { - pub(crate) fn fresh() -> Self { - Interner::prefill(&[ - #prefill_stream - ]) - } - } - }; - - (output, errors.list) - - // To see the generated code, use the "cargo expand" command. - // Do this once to install: - // cargo install cargo-expand - // - // Then, cd to rustc_span and run: - // cargo expand > /tmp/rustc_span_expanded.rs - // - // and read that file. -} diff --git a/compiler/rustc_macros/src/symbols/tests.rs b/compiler/rustc_macros/src/symbols/tests.rs deleted file mode 100644 index 842d2a977189d..0000000000000 --- a/compiler/rustc_macros/src/symbols/tests.rs +++ /dev/null @@ -1,102 +0,0 @@ -use super::*; - -// This test is mainly here for interactive development. Use this test while -// you're working on the proc-macro defined in this file. -#[test] -fn test_symbols() { - // We textually include the symbol.rs file, which contains the list of all - // symbols, keywords, and common words. 
Then we search for the - // `symbols! { ... }` call. - - static SYMBOL_RS_FILE: &str = include_str!("../../../rustc_span/src/symbol.rs"); - - let file = syn::parse_file(SYMBOL_RS_FILE).unwrap(); - let symbols_path: syn::Path = syn::parse_quote!(symbols); - - let m: &syn::ItemMacro = file - .items - .iter() - .filter_map(|i| { - if let syn::Item::Macro(m) = i { - if m.mac.path == symbols_path { Some(m) } else { None } - } else { - None - } - }) - .next() - .expect("did not find `symbols!` macro invocation."); - - let body_tokens = m.mac.tokens.clone(); - - test_symbols_macro(body_tokens, &[]); -} - -fn test_symbols_macro(input: TokenStream, expected_errors: &[&str]) { - let (output, found_errors) = symbols_with_errors(input); - - // It should always parse. - let _parsed_file = syn::parse2::(output).unwrap(); - - assert_eq!( - found_errors.len(), - expected_errors.len(), - "Macro generated a different number of errors than expected" - ); - - for (found_error, &expected_error) in found_errors.iter().zip(expected_errors) { - let found_error_str = format!("{}", found_error); - assert_eq!(found_error_str, expected_error); - } -} - -#[test] -fn check_dup_keywords() { - let input = quote! { - Keywords { - Crate: "crate", - Crate: "crate", - } - Symbols {} - }; - test_symbols_macro(input, &["Symbol `crate` is duplicated", "location of previous definition"]); -} - -#[test] -fn check_dup_symbol() { - let input = quote! { - Keywords {} - Symbols { - splat, - splat, - } - }; - test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); -} - -#[test] -fn check_dup_symbol_and_keyword() { - let input = quote! { - Keywords { - Splat: "splat", - } - Symbols { - splat, - } - }; - test_symbols_macro(input, &["Symbol `splat` is duplicated", "location of previous definition"]); -} - -#[test] -fn check_symbol_order() { - let input = quote! 
/// Declares the pre-interned keyword and symbol tables.
///
/// The input is a `Keywords { Name: "text", .. }` list followed by a
/// `Symbols { name, other: "text", .. }` list. The expansion defines:
///
/// * `kw_generated` and `sym_generated`: one `Symbol` constant per entry,
///   numbered in declaration order, keywords first and symbols after them;
/// * `SYMBOL_DIGITS_BASE`: the index of the first of the ten digit strings
///   `"0"`..`"9"`, which are placed directly after the symbols;
/// * `Interner::fresh`, which prefills an interner with every string in that
///   same order, so each constant's index is the position of its string.
///
/// The macro itself does not check that the lists are sorted or that the
/// interned strings are unique.
macro_rules! symbols {
    // Internal rules: the string interned for one `Symbols` entry. An explicit
    // literal wins; otherwise the identifier itself is stringified.
    (@$_:ident: $lit:literal) => ($lit);
    (@$name:ident) => (stringify!($name));
    (
        Keywords {
            $($KName:ident: $KDisplay:literal),* $(,)?
        }

        Symbols {
            $($SName:ident $(: $SDisplay:literal)?),* $(,)?
        }
    ) => {
        // Field-less enums are used purely as compile-time counters: the
        // discriminant of each variant is the entry's position in its list and
        // the trailing `____anchor` variant is the length of the list.
        mod all {
            #[allow(non_camel_case_types)]
            #[repr(u32)]
            pub enum KSyms {
                $($KName,)*
                ____anchor,
            }

            #[allow(non_camel_case_types)]
            #[repr(u32)]
            pub enum SSyms {
                $($SName,)*
                ____anchor,
            }

            /// Number of entries in the `Keywords` list.
            pub const KEYWORD_COUNT: u32 = KSyms::____anchor as u32;
            /// Number of entries in the `Symbols` list.
            pub const SYMBOL_COUNT: u32 = SSyms::____anchor as u32;
        }

        const SYMBOL_DIGITS_BASE: u32 = all::KEYWORD_COUNT + all::SYMBOL_COUNT;

        #[doc(hidden)]
        #[allow(non_upper_case_globals)]
        mod kw_generated {
            use super::{Symbol, all};

            $(pub const $KName: Symbol = Symbol::new(all::KSyms::$KName as u32);)*
        }

        // `pub`, matching the module emitted by the proc-macro this replaces.
        #[doc(hidden)]
        #[allow(non_upper_case_globals)]
        pub mod sym_generated {
            use super::{Symbol, all};

            // Symbols are numbered after all keywords.
            $(pub const $SName: Symbol =
                Symbol::new(all::KEYWORD_COUNT + all::SSyms::$SName as u32);)*
        }

        impl Interner {
            pub(crate) fn fresh() -> Self {
                // Order must mirror the numbering above: keywords, symbols, digits.
                Interner::prefill(&[
                    $($KDisplay,)*
                    $(symbols!(@$SName$(: $SDisplay)?),)*
                    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
                ])
            }
        }
    };
}
/// One entry of the `Keywords { .. }` or `Symbols { .. }` list in `symbol.rs`.
#[derive(Debug)]
enum Sym<'a> {
    /// A bare `name` entry: the identifier is also the interned string.
    Ident(&'a str),
    /// A `ident: "lit"` entry: `lit` is the interned string, without its quotes.
    IdentLit { ident: &'a str, lit: &'a str },
}

impl<'a> Sym<'a> {
    /// Returns the string this entry interns.
    ///
    /// Literals containing a backslash are not decoded; an error is pushed to
    /// `errors` and `None` is returned for them.
    fn symbol(&self, errors: &mut Vec<String>) -> Option<&'a str> {
        match *self {
            Sym::Ident(name) => Some(name),
            Sym::IdentLit { lit, .. } => {
                if lit.contains('\\') {
                    // FIXME perhaps we could allow escapes by copying logic from `unescape_literal`
                    errors.push(format!("literal contains escapes: {}", lit));
                    None
                } else {
                    Some(lit)
                }
            }
        }
    }

    /// Returns the identifier of this entry; this is what the list is sorted by.
    fn name(&self) -> &'a str {
        match *self {
            Sym::Ident(name) => name,
            Sym::IdentLit { ident, .. } => ident,
        }
    }
}

/// Checks that the symbol lists in `rustc_span/src/symbol.rs` under
/// `compiler_path` contain no duplicates and that the `Symbols` list is sorted
/// by identifier. Every problem is printed to stderr and sets `*bad`.
///
/// # Panics
///
/// Panics if the file cannot be read.
pub fn check(compiler_path: &Path, bad: &mut bool) {
    let path = compiler_path.join("rustc_span/src/symbol.rs");
    let file = match read_to_string(&path) {
        Ok(file) => file,
        Err(err) => panic!("failed to read {}: {}", path.display(), err),
    };
    let errors = check_source(&file);
    if !errors.is_empty() {
        *bad = true;
    }
    for error in errors {
        eprintln!("{}", error);
    }
}

/// Runs all checks on the text of `symbol.rs` and returns the error messages.
fn check_source(file: &str) -> Vec<String> {
    let mut errors = Vec::new();
    // The digit strings are pre-seeded so that a listed "0".."9" is reported as
    // a duplicate.
    let mut names: HashSet<&str> = HashSet::new();
    names.extend(["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"].iter().copied());

    // (start anchor, end anchor, whether the section must be sorted)
    let sections = [
        ("// keywords-start", "// keywords-end", false),
        ("// symbols-start", "// symbols-end", true),
    ];
    for &(start_anchor, end_anchor, sorted) in sections.iter() {
        let mut prev: Option<&str> = None;
        for sym in syms(file, start_anchor, end_anchor, &mut errors) {
            if let Some(s) = sym.symbol(&mut errors) {
                if !names.insert(s) {
                    errors.push(format!("duplicate symbol: `{}`", s));
                }
            }
            if sorted {
                // Ordering is defined on identifiers, so report identifiers too.
                let name = sym.name();
                if let Some(prev_name) = prev {
                    if prev_name > name {
                        errors.push(format!(
                            "symbol list not sorted: `{}` should come before `{}`",
                            name, prev_name
                        ));
                    }
                }
                prev = Some(name);
            }
        }
    }
    errors
}

/// Parses the entries between `start_anchor` and the next `end_anchor` in `file`.
///
/// A missing section and entries whose value is not a quoted string are
/// reported through `errors` instead of panicking.
fn syms<'a>(
    file: &'a str,
    start_anchor: &str,
    end_anchor: &str,
    errors: &mut Vec<String>,
) -> Vec<Sym<'a>> {
    // Search for the end anchor only after the start anchor so that a
    // misordered pair is reported rather than producing an invalid range.
    let section = file.find(start_anchor).and_then(|at| {
        let rest = &file[at + start_anchor.len()..];
        rest.find(end_anchor).map(|end| &rest[..end])
    });
    let section = match section {
        Some(section) => section,
        None => {
            errors.push(format!(
                "could not find the `{}` .. `{}` section in symbol.rs",
                start_anchor, end_anchor
            ));
            return Vec::new();
        }
    };

    let mut found = Vec::new();
    // The anchor is a `//` comment, so the remainder of its line is comment
    // text too and is skipped.
    for entry in section.lines().skip(1).flat_map(|line| split_entries(line)) {
        let entry = entry.trim();
        if entry.is_empty() {
            continue;
        }
        match entry.split_once(':') {
            None => found.push(Sym::Ident(entry)),
            Some((ident, lit)) => {
                let ident = ident.trim();
                let lit = lit.trim();
                match lit.strip_prefix('"').and_then(|l| l.strip_suffix('"')) {
                    Some(lit) => found.push(Sym::IdentLit { ident, lit }),
                    None => errors.push(format!(
                        "expected a string literal after `{}:`, found `{}`",
                        ident, lit
                    )),
                }
            }
        }
    }
    found
}

/// Splits one source line at the commas that separate entries.
///
/// Commas and `//` inside a string literal are left alone, and everything from
/// a `//` outside a literal to the end of the line is dropped as a comment.
/// The returned pieces are not trimmed and may be empty.
fn split_entries(line: &str) -> Vec<&str> {
    let mut entries = Vec::new();
    let mut start = 0;
    let mut in_literal = false;
    let mut escaped = false;
    let mut prev_slash = false;
    for (i, c) in line.char_indices() {
        if in_literal {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                in_literal = false;
            }
            continue;
        }
        match c {
            '"' => in_literal = true,
            ',' => {
                entries.push(&line[start..i]);
                start = i + 1;
            }
            '/' if prev_slash => {
                // The comment starts at the previous `/`, one byte back.
                entries.push(&line[start..i - 1]);
                return entries;
            }
            _ => {}
        }
        prev_slash = c == '/';
    }
    entries.push(&line[start..]);
    entries
}