From d33b3562e5e888eaffd2f8f1af08ca2afdbe542c Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 16 Feb 2020 16:47:24 +0300 Subject: [PATCH 1/4] parser: Do not call `bump` recursively Token normalization is merged directly into `bump`. Special "unknown macro variable" diagnostic for unexpected `$`s is removed as preventing legal code from compiling. --- src/librustc_expand/mbe/macro_parser.rs | 2 - src/librustc_expand/mbe/macro_rules.rs | 1 - src/librustc_parse/parser/mod.rs | 75 +++++++++++-------------- src/test/ui/issues/issue-6596-1.rs | 2 +- src/test/ui/issues/issue-6596-1.stderr | 4 +- src/test/ui/issues/issue-6596-2.rs | 2 +- src/test/ui/issues/issue-6596-2.stderr | 4 +- 7 files changed, 38 insertions(+), 52 deletions(-) diff --git a/src/librustc_expand/mbe/macro_parser.rs b/src/librustc_expand/mbe/macro_parser.rs index 5bf7602ea6e8f..6599e92222c75 100644 --- a/src/librustc_expand/mbe/macro_parser.rs +++ b/src/librustc_expand/mbe/macro_parser.rs @@ -856,8 +856,6 @@ fn parse_nt(p: &mut Parser<'_>, sp: Span, name: Symbol) -> Nonterminal { if name == sym::tt { return token::NtTT(p.parse_token_tree()); } - // check at the beginning and the parser checks after each bump - p.process_potential_macro_variable(); match parse_nt_inner(p, sp, name) { Ok(nt) => nt, Err(mut err) => { diff --git a/src/librustc_expand/mbe/macro_rules.rs b/src/librustc_expand/mbe/macro_rules.rs index f3c827b1816a8..52e581e30f537 100644 --- a/src/librustc_expand/mbe/macro_rules.rs +++ b/src/librustc_expand/mbe/macro_rules.rs @@ -267,7 +267,6 @@ fn generic_extension<'cx>( cx.current_expansion.module.mod_path.last().map(|id| id.to_string()); p.last_type_ascription = cx.current_expansion.prior_type_ascription; - p.process_potential_macro_variable(); // Let the context choose how to interpret the result. // Weird, but useful for X-macros. return Box::new(ParserAnyMacro { diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 79944dc35e523..4f96d33b83f2f 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -404,7 +404,8 @@ impl<'a> Parser<'a> { subparser_name, }; - parser.token = parser.next_tok(); + // Make parser point to the first token. + parser.bump(); if let Some(directory) = directory { parser.directory = directory; @@ -418,7 +419,6 @@ impl<'a> Parser<'a> { } } - parser.process_potential_macro_variable(); parser } @@ -430,7 +430,7 @@ impl<'a> Parser<'a> { self.unnormalized_prev_token.as_ref().unwrap_or(&self.prev_token) } - fn next_tok(&mut self) -> Token { + fn next_tok(&mut self, fallback_span: Span) -> Token { let mut next = if self.desugar_doc_comments { self.token_cursor.next_desugared() } else { @@ -438,7 +438,7 @@ impl<'a> Parser<'a> { }; if next.span.is_dummy() { // Tweak the location for better diagnostics, but keep syntactic context intact. - next.span = self.unnormalized_token().span.with_ctxt(next.span.ctxt()); + next.span = fallback_span.with_ctxt(next.span.ctxt()); } next } @@ -896,6 +896,24 @@ impl<'a> Parser<'a> { self.parse_delim_comma_seq(token::Paren, f) } + // Interpolated identifier (`$i: ident`) and lifetime (`$l: lifetime`) + // tokens are replaced with usual identifier and lifetime tokens, + // so the former are never encountered during normal parsing. + fn normalize_token(token: &Token) -> Option { + match &token.kind { + token::Interpolated(nt) => match **nt { + token::NtIdent(ident, is_raw) => { + Some(Token::new(token::Ident(ident.name, is_raw), ident.span)) + } + token::NtLifetime(ident) => { + Some(Token::new(token::Lifetime(ident.name), ident.span)) + } + _ => None, + }, + _ => None, + } + } + /// Advance the parser by one token. pub fn bump(&mut self) { if self.prev_token.kind == TokenKind::Eof { @@ -905,16 +923,17 @@ impl<'a> Parser<'a> { } // Update the current and previous tokens. - let next_token = self.next_tok(); - self.prev_token = mem::replace(&mut self.token, next_token); + self.prev_token = self.token.take(); self.unnormalized_prev_token = self.unnormalized_token.take(); + self.token = self.next_tok(self.unnormalized_prev_token().span); + if let Some(normalized_token) = Self::normalize_token(&self.token) { + self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token)); + } // Update fields derived from the previous token. self.prev_span = self.unnormalized_prev_token().span; self.expected_tokens.clear(); - // Check after each token. - self.process_potential_macro_variable(); } /// Advances the parser using provided token as a next one. Use this when @@ -924,9 +943,12 @@ impl<'a> Parser<'a> { /// Correct token kinds and spans need to be calculated instead. fn bump_with(&mut self, next: TokenKind, span: Span) { // Update the current and previous tokens. - let next_token = Token::new(next, span); - self.prev_token = mem::replace(&mut self.token, next_token); + self.prev_token = self.token.take(); self.unnormalized_prev_token = self.unnormalized_token.take(); + self.token = Token::new(next, span); + if let Some(normalized_token) = Self::normalize_token(&self.token) { + self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token)); + } // Update fields derived from the previous token. self.prev_span = self.unnormalized_prev_token().span.with_hi(span.lo()); @@ -1066,39 +1088,6 @@ impl<'a> Parser<'a> { } } - pub fn process_potential_macro_variable(&mut self) { - let normalized_token = match self.token.kind { - token::Dollar - if self.token.span.from_expansion() && self.look_ahead(1, |t| t.is_ident()) => - { - self.bump(); - let name = match self.token.kind { - token::Ident(name, _) => name, - _ => unreachable!(), - }; - let span = self.prev_span.to(self.token.span); - self.struct_span_err(span, &format!("unknown macro variable `{}`", name)) - .span_label(span, "unknown macro variable") - .emit(); - self.bump(); - return; - } - token::Interpolated(ref nt) => { - // Interpolated identifier and lifetime tokens are replaced with usual identifier - // and lifetime tokens, so the former are never encountered during normal parsing. - match **nt { - token::NtIdent(ident, is_raw) => { - Token::new(token::Ident(ident.name, is_raw), ident.span) - } - token::NtLifetime(ident) => Token::new(token::Lifetime(ident.name), ident.span), - _ => return, - } - } - _ => return, - }; - self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token)); - } - /// Parses a single token tree from the input. pub fn parse_token_tree(&mut self) -> TokenTree { match self.token.kind { diff --git a/src/test/ui/issues/issue-6596-1.rs b/src/test/ui/issues/issue-6596-1.rs index 5da54451346a5..25f1d6500726e 100644 --- a/src/test/ui/issues/issue-6596-1.rs +++ b/src/test/ui/issues/issue-6596-1.rs @@ -1,7 +1,7 @@ macro_rules! e { ($inp:ident) => ( $nonexistent - //~^ ERROR unknown macro variable `nonexistent` + //~^ ERROR expected expression, found `$` ); } diff --git a/src/test/ui/issues/issue-6596-1.stderr b/src/test/ui/issues/issue-6596-1.stderr index 4f29d8a927456..216fe6472a503 100644 --- a/src/test/ui/issues/issue-6596-1.stderr +++ b/src/test/ui/issues/issue-6596-1.stderr @@ -1,8 +1,8 @@ -error: unknown macro variable `nonexistent` +error: expected expression, found `$` --> $DIR/issue-6596-1.rs:3:9 | LL | $nonexistent - | ^^^^^^^^^^^^ unknown macro variable + | ^^^^^^^^^^^^ expected expression ... LL | e!(foo); | -------- in this macro invocation diff --git a/src/test/ui/issues/issue-6596-2.rs b/src/test/ui/issues/issue-6596-2.rs index b19700efe5ad3..8f7c98d9a67a7 100644 --- a/src/test/ui/issues/issue-6596-2.rs +++ b/src/test/ui/issues/issue-6596-2.rs @@ -3,7 +3,7 @@ macro_rules! g { ($inp:ident) => ( { $inp $nonexistent } - //~^ ERROR unknown macro variable `nonexistent` + //~^ ERROR expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `$` ); } diff --git a/src/test/ui/issues/issue-6596-2.stderr b/src/test/ui/issues/issue-6596-2.stderr index 4fcb0176faafd..3d13c64f762ea 100644 --- a/src/test/ui/issues/issue-6596-2.stderr +++ b/src/test/ui/issues/issue-6596-2.stderr @@ -1,8 +1,8 @@ -error: unknown macro variable `nonexistent` +error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `$` --> $DIR/issue-6596-2.rs:5:16 | LL | { $inp $nonexistent } - | ^^^^^^^^^^^^ unknown macro variable + | ^^^^^^^^^^^^ expected one of 8 possible tokens ... LL | g!(foo); | -------- in this macro invocation From ed2fd28d385c1cc9b2ab3e91513b4d2ffc612671 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 16 Feb 2020 21:36:50 +0300 Subject: [PATCH 2/4] parser: Set previous and unnormalized tokens in couple more places --- src/librustc_parse/lib.rs | 1 + src/librustc_parse/parser/item.rs | 7 ++++--- src/librustc_parse/parser/mod.rs | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index 4aad2c0f68a29..40d7a34a8b0d2 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -172,6 +172,7 @@ fn maybe_source_file_to_parser( parser.unclosed_delims = unclosed_delims; if parser.token == token::Eof && parser.token.span.is_dummy() { parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); + assert!(parser.unnormalized_token.is_none()); } Ok(parser) diff --git a/src/librustc_parse/parser/item.rs b/src/librustc_parse/parser/item.rs index d7b8d9778f0d2..5dc50a0cf2fde 100644 --- a/src/librustc_parse/parser/item.rs +++ b/src/librustc_parse/parser/item.rs @@ -1400,8 +1400,9 @@ impl<'a> Parser<'a> { } fn report_invalid_macro_expansion_item(&self, args: &MacArgs) { + let span = args.span().expect("undelimited macro call"); let mut err = self.struct_span_err( - self.prev_span, + span, "macros that expand to items must be delimited with braces or followed by a semicolon", ); if self.unclosed_delims.is_empty() { @@ -1416,14 +1417,14 @@ impl<'a> Parser<'a> { ); } else { err.span_suggestion( - self.prev_span, + span, "change the delimiters to curly braces", " { /* items */ }".to_string(), Applicability::HasPlaceholders, ); } err.span_suggestion( - self.prev_span.shrink_to_hi(), + span.shrink_to_hi(), "add a semicolon", ';'.to_string(), Applicability::MaybeIncorrect, diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index 4f96d33b83f2f..e04cfa374682b 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -95,7 +95,7 @@ pub struct Parser<'a> { /// The current non-normalized token if it's different from `token`. /// Preferable use is through the `unnormalized_token()` getter. /// Use span from this token if you need to concatenate it with some neighbouring spans. - unnormalized_token: Option, + pub unnormalized_token: Option, /// The previous normalized token. /// Use span from this token if you need an isolated span. prev_token: Token, @@ -1096,15 +1096,15 @@ impl<'a> Parser<'a> { &mut self.token_cursor.frame, self.token_cursor.stack.pop().unwrap(), ); - self.token.span = frame.span.entire(); + self.token = Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close); + self.unnormalized_token = None; self.bump(); TokenTree::Delimited(frame.span, frame.delim, frame.tree_cursor.stream.into()) } token::CloseDelim(_) | token::Eof => unreachable!(), _ => { - let token = self.token.clone(); self.bump(); - TokenTree::Token(token) + TokenTree::Token(self.prev_token.clone()) } } } From 06fbb0b4faefeaf70f4616d6af9bc0c1ebc69bc2 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 16 Feb 2020 23:19:51 +0300 Subject: [PATCH 3/4] parser: Remove `Option`s from unnormalized tokens They are always set synchronously with normalized tokens now --- src/librustc_parse/lib.rs | 8 ++--- src/librustc_parse/parser/expr.rs | 4 +-- src/librustc_parse/parser/mod.rs | 55 +++++++++++-------------------- src/librustc_parse/parser/path.rs | 2 +- 4 files changed, 26 insertions(+), 43 deletions(-) diff --git a/src/librustc_parse/lib.rs b/src/librustc_parse/lib.rs index 40d7a34a8b0d2..a0b8415b3e17e 100644 --- a/src/librustc_parse/lib.rs +++ b/src/librustc_parse/lib.rs @@ -9,7 +9,7 @@ use rustc_errors::{Diagnostic, FatalError, Level, PResult}; use rustc_session::parse::ParseSess; use rustc_span::{FileName, SourceFile, Span}; use syntax::ast; -use syntax::token::{self, Nonterminal}; +use syntax::token::{self, Nonterminal, Token}; use syntax::tokenstream::{self, TokenStream, TokenTree}; use std::path::{Path, PathBuf}; @@ -170,9 +170,9 @@ fn maybe_source_file_to_parser( let (stream, unclosed_delims) = maybe_file_to_stream(sess, source_file, None)?; let mut parser = stream_to_parser(sess, stream, None); parser.unclosed_delims = unclosed_delims; - if parser.token == token::Eof && parser.token.span.is_dummy() { - parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); - assert!(parser.unnormalized_token.is_none()); + if parser.token == token::Eof { + let span = Span::new(end_pos, end_pos, parser.token.span.ctxt()); + parser.set_token(Token::new(token::Eof, span)); } Ok(parser) diff --git a/src/librustc_parse/parser/expr.rs b/src/librustc_parse/parser/expr.rs index 51822ab2ea5a1..97daa91eed196 100644 --- a/src/librustc_parse/parser/expr.rs +++ b/src/librustc_parse/parser/expr.rs @@ -166,7 +166,7 @@ impl<'a> Parser<'a> { while let Some(op) = self.check_assoc_op() { // Adjust the span for interpolated LHS to point to the `$lhs` token // and not to what it refers to. - let lhs_span = match self.unnormalized_prev_token().kind { + let lhs_span = match self.unnormalized_prev_token.kind { TokenKind::Interpolated(..) => self.prev_span, _ => lhs.span, }; @@ -527,7 +527,7 @@ impl<'a> Parser<'a> { ) -> PResult<'a, (Span, P)> { expr.map(|e| { ( - match self.unnormalized_prev_token().kind { + match self.unnormalized_prev_token.kind { TokenKind::Interpolated(..) => self.prev_span, _ => e.span, }, diff --git a/src/librustc_parse/parser/mod.rs b/src/librustc_parse/parser/mod.rs index e04cfa374682b..937e5e3cd695b 100644 --- a/src/librustc_parse/parser/mod.rs +++ b/src/librustc_parse/parser/mod.rs @@ -93,18 +93,16 @@ pub struct Parser<'a> { /// Use span from this token if you need an isolated span. pub token: Token, /// The current non-normalized token if it's different from `token`. - /// Preferable use is through the `unnormalized_token()` getter. /// Use span from this token if you need to concatenate it with some neighbouring spans. - pub unnormalized_token: Option, + unnormalized_token: Token, /// The previous normalized token. /// Use span from this token if you need an isolated span. prev_token: Token, /// The previous non-normalized token if it's different from `prev_token`. - /// Preferable use is through the `unnormalized_prev_token()` getter. /// Use span from this token if you need to concatenate it with some neighbouring spans. - unnormalized_prev_token: Option, - /// Equivalent to `unnormalized_prev_token().span`. - /// FIXME: Remove in favor of `(unnormalized_)prev_token().span`. + unnormalized_prev_token: Token, + /// Equivalent to `unnormalized_prev_token.span`. + /// FIXME: Remove in favor of `(unnormalized_)prev_token.span`. pub prev_span: Span, restrictions: Restrictions, /// Used to determine the path to externally loaded source files. @@ -378,9 +376,9 @@ impl<'a> Parser<'a> { let mut parser = Parser { sess, token: Token::dummy(), - unnormalized_token: None, + unnormalized_token: Token::dummy(), prev_token: Token::dummy(), - unnormalized_prev_token: None, + unnormalized_prev_token: Token::dummy(), prev_span: DUMMY_SP, restrictions: Restrictions::empty(), recurse_into_file_modules, @@ -422,14 +420,6 @@ impl<'a> Parser<'a> { parser } - fn unnormalized_token(&self) -> &Token { - self.unnormalized_token.as_ref().unwrap_or(&self.token) - } - - fn unnormalized_prev_token(&self) -> &Token { - self.unnormalized_prev_token.as_ref().unwrap_or(&self.prev_token) - } - fn next_tok(&mut self, fallback_span: Span) -> Token { let mut next = if self.desugar_doc_comments { self.token_cursor.next_desugared() @@ -899,18 +889,17 @@ impl<'a> Parser<'a> { // Interpolated identifier (`$i: ident`) and lifetime (`$l: lifetime`) // tokens are replaced with usual identifier and lifetime tokens, // so the former are never encountered during normal parsing. - fn normalize_token(token: &Token) -> Option { - match &token.kind { + crate fn set_token(&mut self, token: Token) { + self.unnormalized_token = token; + self.token = match &self.unnormalized_token.kind { token::Interpolated(nt) => match **nt { token::NtIdent(ident, is_raw) => { - Some(Token::new(token::Ident(ident.name, is_raw), ident.span)) + Token::new(token::Ident(ident.name, is_raw), ident.span) } - token::NtLifetime(ident) => { - Some(Token::new(token::Lifetime(ident.name), ident.span)) - } - _ => None, + token::NtLifetime(ident) => Token::new(token::Lifetime(ident.name), ident.span), + _ => self.unnormalized_token.clone(), }, - _ => None, + _ => self.unnormalized_token.clone(), } } @@ -925,13 +914,11 @@ impl<'a> Parser<'a> { // Update the current and previous tokens. self.prev_token = self.token.take(); self.unnormalized_prev_token = self.unnormalized_token.take(); - self.token = self.next_tok(self.unnormalized_prev_token().span); - if let Some(normalized_token) = Self::normalize_token(&self.token) { - self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token)); - } + let next_token = self.next_tok(self.unnormalized_prev_token.span); + self.set_token(next_token); // Update fields derived from the previous token. - self.prev_span = self.unnormalized_prev_token().span; + self.prev_span = self.unnormalized_prev_token.span; self.expected_tokens.clear(); } @@ -945,13 +932,10 @@ impl<'a> Parser<'a> { // Update the current and previous tokens. self.prev_token = self.token.take(); self.unnormalized_prev_token = self.unnormalized_token.take(); - self.token = Token::new(next, span); - if let Some(normalized_token) = Self::normalize_token(&self.token) { - self.unnormalized_token = Some(mem::replace(&mut self.token, normalized_token)); - } + self.set_token(Token::new(next, span)); // Update fields derived from the previous token. - self.prev_span = self.unnormalized_prev_token().span.with_hi(span.lo()); + self.prev_span = self.unnormalized_prev_token.span.with_hi(span.lo()); self.expected_tokens.clear(); } @@ -1096,8 +1080,7 @@ impl<'a> Parser<'a> { &mut self.token_cursor.frame, self.token_cursor.stack.pop().unwrap(), ); - self.token = Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close); - self.unnormalized_token = None; + self.set_token(Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close)); self.bump(); TokenTree::Delimited(frame.span, frame.delim, frame.tree_cursor.stream.into()) } diff --git a/src/librustc_parse/parser/path.rs b/src/librustc_parse/parser/path.rs index 761c06b70ee8b..18e57c6a5d49f 100644 --- a/src/librustc_parse/parser/path.rs +++ b/src/librustc_parse/parser/path.rs @@ -134,7 +134,7 @@ impl<'a> Parser<'a> { path }); - let lo = self.unnormalized_token().span; + let lo = self.unnormalized_token.span; let mut segments = Vec::new(); let mod_sep_ctxt = self.token.span.ctxt(); if self.eat(&token::ModSep) { From 950845c5b1079c83e56db0ca2b4bb8fe050ee2f5 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Mon, 17 Feb 2020 22:47:40 +0300 Subject: [PATCH 4/4] Add a test for proc macro generating `$ IDENT` --- .../auxiliary/generate-dollar-ident.rs | 17 +++++++++++++++++ .../ui/proc-macro/generate-dollar-ident.rs | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 src/test/ui/proc-macro/auxiliary/generate-dollar-ident.rs create mode 100644 src/test/ui/proc-macro/generate-dollar-ident.rs diff --git a/src/test/ui/proc-macro/auxiliary/generate-dollar-ident.rs b/src/test/ui/proc-macro/auxiliary/generate-dollar-ident.rs new file mode 100644 index 0000000000000..c9f0664c3a3ac --- /dev/null +++ b/src/test/ui/proc-macro/auxiliary/generate-dollar-ident.rs @@ -0,0 +1,17 @@ +// force-host +// no-prefer-dynamic + +#![feature(proc_macro_hygiene)] +#![feature(proc_macro_quote)] +#![crate_type = "proc-macro"] + +extern crate proc_macro; +use proc_macro::*; + +#[proc_macro] +pub fn dollar_ident(input: TokenStream) -> TokenStream { + let black_hole = input.into_iter().next().unwrap(); + quote! { + $black_hole!($$var); + } +} diff --git a/src/test/ui/proc-macro/generate-dollar-ident.rs b/src/test/ui/proc-macro/generate-dollar-ident.rs new file mode 100644 index 0000000000000..b838be9fb9f2c --- /dev/null +++ b/src/test/ui/proc-macro/generate-dollar-ident.rs @@ -0,0 +1,18 @@ +// Proc macros can generate token sequence `$ IDENT` +// without it being recognized as an unknown macro variable. + +// check-pass +// aux-build:generate-dollar-ident.rs + +extern crate generate_dollar_ident; +use generate_dollar_ident::*; + +macro_rules! black_hole { + ($($tt:tt)*) => {}; +} + +black_hole!($var); + +dollar_ident!(black_hole); + +fn main() {}