From d21b4f30c1d96cdb9f46fb8435ee292b274a07c1 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 07:47:22 +1100 Subject: [PATCH 1/7] Introduce `TtParser`. It currently has no state, just the three methods `parse_tt`, `parse_tt_inner`, and `bb_items_ambiguity_error`. This commit is large but trivial, and mostly consists of changes to the indentation of those methods. Subsequent commits will do more. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 573 +++++++++--------- compiler/rustc_expand/src/mbe/macro_rules.rs | 54 +- 2 files changed, 323 insertions(+), 304 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 8cc81f1eca890..7b5835fce5401 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -492,319 +492,334 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { } } -/// Process the matcher positions of `cur_items` until it is empty. In the process, this will -/// produce more items in `next_items` and `bb_items`. -/// -/// For more info about the how this happens, see the module-level doc comments and the inline -/// comments of this function. -/// -/// # Parameters -/// -/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a -/// successful execution of this function. -/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in -/// the function `parse`. -/// - `bb_items`: the set of items that are waiting for the black-box parser. -/// - `token`: the current token of the parser. -/// -/// # Returns -/// -/// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept track of -/// through the items generated. 
-fn parse_tt_inner<'root, 'tt>( - sess: &ParseSess, - ms: &[TokenTree], - cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - next_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - token: &Token, -) -> Option { - // Matcher positions that would be valid if the macro invocation was over now. Only modified if - // `token == Eof`. - let mut eof_items = EofItems::None; - - while let Some(mut item) = cur_items.pop() { - // When unzipped trees end, remove them. This corresponds to backtracking out of a - // delimited submatcher into which we already descended. When backtracking out again, we - // need to advance the "dot" past the delimiters in the outer matcher. - while item.idx >= item.top_elts.len() { - match item.stack.pop() { - Some(MatcherTtFrame { elts, idx }) => { - item.top_elts = elts; - item.idx = idx + 1; +pub struct TtParser; + +impl TtParser { + /// Process the matcher positions of `cur_items` until it is empty. In the process, this will + /// produce more items in `next_items` and `bb_items`. + /// + /// For more info about the how this happens, see the module-level doc comments and the inline + /// comments of this function. + /// + /// # Parameters + /// + /// - `cur_items`: the set of current items to be processed. This should be empty by the end of + /// a successful execution of this function. + /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in + /// the function `parse`. + /// - `bb_items`: the set of items that are waiting for the black-box parser. + /// - `token`: the current token of the parser. + /// + /// # Returns + /// + /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept + /// track of through the items generated. 
+ fn parse_tt_inner<'root, 'tt>( + &self, + sess: &ParseSess, + ms: &[TokenTree], + cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + next_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + token: &Token, + ) -> Option { + // Matcher positions that would be valid if the macro invocation was over now. Only + // modified if `token == Eof`. + let mut eof_items = EofItems::None; + + while let Some(mut item) = cur_items.pop() { + // When unzipped trees end, remove them. This corresponds to backtracking out of a + // delimited submatcher into which we already descended. When backtracking out again, we + // need to advance the "dot" past the delimiters in the outer matcher. + while item.idx >= item.top_elts.len() { + match item.stack.pop() { + Some(MatcherTtFrame { elts, idx }) => { + item.top_elts = elts; + item.idx = idx + 1; + } + None => break, } - None => break, } - } - // Get the current position of the "dot" (`idx`) in `item` and the number of token trees in - // the matcher (`len`). - let idx = item.idx; - let len = item.top_elts.len(); - - if idx < len { - // We are in the middle of a matcher. Compare the matcher's current tt against `token`. - match item.top_elts.get_tt(idx) { - TokenTree::Sequence(sp, seq) => { - let op = seq.kleene.op; - if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne { - // Allow for the possibility of zero matches of this sequence. - let mut new_item = item.clone(); - new_item.match_cur += seq.num_captures; - new_item.idx += 1; - for idx in item.match_cur..item.match_cur + seq.num_captures { - new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![]))); + // Get the current position of the "dot" (`idx`) in `item` and the number of token + // trees in the matcher (`len`). + let idx = item.idx; + let len = item.top_elts.len(); + + if idx < len { + // We are in the middle of a matcher. Compare the matcher's current tt against + // `token`. 
+ match item.top_elts.get_tt(idx) { + TokenTree::Sequence(sp, seq) => { + let op = seq.kleene.op; + if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne { + // Allow for the possibility of zero matches of this sequence. + let mut new_item = item.clone(); + new_item.match_cur += seq.num_captures; + new_item.idx += 1; + for idx in item.match_cur..item.match_cur + seq.num_captures { + new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![]))); + } + cur_items.push(new_item); } - cur_items.push(new_item); + + // Allow for the possibility of one or more matches of this sequence. + cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos::repetition( + item, sp, seq, + )))); } - // Allow for the possibility of one or more matches of this sequence. - cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos::repetition( - item, sp, seq, - )))); - } + TokenTree::MetaVarDecl(span, _, None) => { + // E.g. `$e` instead of `$e:expr`. + if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { + return Some(Error(span, "missing fragment specifier".to_string())); + } + } - TokenTree::MetaVarDecl(span, _, None) => { - // E.g. `$e` instead of `$e:expr`. - if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { - return Some(Error(span, "missing fragment specifier".to_string())); + TokenTree::MetaVarDecl(_, _, Some(kind)) => { + // Built-in nonterminals never start with these tokens, so we can eliminate + // them from consideration. + // + // We use the span of the metavariable declaration to determine any + // edition-specific matching behavior for non-terminals. + if Parser::nonterminal_may_begin_with(kind, token) { + bb_items.push(item); + } } - } - TokenTree::MetaVarDecl(_, _, Some(kind)) => { - // Built-in nonterminals never start with these tokens, so we can eliminate - // them from consideration. 
- // - // We use the span of the metavariable declaration to determine any - // edition-specific matching behavior for non-terminals. - if Parser::nonterminal_may_begin_with(kind, token) { - bb_items.push(item); + seq @ (TokenTree::Delimited(..) + | TokenTree::Token(Token { kind: DocComment(..), .. })) => { + // To descend into a delimited submatcher or a doc comment, we push the + // current matcher onto a stack and push a new item containing the + // submatcher onto `cur_items`. + // + // At the beginning of the loop, if we reach the end of the delimited + // submatcher, we pop the stack to backtrack out of the descent. + let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); + let idx = item.idx; + item.stack.push(MatcherTtFrame { elts: lower_elts, idx }); + item.idx = 0; + cur_items.push(item); } - } - seq @ (TokenTree::Delimited(..) - | TokenTree::Token(Token { kind: DocComment(..), .. })) => { - // To descend into a delimited submatcher or a doc comment, we push the current - // matcher onto a stack and push a new item containing the submatcher onto - // `cur_items`. - // - // At the beginning of the loop, if we reach the end of the delimited - // submatcher, we pop the stack to backtrack out of the descent. - let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); - let idx = item.idx; - item.stack.push(MatcherTtFrame { elts: lower_elts, idx }); - item.idx = 0; - cur_items.push(item); + TokenTree::Token(t) => { + // If the token matches, we can just advance the parser. Otherwise, this + // match hash failed, there is nothing to do, and hopefully another item in + // `cur_items` will match. + if token_name_eq(&t, token) { + item.idx += 1; + next_items.push(item); + } + } + + // These cannot appear in a matcher. + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), + } + } else if let Some(repetition) = &item.repetition { + // We are past the end of a repetition. 
+ debug_assert!(idx <= len + 1); + debug_assert!(matches!(item.top_elts, Tt(TokenTree::Sequence(..)))); + + if idx == len { + // Add all matches from the sequence to `up`, and move the "dot" past the + // repetition in `up`. This allows for the case where the sequence matching is + // finished. + let mut new_pos = repetition.up.clone(); + for idx in item.match_lo..item.match_hi { + let sub = item.matches[idx].clone(); + new_pos.push_match(idx, MatchedSeq(sub)); + } + new_pos.match_cur = item.match_hi; + new_pos.idx += 1; + cur_items.push(new_pos); } - TokenTree::Token(t) => { - // If the token matches, we can just advance the parser. Otherwise, this match - // hash failed, there is nothing to do, and hopefully another item in - // `cur_items` will match. - if token_name_eq(&t, token) { + if idx == len && repetition.sep.is_some() { + if repetition.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) { + // The matcher has a separator, and it matches the current token. We can + // advance past the separator token. item.idx += 1; next_items.push(item); } + } else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne { + // We don't need a separator. Move the "dot" back to the beginning of the + // matcher and try to match again UNLESS we are only allowed to have _one_ + // repetition. + item.match_cur = item.match_lo; + item.idx = 0; + cur_items.push(item); } - - // These cannot appear in a matcher. - TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), - } - } else if let Some(repetition) = &item.repetition { - // We are past the end of a repetition. - debug_assert!(idx <= len + 1); - debug_assert!(matches!(item.top_elts, Tt(TokenTree::Sequence(..)))); - - if idx == len { - // Add all matches from the sequence to `up`, and move the "dot" past the - // repetition in `up`. This allows for the case where the sequence matching is - // finished. 
- let mut new_pos = repetition.up.clone(); - for idx in item.match_lo..item.match_hi { - let sub = item.matches[idx].clone(); - new_pos.push_match(idx, MatchedSeq(sub)); + } else { + // We are past the end of the matcher, and not in a repetition. Look for end of + // input. + debug_assert_eq!(idx, len); + if *token == token::Eof { + eof_items = match eof_items { + EofItems::None => EofItems::One(item), + EofItems::One(_) | EofItems::Multiple => EofItems::Multiple, + } } - new_pos.match_cur = item.match_hi; - new_pos.idx += 1; - cur_items.push(new_pos); } + } - if idx == len && repetition.sep.is_some() { - if repetition.sep.as_ref().map_or(false, |sep| token_name_eq(token, sep)) { - // The matcher has a separator, and it matches the current token. We can - // advance past the separator token. - item.idx += 1; - next_items.push(item); + // If we reached the end of input, check that there is EXACTLY ONE possible matcher. + // Otherwise, either the parse is ambiguous (which is an error) or there is a syntax error. + if *token == token::Eof { + Some(match eof_items { + EofItems::One(mut eof_item) => { + let matches = + eof_item.matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap()); + nameize(sess, ms, matches) } - } else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne { - // We don't need a separator. Move the "dot" back to the beginning of the - // matcher and try to match again UNLESS we are only allowed to have _one_ - // repetition. - item.match_cur = item.match_lo; - item.idx = 0; - cur_items.push(item); - } - } else { - // We are past the end of the matcher, and not in a repetition. Look for end of input. 
- debug_assert_eq!(idx, len); - if *token == token::Eof { - eof_items = match eof_items { - EofItems::None => EofItems::One(item), - EofItems::One(_) | EofItems::Multiple => EofItems::Multiple, + EofItems::Multiple => { + Error(token.span, "ambiguity: multiple successful parses".to_string()) } - } + EofItems::None => Failure( + Token::new( + token::Eof, + if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, + ), + "missing tokens in macro arguments", + ), + }) + } else { + None } } - // If we reached the end of input, check that there is EXACTLY ONE possible matcher. Otherwise, - // either the parse is ambiguous (which is an error) or there is a syntax error. - if *token == token::Eof { - Some(match eof_items { - EofItems::One(mut eof_item) => { - let matches = - eof_item.matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap()); - nameize(sess, ms, matches) + /// Use the given slice of token trees (`ms`) as a matcher. Match the token stream from the + /// given `parser` against it and return the match. + pub(super) fn parse_tt( + &self, + parser: &mut Cow<'_, Parser<'_>>, + ms: &[TokenTree], + macro_name: Ident, + ) -> NamedParseResult { + // A queue of possible matcher positions. We initialize it with the matcher position in + // which the "dot" is before the first token of the first token tree in `ms`. + // `parse_tt_inner` then processes all of these possible matcher positions and produces + // possible next positions into `next_items`. After some post-processing, the contents of + // `next_items` replenish `cur_items` and we start over again. + // + // This MatcherPos instance is allocated on the stack. All others -- and there are + // frequently *no* others! -- are allocated on the heap. + let mut initial = MatcherPos::new(ms); + let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)]; + + loop { + let mut next_items = SmallVec::new(); + + // Matcher positions black-box parsed by `Parser`. 
+ let mut bb_items = SmallVec::new(); + + // Process `cur_items` until either we have finished the input or we need to get some + // parsing from the black-box parser done. + if let Some(result) = self.parse_tt_inner( + parser.sess, + ms, + &mut cur_items, + &mut next_items, + &mut bb_items, + &parser.token, + ) { + return result; } - EofItems::Multiple => { - Error(token.span, "ambiguity: multiple successful parses".to_string()) - } - EofItems::None => Failure( - Token::new( - token::Eof, - if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, - ), - "missing tokens in macro arguments", - ), - }) - } else { - None - } -} -/// Use the given slice of token trees (`ms`) as a matcher. Match the token stream from the given -/// `parser` against it and return the match. -pub(super) fn parse_tt( - parser: &mut Cow<'_, Parser<'_>>, - ms: &[TokenTree], - macro_name: Ident, -) -> NamedParseResult { - // A queue of possible matcher positions. We initialize it with the matcher position in which - // the "dot" is before the first token of the first token tree in `ms`. `parse_tt_inner` then - // processes all of these possible matcher positions and produces possible next positions into - // `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items` - // and we start over again. - // - // This MatcherPos instance is allocated on the stack. All others -- and there are frequently - // *no* others! -- are allocated on the heap. - let mut initial = MatcherPos::new(ms); - let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)]; - - loop { - let mut next_items = SmallVec::new(); - - // Matcher positions black-box parsed by `Parser`. - let mut bb_items = SmallVec::new(); - - // Process `cur_items` until either we have finished the input or we need to get some - // parsing from the black-box parser done. 
- if let Some(result) = parse_tt_inner( - parser.sess, - ms, - &mut cur_items, - &mut next_items, - &mut bb_items, - &parser.token, - ) { - return result; - } - - // `parse_tt_inner` handled all cur_items, so it's empty. - assert!(cur_items.is_empty()); + // `parse_tt_inner` handled all cur_items, so it's empty. + assert!(cur_items.is_empty()); + + // Error messages here could be improved with links to original rules. + match (next_items.len(), bb_items.len()) { + (0, 0) => { + // There are no possible next positions AND we aren't waiting for the black-box + // parser: syntax error. + return Failure( + parser.token.clone(), + "no rules expected this token in macro call", + ); + } - // Error messages here could be improved with links to original rules. - match (next_items.len(), bb_items.len()) { - (0, 0) => { - // There are no possible next positions AND we aren't waiting for the black-box - // parser: syntax error. - return Failure(parser.token.clone(), "no rules expected this token in macro call"); - } + (_, 0) => { + // Dump all possible `next_items` into `cur_items` for the next iteration. Then + // process the next token. + cur_items.extend(next_items.drain(..)); + parser.to_mut().bump(); + } - (_, 0) => { - // Dump all possible `next_items` into `cur_items` for the next iteration. Then - // process the next token. - cur_items.extend(next_items.drain(..)); - parser.to_mut().bump(); - } + (0, 1) => { + // We need to call the black-box parser to get some nonterminal. + let mut item = bb_items.pop().unwrap(); + if let TokenTree::MetaVarDecl(span, _, Some(kind)) = + item.top_elts.get_tt(item.idx) + { + let match_cur = item.match_cur; + // We use the span of the metavariable declaration to determine any + // edition-specific matching behavior for non-terminals. 
+ let nt = match parser.to_mut().parse_nonterminal(kind) { + Err(mut err) => { + err.span_label( + span, + format!( + "while parsing argument for this `{kind}` macro fragment" + ), + ) + .emit(); + return ErrorReported; + } + Ok(nt) => nt, + }; + item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt))); + item.idx += 1; + item.match_cur += 1; + } else { + unreachable!() + } + cur_items.push(item); + } - (0, 1) => { - // We need to call the black-box parser to get some nonterminal. - let mut item = bb_items.pop().unwrap(); - if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx) - { - let match_cur = item.match_cur; - // We use the span of the metavariable declaration to determine any - // edition-specific matching behavior for non-terminals. - let nt = match parser.to_mut().parse_nonterminal(kind) { - Err(mut err) => { - err.span_label( - span, - format!("while parsing argument for this `{kind}` macro fragment"), - ) - .emit(); - return ErrorReported; - } - Ok(nt) => nt, - }; - item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt))); - item.idx += 1; - item.match_cur += 1; - } else { - unreachable!() + (_, _) => { + // Too many possibilities! + return self.bb_items_ambiguity_error( + macro_name, + next_items, + bb_items, + parser.token.span, + ); } - cur_items.push(item); } - (_, _) => { - // Too many possibilities! 
- return bb_items_ambiguity_error( - macro_name, - next_items, - bb_items, - parser.token.span, - ); - } + assert!(!cur_items.is_empty()); } - - assert!(!cur_items.is_empty()); } -} -fn bb_items_ambiguity_error<'root, 'tt>( - macro_name: Ident, - next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - bb_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - token_span: rustc_span::Span, -) -> NamedParseResult { - let nts = bb_items - .iter() - .map(|item| match item.top_elts.get_tt(item.idx) { - TokenTree::MetaVarDecl(_, bind, Some(kind)) => { - format!("{} ('{}')", kind, bind) - } - _ => panic!(), - }) - .collect::>() - .join(" or "); - - Error( - token_span, - format!( - "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", - match next_items.len() { - 0 => format!("built-in NTs {}.", nts), - 1 => format!("built-in NTs {} or 1 other option.", nts), - n => format!("built-in NTs {} or {} other options.", nts, n), - } - ), - ) + fn bb_items_ambiguity_error<'root, 'tt>( + &self, + macro_name: Ident, + next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + bb_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + token_span: rustc_span::Span, + ) -> NamedParseResult { + let nts = bb_items + .iter() + .map(|item| match item.top_elts.get_tt(item.idx) { + TokenTree::MetaVarDecl(_, bind, Some(kind)) => { + format!("{} ('{}')", kind, bind) + } + _ => panic!(), + }) + .collect::>() + .join(" or "); + + Error( + token_span, + format!( + "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", + match next_items.len() { + 0 => format!("built-in NTs {}.", nts), + 1 => format!("built-in NTs {} or 1 other option.", nts), + n => format!("built-in NTs {} or {} other options.", nts, n), + } + ), + ) + } } diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index b93edf8da7a64..eaf02607e701c 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ 
b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -3,8 +3,7 @@ use crate::base::{SyntaxExtension, SyntaxExtensionKind}; use crate::expand::{ensure_complete_parse, parse_ast_fragment, AstFragment, AstFragmentKind}; use crate::mbe; use crate::mbe::macro_check; -use crate::mbe::macro_parser::parse_tt; -use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success}; +use crate::mbe::macro_parser::{Error, ErrorReported, Failure, Success, TtParser}; use crate::mbe::macro_parser::{MatchedNonterminal, MatchedSeq}; use crate::mbe::transcribe::transcribe; @@ -246,6 +245,7 @@ fn generic_extension<'cx>( // this situation.) let parser = parser_from_cx(sess, arg.clone()); + let tt_parser = TtParser; for (i, lhs) in lhses.iter().enumerate() { // try each arm's matchers let lhs_tt = match *lhs { @@ -259,7 +259,7 @@ fn generic_extension<'cx>( // are not recorded. On the first `Success(..)`ful matcher, the spans are merged. let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut()); - match parse_tt(&mut Cow::Borrowed(&parser), lhs_tt, name) { + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs_tt, name) { Success(named_matches) => { // The matcher was `Success(..)`ful. // Merge the gated spans from parsing the matcher with the pre-existing ones. 
@@ -352,9 +352,11 @@ fn generic_extension<'cx>( mbe::TokenTree::Delimited(_, ref delim) => &delim.tts, _ => continue, }; - if let Success(_) = - parse_tt(&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), lhs_tt, name) - { + if let Success(_) = tt_parser.parse_tt( + &mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), + lhs_tt, + name, + ) { if comma_span.is_dummy() { err.note("you might be missing a comma"); } else { @@ -447,25 +449,27 @@ pub fn compile_declarative_macro( ]; let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS); - let argument_map = match parse_tt(&mut Cow::Borrowed(&parser), &argument_gram, def.ident) { - Success(m) => m, - Failure(token, msg) => { - let s = parse_failure_msg(&token); - let sp = token.span.substitute_dummy(def.span); - sess.parse_sess.span_diagnostic.struct_span_err(sp, &s).span_label(sp, msg).emit(); - return mk_syn_ext(Box::new(macro_rules_dummy_expander)); - } - Error(sp, msg) => { - sess.parse_sess - .span_diagnostic - .struct_span_err(sp.substitute_dummy(def.span), &msg) - .emit(); - return mk_syn_ext(Box::new(macro_rules_dummy_expander)); - } - ErrorReported => { - return mk_syn_ext(Box::new(macro_rules_dummy_expander)); - } - }; + let tt_parser = TtParser; + let argument_map = + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram, def.ident) { + Success(m) => m, + Failure(token, msg) => { + let s = parse_failure_msg(&token); + let sp = token.span.substitute_dummy(def.span); + sess.parse_sess.span_diagnostic.struct_span_err(sp, &s).span_label(sp, msg).emit(); + return mk_syn_ext(Box::new(macro_rules_dummy_expander)); + } + Error(sp, msg) => { + sess.parse_sess + .span_diagnostic + .struct_span_err(sp.substitute_dummy(def.span), &msg) + .emit(); + return mk_syn_ext(Box::new(macro_rules_dummy_expander)); + } + ErrorReported => { + return mk_syn_ext(Box::new(macro_rules_dummy_expander)); + } + }; let mut valid = true; From 354bd1071c0d7a8a636a211b9934a188fd64dabe Mon Sep 
17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 07:49:49 +1100 Subject: [PATCH 2/7] Rename `bb_items_ambiguity_error` as `ambiguity_error`. Because it involves `next_items` as well as `bb_items`. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 7b5835fce5401..98b63e4b5258a 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -779,7 +779,7 @@ impl TtParser { (_, _) => { // Too many possibilities! - return self.bb_items_ambiguity_error( + return self.ambiguity_error( macro_name, next_items, bb_items, @@ -792,7 +792,7 @@ impl TtParser { } } - fn bb_items_ambiguity_error<'root, 'tt>( + fn ambiguity_error<'root, 'tt>( &self, macro_name: Ident, next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, From 39810a85da1754070166bd2afc4daf0901b49ded Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 08:03:48 +1100 Subject: [PATCH 3/7] Add `TtParser::macro_name`. Instead of passing it into `parse_tt`. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 20 +++---- compiler/rustc_expand/src/mbe/macro_rules.rs | 53 +++++++++---------- 2 files changed, 35 insertions(+), 38 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 98b63e4b5258a..e2c586c31be0a 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -492,9 +492,15 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { } } -pub struct TtParser; +pub struct TtParser { + macro_name: Ident, +} impl TtParser { + pub(super) fn new(macro_name: Ident) -> Self { + Self { macro_name } + } + /// Process the matcher positions of `cur_items` until it is empty. 
In the process, this will /// produce more items in `next_items` and `bb_items`. /// @@ -693,7 +699,6 @@ impl TtParser { &self, parser: &mut Cow<'_, Parser<'_>>, ms: &[TokenTree], - macro_name: Ident, ) -> NamedParseResult { // A queue of possible matcher positions. We initialize it with the matcher position in // which the "dot" is before the first token of the first token tree in `ms`. @@ -779,12 +784,7 @@ impl TtParser { (_, _) => { // Too many possibilities! - return self.ambiguity_error( - macro_name, - next_items, - bb_items, - parser.token.span, - ); + return self.ambiguity_error(next_items, bb_items, parser.token.span); } } @@ -794,7 +794,6 @@ impl TtParser { fn ambiguity_error<'root, 'tt>( &self, - macro_name: Ident, next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, bb_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, token_span: rustc_span::Span, @@ -813,7 +812,8 @@ impl TtParser { Error( token_span, format!( - "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", + "local ambiguity when calling macro `{}`: multiple parsing options: {}", + self.macro_name, match next_items.len() { 0 => format!("built-in NTs {}.", nts), 1 => format!("built-in NTs {} or 1 other option.", nts), diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index eaf02607e701c..e853b3cb49ae7 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -245,7 +245,7 @@ fn generic_extension<'cx>( // this situation.) let parser = parser_from_cx(sess, arg.clone()); - let tt_parser = TtParser; + let tt_parser = TtParser::new(name); for (i, lhs) in lhses.iter().enumerate() { // try each arm's matchers let lhs_tt = match *lhs { @@ -259,7 +259,7 @@ fn generic_extension<'cx>( // are not recorded. On the first `Success(..)`ful matcher, the spans are merged. 
let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut()); - match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs_tt, name) { + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) { Success(named_matches) => { // The matcher was `Success(..)`ful. // Merge the gated spans from parsing the matcher with the pre-existing ones. @@ -352,11 +352,9 @@ fn generic_extension<'cx>( mbe::TokenTree::Delimited(_, ref delim) => &delim.tts, _ => continue, }; - if let Success(_) = tt_parser.parse_tt( - &mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), - lhs_tt, - name, - ) { + if let Success(_) = + tt_parser.parse_tt(&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), lhs_tt) + { if comma_span.is_dummy() { err.note("you might be missing a comma"); } else { @@ -449,27 +447,26 @@ pub fn compile_declarative_macro( ]; let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS); - let tt_parser = TtParser; - let argument_map = - match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram, def.ident) { - Success(m) => m, - Failure(token, msg) => { - let s = parse_failure_msg(&token); - let sp = token.span.substitute_dummy(def.span); - sess.parse_sess.span_diagnostic.struct_span_err(sp, &s).span_label(sp, msg).emit(); - return mk_syn_ext(Box::new(macro_rules_dummy_expander)); - } - Error(sp, msg) => { - sess.parse_sess - .span_diagnostic - .struct_span_err(sp.substitute_dummy(def.span), &msg) - .emit(); - return mk_syn_ext(Box::new(macro_rules_dummy_expander)); - } - ErrorReported => { - return mk_syn_ext(Box::new(macro_rules_dummy_expander)); - } - }; + let tt_parser = TtParser::new(def.ident); + let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) { + Success(m) => m, + Failure(token, msg) => { + let s = parse_failure_msg(&token); + let sp = token.span.substitute_dummy(def.span); + sess.parse_sess.span_diagnostic.struct_span_err(sp, &s).span_label(sp, msg).emit(); + 
return mk_syn_ext(Box::new(macro_rules_dummy_expander)); + } + Error(sp, msg) => { + sess.parse_sess + .span_diagnostic + .struct_span_err(sp.substitute_dummy(def.span), &msg) + .emit(); + return mk_syn_ext(Box::new(macro_rules_dummy_expander)); + } + ErrorReported => { + return mk_syn_ext(Box::new(macro_rules_dummy_expander)); + } + }; let mut valid = true; From 10644e0789a3c722e11f74968f24c1382f9ccb11 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 08:07:04 +1100 Subject: [PATCH 4/7] Remove an impossible code path. Doc comments cannot appear in a matcher. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index e2c586c31be0a..5e5cb23acd7a8 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -593,11 +593,9 @@ impl TtParser { } } - seq @ (TokenTree::Delimited(..) - | TokenTree::Token(Token { kind: DocComment(..), .. })) => { - // To descend into a delimited submatcher or a doc comment, we push the - // current matcher onto a stack and push a new item containing the - // submatcher onto `cur_items`. + seq @ TokenTree::Delimited(..) => { + // To descend into a delimited submatcher, we push the current matcher onto + // a stack and push a new item containing the submatcher onto `cur_items`. // // At the beginning of the loop, if we reach the end of the delimited // submatcher, we pop the stack to backtrack out of the descent. @@ -609,6 +607,9 @@ impl TtParser { } TokenTree::Token(t) => { + // Doc comments cannot appear in a matcher. + debug_assert!(!matches!(t, Token { kind: DocComment(..), .. })); + // If the token matches, we can just advance the parser. Otherwise, this // match hash failed, there is nothing to do, and hopefully another item in // `cur_items` will match. 
From cedb787f6e92fb079be75a9f2c00a808195543a9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 08:56:24 +1100 Subject: [PATCH 5/7] Remove `MatcherPosHandle`. This type was a small performance win for `html5ever`, which uses a macro with hundreds of very simple rules that don't contain any metavariables. But this type is complicated (extra lifetimes) and perf-neutral for macros that do have metavariables. This commit removes `MatcherPosHandle`, simplifying things a lot. This increases the allocation rate for `html5ever` and similar cases a bit, but makes things easier for follow-up changes that will improve performance more than what we lost here. --- compiler/rustc_expand/src/lib.rs | 1 + compiler/rustc_expand/src/mbe/macro_parser.rs | 110 ++++-------------- 2 files changed, 23 insertions(+), 88 deletions(-) diff --git a/compiler/rustc_expand/src/lib.rs b/compiler/rustc_expand/src/lib.rs index 8a9efe01368e3..14b3f720f83a1 100644 --- a/compiler/rustc_expand/src/lib.rs +++ b/compiler/rustc_expand/src/lib.rs @@ -1,5 +1,6 @@ #![feature(associated_type_bounds)] #![feature(associated_type_defaults)] +#![feature(box_syntax)] #![feature(crate_visibility_modifier)] #![feature(decl_macro)] #![feature(if_let_guard)] diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 5e5cb23acd7a8..267b468ca9967 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -89,7 +89,6 @@ use rustc_span::symbol::Ident; use std::borrow::Cow; use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::mem; -use std::ops::{Deref, DerefMut}; // To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. @@ -136,24 +135,8 @@ type NamedMatchVec = SmallVec<[NamedMatch; 4]>; /// Represents a single "position" (aka "matcher position", aka "item"), as /// described in the module documentation. 
-/// -/// Here: -/// -/// - `'root` represents the lifetime of the stack slot that holds the root -/// `MatcherPos`. As described in `MatcherPosHandle`, the root `MatcherPos` -/// structure is stored on the stack, but subsequent instances are put into -/// the heap. -/// - `'tt` represents the lifetime of the token trees that this matcher -/// position refers to. -/// -/// It is important to distinguish these two lifetimes because we have a -/// `SmallVec>` below, and the destructor of -/// that is considered to possibly access the data from its elements (it lacks -/// a `#[may_dangle]` attribute). As a result, the compiler needs to know that -/// all the elements in that `SmallVec` strictly outlive the root stack slot -/// lifetime. By separating `'tt` from `'root`, we can show that. #[derive(Clone)] -struct MatcherPos<'root, 'tt> { +struct MatcherPos<'tt> { /// The token or slice of tokens that make up the matcher. `elts` is short for "elements". top_elts: TokenTreeOrTokenTreeSlice<'tt>, @@ -185,7 +168,7 @@ struct MatcherPos<'root, 'tt> { match_hi: usize, /// This field is only used if we are matching a repetition. - repetition: Option>, + repetition: Option>, /// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from /// a delimited token tree (e.g., something wrapped in `(` `)`) or to get the contents of a doc @@ -200,9 +183,9 @@ struct MatcherPos<'root, 'tt> { // This type is used a lot. Make sure it doesn't unintentionally get bigger. #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -rustc_data_structures::static_assert_size!(MatcherPos<'_, '_>, 240); +rustc_data_structures::static_assert_size!(MatcherPos<'_>, 232); -impl<'root, 'tt> MatcherPos<'root, 'tt> { +impl<'tt> MatcherPos<'tt> { /// `len` `Vec`s (initially shared and empty) that will store matches of metavars. 
fn create_matches(len: usize) -> Box<[Lrc]> { if len == 0 { @@ -241,11 +224,7 @@ impl<'root, 'tt> MatcherPos<'root, 'tt> { } } - fn repetition( - up: MatcherPosHandle<'root, 'tt>, - sp: DelimSpan, - seq: Lrc, - ) -> Self { + fn repetition(up: Box>, sp: DelimSpan, seq: Lrc) -> Self { MatcherPos { stack: smallvec![], idx: 0, @@ -270,7 +249,7 @@ impl<'root, 'tt> MatcherPos<'root, 'tt> { } #[derive(Clone)] -struct MatcherPosRepetition<'root, 'tt> { +struct MatcherPosRepetition<'tt> { /// The KleeneOp of this sequence. seq_op: mbe::KleeneOp, @@ -279,55 +258,12 @@ struct MatcherPosRepetition<'root, 'tt> { /// The "parent" matcher position. That is, the matcher position just before we enter the /// sequence. - up: MatcherPosHandle<'root, 'tt>, -} - -// Lots of MatcherPos instances are created at runtime. Allocating them on the -// heap is slow. Furthermore, using SmallVec to allocate them all -// on the stack is also slow, because MatcherPos is quite a large type and -// instances get moved around a lot between vectors, which requires lots of -// slow memcpy calls. -// -// Therefore, the initial MatcherPos is always allocated on the stack, -// subsequent ones (of which there aren't that many) are allocated on the heap, -// and this type is used to encapsulate both cases. -enum MatcherPosHandle<'root, 'tt> { - Ref(&'root mut MatcherPos<'root, 'tt>), - Box(Box>), + up: Box>, } -impl<'root, 'tt> Clone for MatcherPosHandle<'root, 'tt> { - // This always produces a new Box. 
- fn clone(&self) -> Self { - MatcherPosHandle::Box(match *self { - MatcherPosHandle::Ref(ref r) => Box::new((**r).clone()), - MatcherPosHandle::Box(ref b) => b.clone(), - }) - } -} - -impl<'root, 'tt> Deref for MatcherPosHandle<'root, 'tt> { - type Target = MatcherPos<'root, 'tt>; - fn deref(&self) -> &Self::Target { - match *self { - MatcherPosHandle::Ref(ref r) => r, - MatcherPosHandle::Box(ref b) => b, - } - } -} - -impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> { - fn deref_mut(&mut self) -> &mut MatcherPos<'root, 'tt> { - match *self { - MatcherPosHandle::Ref(ref mut r) => r, - MatcherPosHandle::Box(ref mut b) => b, - } - } -} - -enum EofItems<'root, 'tt> { +enum EofItems<'tt> { None, - One(MatcherPosHandle<'root, 'tt>), + One(Box>), Multiple, } @@ -494,6 +430,10 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { pub struct TtParser { macro_name: Ident, + + cur_items: Vec>>, + next_items: Vec>>, + bb_items: Vec>>, } impl TtParser { @@ -520,13 +460,13 @@ impl TtParser { /// /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept /// track of through the items generated. - fn parse_tt_inner<'root, 'tt>( + fn parse_tt_inner<'tt>( &self, sess: &ParseSess, ms: &[TokenTree], - cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - next_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + cur_items: &mut SmallVec<[Box>; 1]>, + next_items: &mut SmallVec<[Box>; 1]>, + bb_items: &mut SmallVec<[Box>; 1]>, token: &Token, ) -> Option { // Matcher positions that would be valid if the macro invocation was over now. Only @@ -570,9 +510,7 @@ impl TtParser { } // Allow for the possibility of one or more matches of this sequence. 
- cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos::repetition( - item, sp, seq, - )))); + cur_items.push(box MatcherPos::repetition(item, sp, seq)); } TokenTree::MetaVarDecl(span, _, None) => { @@ -706,11 +644,7 @@ impl TtParser { // `parse_tt_inner` then processes all of these possible matcher positions and produces // possible next positions into `next_items`. After some post-processing, the contents of // `next_items` replenish `cur_items` and we start over again. - // - // This MatcherPos instance is allocated on the stack. All others -- and there are - // frequently *no* others! -- are allocated on the heap. - let mut initial = MatcherPos::new(ms); - let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)]; + let mut cur_items = smallvec![box MatcherPos::new(ms)]; loop { let mut next_items = SmallVec::new(); @@ -793,10 +727,10 @@ impl TtParser { } } - fn ambiguity_error<'root, 'tt>( + fn ambiguity_error<'tt>( &self, - next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - bb_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + next_items: SmallVec<[Box>; 1]>, + bb_items: SmallVec<[Box>; 1]>, token_span: rustc_span::Span, ) -> NamedParseResult { let nts = bb_items From 754dc8e66f77da219a077d147009816e8275eed3 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 09:53:41 +1100 Subject: [PATCH 6/7] Move items into `TtParser` as `Vec`s. By putting them in `TtParser`, we can reuse them for every rule in a macro. With that done, they can be `Vec` instead of `SmallVec`, and this is a performance win because these vectors are hot and `SmallVec` operations are a bit slower due to always needing an "inline or heap?" check.
--- compiler/rustc_expand/src/mbe/macro_parser.rs | 97 ++++++++----------- compiler/rustc_expand/src/mbe/macro_rules.rs | 4 +- 2 files changed, 43 insertions(+), 58 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 267b468ca9967..674cf8554f20a 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -428,17 +428,26 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { } } -pub struct TtParser { +// Note: the item vectors could be created and dropped within `parse_tt`, but to avoid excess +// allocations we have a single vector for each kind that is cleared and reused repeatedly. +pub struct TtParser<'tt> { macro_name: Ident, + /// The set of current items to be processed. This should be empty by the end of a successful + /// execution of `parse_tt_inner`. cur_items: Vec>>, + + /// The set of newly generated items. These are used to replenish `cur_items` in the function + /// `parse_tt`. next_items: Vec>>, + + /// The set of items that are waiting for the black-box parser. bb_items: Vec>>, } -impl TtParser { +impl<'tt> TtParser<'tt> { pub(super) fn new(macro_name: Ident) -> Self { - Self { macro_name } + Self { macro_name, cur_items: vec![], next_items: vec![], bb_items: vec![] } } /// Process the matcher positions of `cur_items` until it is empty. In the process, this will @@ -447,33 +456,21 @@ impl TtParser { /// For more info about the how this happens, see the module-level doc comments and the inline /// comments of this function. /// - /// # Parameters /// - /// - `cur_items`: the set of current items to be processed. This should be empty by the end of - /// a successful execution of this function. - /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in - /// the function `parse`. - /// - `bb_items`: the set of items that are waiting for the black-box parser.
- /// - `token`: the current token of the parser. - /// /// # Returns /// /// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept /// track of through the items generated. - fn parse_tt_inner<'tt>( - &self, + fn parse_tt_inner( + &mut self, sess: &ParseSess, ms: &[TokenTree], - cur_items: &mut SmallVec<[Box>; 1]>, - next_items: &mut SmallVec<[Box>; 1]>, - bb_items: &mut SmallVec<[Box>; 1]>, token: &Token, ) -> Option { // Matcher positions that would be valid if the macro invocation was over now. Only // modified if `token == Eof`. let mut eof_items = EofItems::None; - while let Some(mut item) = cur_items.pop() { + while let Some(mut item) = self.cur_items.pop() { // When unzipped trees end, remove them. This corresponds to backtracking out of a // delimited submatcher into which we already descended. When backtracking out again, we // need to advance the "dot" past the delimiters in the outer matcher. @@ -506,11 +503,11 @@ impl TtParser { for idx in item.match_cur..item.match_cur + seq.num_captures { new_item.push_match(idx, MatchedSeq(Lrc::new(smallvec![]))); } - cur_items.push(new_item); + self.cur_items.push(new_item); } // Allow for the possibility of one or more matches of this sequence. - cur_items.push(box MatcherPos::repetition(item, sp, seq)); + self.cur_items.push(box MatcherPos::repetition(item, sp, seq)); } TokenTree::MetaVarDecl(span, _, None) => { @@ -527,7 +524,7 @@ impl TtParser { // We use the span of the metavariable declaration to determine any // edition-specific matching behavior for non-terminals. if Parser::nonterminal_may_begin_with(kind, token) { - bb_items.push(item); + self.bb_items.push(item); } } @@ -541,7 +538,7 @@ impl TtParser { let idx = item.idx; item.stack.push(MatcherTtFrame { elts: lower_elts, idx }); item.idx = 0; - cur_items.push(item); + self.cur_items.push(item); } TokenTree::Token(t) => { @@ -553,7 +550,7 @@ impl TtParser { // `cur_items` will match. 
if token_name_eq(&t, token) { item.idx += 1; - next_items.push(item); + self.next_items.push(item); } } @@ -576,7 +573,7 @@ impl TtParser { } new_pos.match_cur = item.match_hi; new_pos.idx += 1; - cur_items.push(new_pos); + self.cur_items.push(new_pos); } if idx == len && repetition.sep.is_some() { @@ -584,7 +581,7 @@ impl TtParser { // The matcher has a separator, and it matches the current token. We can // advance past the separator token. item.idx += 1; - next_items.push(item); + self.next_items.push(item); } } else if repetition.seq_op != mbe::KleeneOp::ZeroOrOne { // We don't need a separator. Move the "dot" back to the beginning of the @@ -592,7 +589,7 @@ impl TtParser { // repetition. item.match_cur = item.match_lo; item.idx = 0; - cur_items.push(item); + self.cur_items.push(item); } } else { // We are past the end of the matcher, and not in a repetition. Look for end of @@ -635,41 +632,33 @@ impl TtParser { /// Use the given slice of token trees (`ms`) as a matcher. Match the token stream from the /// given `parser` against it and return the match. pub(super) fn parse_tt( - &self, + &mut self, parser: &mut Cow<'_, Parser<'_>>, - ms: &[TokenTree], + ms: &'tt [TokenTree], ) -> NamedParseResult { // A queue of possible matcher positions. We initialize it with the matcher position in // which the "dot" is before the first token of the first token tree in `ms`. // `parse_tt_inner` then processes all of these possible matcher positions and produces // possible next positions into `next_items`. After some post-processing, the contents of // `next_items` replenish `cur_items` and we start over again. - let mut cur_items = smallvec![box MatcherPos::new(ms)]; + self.cur_items.clear(); + self.cur_items.push(box MatcherPos::new(ms)); loop { - let mut next_items = SmallVec::new(); - - // Matcher positions black-box parsed by `Parser`. 
- let mut bb_items = SmallVec::new(); + self.next_items.clear(); + self.bb_items.clear(); // Process `cur_items` until either we have finished the input or we need to get some // parsing from the black-box parser done. - if let Some(result) = self.parse_tt_inner( - parser.sess, - ms, - &mut cur_items, - &mut next_items, - &mut bb_items, - &parser.token, - ) { + if let Some(result) = self.parse_tt_inner(parser.sess, ms, &parser.token) { return result; } // `parse_tt_inner` handled all cur_items, so it's empty. - assert!(cur_items.is_empty()); + assert!(self.cur_items.is_empty()); // Error messages here could be improved with links to original rules. - match (next_items.len(), bb_items.len()) { + match (self.next_items.len(), self.bb_items.len()) { (0, 0) => { // There are no possible next positions AND we aren't waiting for the black-box // parser: syntax error. @@ -682,13 +671,13 @@ impl TtParser { (_, 0) => { // Dump all possible `next_items` into `cur_items` for the next iteration. Then // process the next token. - cur_items.extend(next_items.drain(..)); + self.cur_items.extend(self.next_items.drain(..)); parser.to_mut().bump(); } (0, 1) => { // We need to call the black-box parser to get some nonterminal. - let mut item = bb_items.pop().unwrap(); + let mut item = self.bb_items.pop().unwrap(); if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx) { @@ -714,26 +703,22 @@ impl TtParser { } else { unreachable!() } - cur_items.push(item); + self.cur_items.push(item); } (_, _) => { // Too many possibilities! 
- return self.ambiguity_error(next_items, bb_items, parser.token.span); + return self.ambiguity_error(parser.token.span); } } - assert!(!cur_items.is_empty()); + assert!(!self.cur_items.is_empty()); } } - fn ambiguity_error<'tt>( - &self, - next_items: SmallVec<[Box>; 1]>, - bb_items: SmallVec<[Box>; 1]>, - token_span: rustc_span::Span, - ) -> NamedParseResult { - let nts = bb_items + fn ambiguity_error(&self, token_span: rustc_span::Span) -> NamedParseResult { + let nts = self + .bb_items .iter() .map(|item| match item.top_elts.get_tt(item.idx) { TokenTree::MetaVarDecl(_, bind, Some(kind)) => { @@ -749,7 +734,7 @@ impl TtParser { format!( "local ambiguity when calling macro `{}`: multiple parsing options: {}", self.macro_name, - match next_items.len() { + match self.next_items.len() { 0 => format!("built-in NTs {}.", nts), 1 => format!("built-in NTs {} or 1 other option.", nts), n => format!("built-in NTs {} or {} other options.", nts, n), diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index e853b3cb49ae7..db4d55256b6c0 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -245,7 +245,7 @@ fn generic_extension<'cx>( // this situation.) 
let parser = parser_from_cx(sess, arg.clone()); - let tt_parser = TtParser::new(name); + let mut tt_parser = TtParser::new(name); for (i, lhs) in lhses.iter().enumerate() { // try each arm's matchers let lhs_tt = match *lhs { @@ -447,7 +447,7 @@ pub fn compile_declarative_macro( ]; let parser = Parser::new(&sess.parse_sess, body, true, rustc_parse::MACRO_ARGUMENTS); - let tt_parser = TtParser::new(def.ident); + let mut tt_parser = TtParser::new(def.ident); let argument_map = match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), &argument_gram) { Success(m) => m, Failure(token, msg) => { From 31df6807893bd9c66cba57ff0f4de89ab9d8460e Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Sat, 19 Mar 2022 16:20:07 +1100 Subject: [PATCH 7/7] Eliminate `TokenTreeOrTokenTreeSlice`. As its name suggests, `TokenTreeOrTokenTreeSlice` is either a single `TokenTree` or a slice of them. It has methods `len` and `get_tt` that let it be treated much like an ordinary slice. The reason it isn't an ordinary slice is that for `TokenTree::Delimited` the open and close delimiters are represented implicitly, and when they are needed they are constructed on the fly with `Delimited::{open,close}_tt`, rather than being present in memory. This commit changes `Delimited` so the open and close delimiters are represented explicitly. As a result, `TokenTreeOrTokenTreeSlice` is no longer needed and `MatcherPos` and `MatcherTtFrame` can just use an ordinary slice. `TokenTree::{len,get_tt}` are also removed, because they were only needed to support `TokenTreeOrTokenTreeSlice`. The change makes the code shorter and a little bit faster on benchmarks that use macro expansion heavily, partly because `MatcherPos` is a lot smaller (less data to `memcpy`) and partly because ordinary slice operations are faster than `TokenTreeOrTokenTreeSlice::{len,get_tt}`. 
--- compiler/rustc_expand/src/mbe.rs | 88 +++++++++---------- compiler/rustc_expand/src/mbe/macro_check.rs | 16 ++-- compiler/rustc_expand/src/mbe/macro_parser.rs | 82 ++++++----------- compiler/rustc_expand/src/mbe/macro_rules.rs | 81 ++++++++--------- compiler/rustc_expand/src/mbe/quoted.rs | 30 +++---- compiler/rustc_expand/src/mbe/transcribe.rs | 22 +++-- 6 files changed, 146 insertions(+), 173 deletions(-) diff --git a/compiler/rustc_expand/src/mbe.rs b/compiler/rustc_expand/src/mbe.rs index 3d4c77aba7339..0f7a8a9ed7550 100644 --- a/compiler/rustc_expand/src/mbe.rs +++ b/compiler/rustc_expand/src/mbe.rs @@ -17,23 +17,48 @@ use rustc_data_structures::sync::Lrc; use rustc_span::symbol::Ident; use rustc_span::Span; -/// Contains the sub-token-trees of a "delimited" token tree, such as the contents of `(`. Note -/// that the delimiter itself might be `NoDelim`. +/// Contains the sub-token-trees of a "delimited" token tree such as `(a b c)`. The delimiter itself +/// might be `NoDelim`. #[derive(Clone, PartialEq, Encodable, Decodable, Debug)] struct Delimited { delim: token::DelimToken, - tts: Vec, + /// Note: This contains the opening and closing delimiters tokens (e.g. `(` and `)`). Note that + /// these could be `NoDelim`. These token kinds must match `delim`, and the methods below + /// debug_assert this. + all_tts: Vec, } impl Delimited { - /// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter. - fn open_tt(&self, span: DelimSpan) -> TokenTree { - TokenTree::token(token::OpenDelim(self.delim), span.open) + /// Returns a `self::TokenTree` with a `Span` corresponding to the opening delimiter. Panics if + /// the delimiter is `NoDelim`. + fn open_tt(&self) -> &TokenTree { + let tt = self.all_tts.first().unwrap(); + debug_assert!(matches!( + tt, + &TokenTree::Token(token::Token { kind: token::OpenDelim(d), .. }) if d == self.delim + )); + tt + } + + /// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter. 
Panics if + /// the delimiter is `NoDelim`. + fn close_tt(&self) -> &TokenTree { + let tt = self.all_tts.last().unwrap(); + debug_assert!(matches!( + tt, + &TokenTree::Token(token::Token { kind: token::CloseDelim(d), .. }) if d == self.delim + )); + tt } - /// Returns a `self::TokenTree` with a `Span` corresponding to the closing delimiter. - fn close_tt(&self, span: DelimSpan) -> TokenTree { - TokenTree::token(token::CloseDelim(self.delim), span.close) + /// Returns the tts excluding the outer delimiters. + /// + /// FIXME: #67062 has details about why this is sub-optimal. + fn inner_tts(&self) -> &[TokenTree] { + // These functions are called for the assertions within them. + let _open_tt = self.open_tt(); + let _close_tt = self.close_tt(); + &self.all_tts[1..self.all_tts.len() - 1] } } @@ -73,35 +98,24 @@ enum KleeneOp { ZeroOrOne, } -/// Similar to `tokenstream::TokenTree`, except that `$i`, `$i:ident`, `$(...)`, -/// and `${...}` are "first-class" token trees. Useful for parsing macros. +/// Similar to `tokenstream::TokenTree`, except that `Sequence`, `MetaVar`, `MetaVarDecl`, and +/// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros. #[derive(Debug, Clone, PartialEq, Encodable, Decodable)] enum TokenTree { Token(Token), + /// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS). Delimited(DelimSpan, Lrc), - /// A kleene-style repetition sequence + /// A kleene-style repetition sequence, e.g. `$($e:expr)*` (RHS) or `$($e),*` (LHS). Sequence(DelimSpan, Lrc), - /// e.g., `$var` + /// e.g., `$var`. MetaVar(Span, Ident), - /// e.g., `$var:expr`. This is only used in the left hand side of MBE macros. + /// e.g., `$var:expr`. Only appears on the LHS. MetaVarDecl(Span, Ident /* name to bind */, Option), - /// A meta-variable expression inside `${...}` + /// A meta-variable expression inside `${...}`. MetaVarExpr(DelimSpan, MetaVarExpr), } impl TokenTree { - /// Return the number of tokens in the tree.
- fn len(&self) -> usize { - match *self { - TokenTree::Delimited(_, ref delimed) => match delimed.delim { - token::NoDelim => delimed.tts.len(), - _ => delimed.tts.len() + 2, - }, - TokenTree::Sequence(_, ref seq) => seq.tts.len(), - _ => 0, - } - } - /// Returns `true` if the given token tree is delimited. fn is_delimited(&self) -> bool { matches!(*self, TokenTree::Delimited(..)) @@ -115,26 +129,6 @@ impl TokenTree { } } - /// Gets the `index`-th sub-token-tree. This only makes sense for delimited trees and sequences. - fn get_tt(&self, index: usize) -> TokenTree { - match (self, index) { - (&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => { - delimed.tts[index].clone() - } - (&TokenTree::Delimited(span, ref delimed), _) => { - if index == 0 { - return delimed.open_tt(span); - } - if index == delimed.tts.len() + 1 { - return delimed.close_tt(span); - } - delimed.tts[index - 1].clone() - } - (&TokenTree::Sequence(_, ref seq), _) => seq.tts[index].clone(), - _ => panic!("Cannot expand a token tree"), - } - } - /// Retrieves the `TokenTree`'s span. fn span(&self) -> Span { match *self { diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs index f18cf95a2bf11..b55a40c5b2c4b 100644 --- a/compiler/rustc_expand/src/mbe/macro_check.rs +++ b/compiler/rustc_expand/src/mbe/macro_check.rs @@ -281,7 +281,7 @@ fn check_binders( // `MetaVarExpr` can not appear in the LHS of a macro arm TokenTree::MetaVarExpr(..) 
=> {} TokenTree::Delimited(_, ref del) => { - for tt in &del.tts { + for tt in del.inner_tts() { check_binders(sess, node_id, tt, macros, binders, ops, valid); } } @@ -344,7 +344,7 @@ fn check_occurrences( check_ops_is_prefix(sess, node_id, macros, binders, ops, dl.entire(), name); } TokenTree::Delimited(_, ref del) => { - check_nested_occurrences(sess, node_id, &del.tts, macros, binders, ops, valid); + check_nested_occurrences(sess, node_id, del.inner_tts(), macros, binders, ops, valid); } TokenTree::Sequence(_, ref seq) => { let ops = ops.push(seq.kleene); @@ -431,14 +431,20 @@ fn check_nested_occurrences( { let macro_rules = state == NestedMacroState::MacroRulesNotName; state = NestedMacroState::Empty; - let rest = - check_nested_macro(sess, node_id, macro_rules, &del.tts, &nested_macros, valid); + let rest = check_nested_macro( + sess, + node_id, + macro_rules, + del.inner_tts(), + &nested_macros, + valid, + ); // If we did not check the whole macro definition, then check the rest as if outside // the macro definition. check_nested_occurrences( sess, node_id, - &del.tts[rest..], + &del.inner_tts()[rest..], macros, binders, ops, diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 674cf8554f20a..d8071bf159a74 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -72,9 +72,8 @@ crate use NamedMatch::*; crate use ParseResult::*; -use TokenTreeOrTokenTreeSlice::*; -use crate::mbe::{self, DelimSpan, SequenceRepetition, TokenTree}; +use crate::mbe::{self, SequenceRepetition, TokenTree}; use rustc_ast::token::{self, DocComment, Nonterminal, Token}; use rustc_parse::parser::Parser; @@ -90,35 +89,6 @@ use std::borrow::Cow; use std::collections::hash_map::Entry::{Occupied, Vacant}; use std::mem; -// To avoid costly uniqueness checks, we require that `MatchSeq` always has a nonempty body. - -/// Either a slice of token trees or a single one. 
This is used as the representation of the -/// token trees that make up a matcher. -#[derive(Clone)] -enum TokenTreeOrTokenTreeSlice<'tt> { - Tt(TokenTree), - TtSlice(&'tt [TokenTree]), -} - -impl<'tt> TokenTreeOrTokenTreeSlice<'tt> { - /// Returns the number of constituent top-level token trees of `self` (top-level in that it - /// will not recursively descend into subtrees). - fn len(&self) -> usize { - match *self { - TtSlice(ref v) => v.len(), - Tt(ref tt) => tt.len(), - } - } - - /// The `index`-th token tree of `self`. - fn get_tt(&self, index: usize) -> TokenTree { - match *self { - TtSlice(ref v) => v[index].clone(), - Tt(ref tt) => tt.get_tt(index), - } - } -} - /// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`. /// /// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have @@ -126,7 +96,7 @@ impl<'tt> TokenTreeOrTokenTreeSlice<'tt> { #[derive(Clone)] struct MatcherTtFrame<'tt> { /// The "parent" matcher that we are descending into. - elts: TokenTreeOrTokenTreeSlice<'tt>, + elts: &'tt [TokenTree], /// The position of the "dot" in `elts` at the time we descended. idx: usize, } @@ -138,7 +108,7 @@ type NamedMatchVec = SmallVec<[NamedMatch; 4]>; #[derive(Clone)] struct MatcherPos<'tt> { /// The token or slice of tokens that make up the matcher. `elts` is short for "elements". - top_elts: TokenTreeOrTokenTreeSlice<'tt>, + top_elts: &'tt [TokenTree], /// The position of the "dot" in this matcher idx: usize, @@ -183,7 +153,7 @@ struct MatcherPos<'tt> { // This type is used a lot. Make sure it doesn't unintentionally get bigger. #[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] -rustc_data_structures::static_assert_size!(MatcherPos<'_>, 232); +rustc_data_structures::static_assert_size!(MatcherPos<'_>, 136); impl<'tt> MatcherPos<'tt> { /// `len` `Vec`s (initially shared and empty) that will store matches of metavars. 
@@ -203,7 +173,7 @@ impl<'tt> MatcherPos<'tt> { let match_idx_hi = count_names(ms); MatcherPos { // Start with the top level matcher given to us. - top_elts: TtSlice(ms), + top_elts: ms, // The "dot" is before the first token of the matcher. idx: 0, @@ -224,9 +194,9 @@ impl<'tt> MatcherPos<'tt> { } } - fn repetition(up: Box>, sp: DelimSpan, seq: Lrc) -> Self { + fn repetition(up: Box>, seq: &'tt SequenceRepetition) -> Self { MatcherPos { - stack: smallvec![], + top_elts: &seq.tts, idx: 0, matches: Self::create_matches(up.matches.len()), match_lo: up.match_cur, @@ -237,7 +207,7 @@ impl<'tt> MatcherPos<'tt> { sep: seq.separator.clone(), seq_op: seq.kleene.op, }), - top_elts: Tt(TokenTree::Sequence(sp, seq)), + stack: smallvec![], } } @@ -288,8 +258,8 @@ crate type NamedParseResult = ParseResult usize { ms.iter().fold(0, |count, elt| { count - + match *elt { - TokenTree::Delimited(_, ref delim) => count_names(&delim.tts), + + match elt { + TokenTree::Delimited(_, delim) => count_names(delim.inner_tts()), TokenTree::MetaVar(..) => 0, TokenTree::MetaVarDecl(..) => 1, // Panicking here would abort execution because `parse_tree` makes use of this @@ -298,7 +268,7 @@ pub(super) fn count_names(ms: &[TokenTree]) -> usize { // `0` is still returned to inform that no meta-variable was found. `Meta-variables // != Meta-variable expressions` TokenTree::MetaVarExpr(..) => 0, - TokenTree::Sequence(_, ref seq) => seq.num_captures, + TokenTree::Sequence(_, seq) => seq.num_captures, TokenTree::Token(..) 
=> 0, } }) @@ -382,7 +352,7 @@ fn nameize>( } } TokenTree::Delimited(_, ref delim) => { - for next_m in &delim.tts { + for next_m in delim.inner_tts() { n_rec(sess, next_m, res.by_ref(), ret_val)?; } } @@ -446,8 +416,8 @@ pub struct TtParser<'tt> { } impl<'tt> TtParser<'tt> { - pub(super) fn new(macro_name: Ident) -> Self { - Self { macro_name, cur_items: vec![], next_items: vec![], bb_items: vec![] } + pub(super) fn new(macro_name: Ident) -> TtParser<'tt> { + TtParser { macro_name, cur_items: vec![], next_items: vec![], bb_items: vec![] } } /// Process the matcher positions of `cur_items` until it is empty. In the process, this will @@ -492,8 +462,8 @@ impl<'tt> TtParser<'tt> { if idx < len { // We are in the middle of a matcher. Compare the matcher's current tt against // `token`. - match item.top_elts.get_tt(idx) { - TokenTree::Sequence(sp, seq) => { + match &item.top_elts[idx] { + TokenTree::Sequence(_sp, seq) => { let op = seq.kleene.op; if op == mbe::KleeneOp::ZeroOrMore || op == mbe::KleeneOp::ZeroOrOne { // Allow for the possibility of zero matches of this sequence. @@ -507,17 +477,17 @@ impl<'tt> TtParser<'tt> { } // Allow for the possibility of one or more matches of this sequence. - self.cur_items.push(box MatcherPos::repetition(item, sp, seq)); + self.cur_items.push(box MatcherPos::repetition(item, &seq)); } - TokenTree::MetaVarDecl(span, _, None) => { + &TokenTree::MetaVarDecl(span, _, None) => { // E.g. `$e` instead of `$e:expr`. if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { return Some(Error(span, "missing fragment specifier".to_string())); } } - TokenTree::MetaVarDecl(_, _, Some(kind)) => { + &TokenTree::MetaVarDecl(_, _, Some(kind)) => { // Built-in nonterminals never start with these tokens, so we can eliminate // them from consideration. // @@ -528,13 +498,14 @@ impl<'tt> TtParser<'tt> { } } - seq @ TokenTree::Delimited(..) 
=> { + TokenTree::Delimited(_, delimited) => { // To descend into a delimited submatcher, we push the current matcher onto // a stack and push a new item containing the submatcher onto `cur_items`. // // At the beginning of the loop, if we reach the end of the delimited - // submatcher, we pop the stack to backtrack out of the descent. - let lower_elts = mem::replace(&mut item.top_elts, Tt(seq)); + // submatcher, we pop the stack to backtrack out of the descent. Note that + // we use `all_tts` to include the open and close delimiter tokens. + let lower_elts = mem::replace(&mut item.top_elts, &delimited.all_tts); let idx = item.idx; item.stack.push(MatcherTtFrame { elts: lower_elts, idx }); item.idx = 0; @@ -560,7 +531,6 @@ impl<'tt> TtParser<'tt> { } else if let Some(repetition) = &item.repetition { // We are past the end of a repetition. debug_assert!(idx <= len + 1); - debug_assert!(matches!(item.top_elts, Tt(TokenTree::Sequence(..)))); if idx == len { // Add all matches from the sequence to `up`, and move the "dot" past the @@ -678,9 +648,7 @@ impl<'tt> TtParser<'tt> { (0, 1) => { // We need to call the black-box parser to get some nonterminal. let mut item = self.bb_items.pop().unwrap(); - if let TokenTree::MetaVarDecl(span, _, Some(kind)) = - item.top_elts.get_tt(item.idx) - { + if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts[item.idx] { let match_cur = item.match_cur; // We use the span of the metavariable declaration to determine any // edition-specific matching behavior for non-terminals. 
@@ -720,7 +688,7 @@ impl<'tt> TtParser<'tt> { let nts = self .bb_items .iter() - .map(|item| match item.top_elts.get_tt(item.idx) { + .map(|item| match item.top_elts[item.idx] { TokenTree::MetaVarDecl(_, bind, Some(kind)) => { format!("{} ('{}')", kind, bind) } diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index db4d55256b6c0..f13b97251d210 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -203,15 +203,15 @@ fn trace_macros_note(cx_expansions: &mut FxHashMap>, sp: Span, } /// Given `lhses` and `rhses`, this is the new macro we create -fn generic_extension<'cx>( +fn generic_extension<'cx, 'tt>( cx: &'cx mut ExtCtxt<'_>, sp: Span, def_span: Span, name: Ident, transparency: Transparency, arg: TokenStream, - lhses: &[mbe::TokenTree], - rhses: &[mbe::TokenTree], + lhses: &'tt [mbe::TokenTree], + rhses: &'tt [mbe::TokenTree], is_local: bool, ) -> Box { let sess = &cx.sess.parse_sess; @@ -245,31 +245,30 @@ fn generic_extension<'cx>( // this situation.) let parser = parser_from_cx(sess, arg.clone()); - let mut tt_parser = TtParser::new(name); - for (i, lhs) in lhses.iter().enumerate() { - // try each arm's matchers - let lhs_tt = match *lhs { - mbe::TokenTree::Delimited(_, ref delim) => &delim.tts, + // A matcher is always delimited, but the delimiters are ignored. + let delimited_inner_tts = |tt: &'tt mbe::TokenTree| -> &'tt [mbe::TokenTree] { + match tt { + mbe::TokenTree::Delimited(_, delimited) => delimited.inner_tts(), _ => cx.span_bug(sp, "malformed macro lhs"), - }; + } + }; + // Try each arm's matchers. + let mut tt_parser = TtParser::new(name); + for (i, lhs) in lhses.iter().enumerate() { // Take a snapshot of the state of pre-expansion gating at this point. // This is used so that if a matcher is not `Success(..)`ful, // then the spans which became gated when parsing the unsuccessful matcher // are not recorded. 
On the first `Success(..)`ful matcher, the spans are merged. let mut gated_spans_snapshot = mem::take(&mut *sess.gated_spans.spans.borrow_mut()); - match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), lhs_tt) { + match tt_parser.parse_tt(&mut Cow::Borrowed(&parser), delimited_inner_tts(lhs)) { Success(named_matches) => { // The matcher was `Success(..)`ful. // Merge the gated spans from parsing the matcher with the pre-existing ones. sess.gated_spans.merge(gated_spans_snapshot); - let rhs = match rhses[i] { - // ignore delimiters - mbe::TokenTree::Delimited(_, ref delimed) => delimed.tts.clone(), - _ => cx.span_bug(sp, "malformed macro rhs"), - }; + let rhs = delimited_inner_tts(&rhses[i]).to_vec().clone(); let arm_span = rhses[i].span(); let rhs_spans = rhs.iter().map(|t| t.span()).collect::>(); @@ -347,14 +346,10 @@ fn generic_extension<'cx>( // Check whether there's a missing comma in this macro call, like `println!("{}" a);` if let Some((arg, comma_span)) = arg.add_comma() { for lhs in lhses { - // try each arm's matchers - let lhs_tt = match *lhs { - mbe::TokenTree::Delimited(_, ref delim) => &delim.tts, - _ => continue, - }; - if let Success(_) = - tt_parser.parse_tt(&mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), lhs_tt) - { + if let Success(_) = tt_parser.parse_tt( + &mut Cow::Borrowed(&parser_from_cx(sess, arg.clone())), + delimited_inner_tts(lhs), + ) { if comma_span.is_dummy() { err.note("you might be missing a comma"); } else { @@ -477,16 +472,17 @@ pub fn compile_declarative_macro( .map(|m| { if let MatchedNonterminal(ref nt) = *m { if let NtTT(ref tt) = **nt { - let tt = mbe::quoted::parse( + let mut tts = vec![]; + mbe::quoted::parse( tt.clone().into(), true, &sess.parse_sess, def.id, features, edition, - ) - .pop() - .unwrap(); + &mut tts, + ); + let tt = tts.pop().unwrap(); valid &= check_lhs_nt_follows(&sess.parse_sess, features, &def, &tt); return tt; } @@ -503,16 +499,17 @@ pub fn compile_declarative_macro( .map(|m| { if let 
MatchedNonterminal(ref nt) = *m { if let NtTT(ref tt) = **nt { - return mbe::quoted::parse( + let mut tts = vec![]; + mbe::quoted::parse( tt.clone().into(), false, &sess.parse_sess, def.id, features, edition, - ) - .pop() - .unwrap(); + &mut tts, + ); + return tts.pop().unwrap(); } } sess.parse_sess.span_diagnostic.span_bug(def.span, "wrong-structured lhs") @@ -564,8 +561,8 @@ fn check_lhs_nt_follows( ) -> bool { // lhs is going to be like TokenTree::Delimited(...), where the // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens. - if let mbe::TokenTree::Delimited(_, ref tts) = *lhs { - check_matcher(sess, features, def, &tts.tts) + if let mbe::TokenTree::Delimited(_, delimited) = lhs { + check_matcher(sess, features, def, delimited.inner_tts()) } else { let msg = "invalid macro matcher; matchers must be contained in balanced delimiters"; sess.span_diagnostic.span_err(lhs.span(), msg); @@ -586,7 +583,7 @@ fn check_lhs_no_empty_seq(sess: &ParseSess, tts: &[mbe::TokenTree]) -> bool { | TokenTree::MetaVarDecl(..) | TokenTree::MetaVarExpr(..) => (), TokenTree::Delimited(_, ref del) => { - if !check_lhs_no_empty_seq(sess, &del.tts) { + if !check_lhs_no_empty_seq(sess, del.inner_tts()) { return false; } } @@ -681,9 +678,9 @@ impl FirstSets { | TokenTree::MetaVarExpr(..) 
=> { first.replace_with(tt.clone()); } - TokenTree::Delimited(span, ref delimited) => { - build_recur(sets, &delimited.tts); - first.replace_with(delimited.open_tt(span)); + TokenTree::Delimited(_span, ref delimited) => { + build_recur(sets, delimited.inner_tts()); + first.replace_with(delimited.open_tt().clone()); } TokenTree::Sequence(sp, ref seq_rep) => { let subfirst = build_recur(sets, &seq_rep.tts); @@ -747,8 +744,8 @@ impl FirstSets { first.add_one(tt.clone()); return first; } - TokenTree::Delimited(span, ref delimited) => { - first.add_one(delimited.open_tt(span)); + TokenTree::Delimited(_span, ref delimited) => { + first.add_one(delimited.open_tt().clone()); return first; } TokenTree::Sequence(sp, ref seq_rep) => { @@ -934,9 +931,9 @@ fn check_matcher_core( suffix_first = build_suffix_first(); } } - TokenTree::Delimited(span, ref d) => { - let my_suffix = TokenSet::singleton(d.close_tt(span)); - check_matcher_core(sess, features, def, first_sets, &d.tts, &my_suffix); + TokenTree::Delimited(_span, ref d) => { + let my_suffix = TokenSet::singleton(d.close_tt().clone()); + check_matcher_core(sess, features, def, first_sets, d.inner_tts(), &my_suffix); // don't track non NT tokens last.replace_with_irrelevant(); diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs index 12c5dac9e0bf4..b3ed6b8e4db24 100644 --- a/compiler/rustc_expand/src/mbe/quoted.rs +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -45,10 +45,8 @@ pub(super) fn parse( node_id: NodeId, features: &Features, edition: Edition, -) -> Vec { - // Will contain the final collection of `self::TokenTree` - let mut result = Vec::new(); - + result: &mut Vec, +) { // For each token tree in `input`, parse the token into a `self::TokenTree`, consuming // additional trees if need be. 
let mut trees = input.trees(); @@ -115,7 +113,6 @@ pub(super) fn parse( _ => result.push(tree), } } - result } /// Asks for the `macro_metavar_expr` feature if it is not already declared @@ -208,7 +205,8 @@ fn parse_tree( // If we didn't find a metavar expression above, then we must have a // repetition sequence in the macro (e.g. `$(pat)*`). Parse the // contents of the sequence itself - let sequence = parse(tts, parsing_patterns, sess, node_id, features, edition); + let mut sequence = vec![]; + parse(tts, parsing_patterns, sess, node_id, features, edition, &mut sequence); // Get the Kleene operator and optional separator let (separator, kleene) = parse_sep_and_kleene_op(&mut trees, delim_span.entire(), sess); @@ -225,8 +223,8 @@ fn parse_tree( ) } - // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate` special - // metavariable that names the crate of the invocation. + // `tree` is followed by an `ident`. This could be `$meta_var` or the `$crate` + // special metavariable that names the crate of the invocation. Some(tokenstream::TokenTree::Token(token)) if token.is_ident() => { let (ident, is_raw) = token.ident().unwrap(); let span = ident.span.with_lo(span.lo()); @@ -270,13 +268,15 @@ fn parse_tree( // `tree` is the beginning of a delimited set of tokens (e.g., `(` or `{`). We need to // descend into the delimited set and further parse it. - tokenstream::TokenTree::Delimited(span, delim, tts) => TokenTree::Delimited( - span, - Lrc::new(Delimited { - delim, - tts: parse(tts, parsing_patterns, sess, node_id, features, edition), - }), - ), + tokenstream::TokenTree::Delimited(span, delim, tts) => { + let mut all_tts = vec![]; + // Add the explicit open and close delimiters, which + // `tokenstream::TokenTree::Delimited` lacks. 
+ all_tts.push(TokenTree::token(token::OpenDelim(delim), span.open)); + parse(tts, parsing_patterns, sess, node_id, features, edition, &mut all_tts); + all_tts.push(TokenTree::token(token::CloseDelim(delim), span.close)); + TokenTree::Delimited(span, Lrc::new(Delimited { delim, all_tts })) + } } } diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index 5ec63739cf574..6566338d44feb 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -10,7 +10,7 @@ use rustc_errors::{pluralize, PResult}; use rustc_errors::{DiagnosticBuilder, ErrorGuaranteed}; use rustc_span::hygiene::{LocalExpnId, Transparency}; use rustc_span::symbol::{sym, Ident, MacroRulesNormalizedIdent}; -use rustc_span::Span; +use rustc_span::{Span, DUMMY_SP}; use smallvec::{smallvec, SmallVec}; use std::mem; @@ -34,8 +34,14 @@ enum Frame { impl Frame { /// Construct a new frame around the delimited set of tokens. - fn new(tts: Vec) -> Frame { - let forest = Lrc::new(mbe::Delimited { delim: token::NoDelim, tts }); + fn new(mut tts: Vec) -> Frame { + // Need to add empty delimiters. + let open_tt = mbe::TokenTree::token(token::OpenDelim(token::NoDelim), DUMMY_SP); + let close_tt = mbe::TokenTree::token(token::CloseDelim(token::NoDelim), DUMMY_SP); + tts.insert(0, open_tt); + tts.push(close_tt); + + let forest = Lrc::new(mbe::Delimited { delim: token::NoDelim, all_tts: tts }); Frame::Delimited { forest, idx: 0, span: DelimSpan::dummy() } } } @@ -46,12 +52,14 @@ impl Iterator for Frame { fn next(&mut self) -> Option { match *self { Frame::Delimited { ref forest, ref mut idx, .. } => { + let res = forest.inner_tts().get(*idx).cloned(); *idx += 1; - forest.tts.get(*idx - 1).cloned() + res } Frame::Sequence { ref forest, ref mut idx, .. 
} => { + let res = forest.tts.get(*idx).cloned(); *idx += 1; - forest.tts.get(*idx - 1).cloned() + res } } } @@ -376,8 +384,8 @@ fn lockstep_iter_size( ) -> LockstepIterSize { use mbe::TokenTree; match *tree { - TokenTree::Delimited(_, ref delimed) => { - delimed.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| { + TokenTree::Delimited(_, ref delimited) => { + delimited.inner_tts().iter().fold(LockstepIterSize::Unconstrained, |size, tt| { size.with(lockstep_iter_size(tt, interpolations, repeats)) }) }