From f2d14077439a3037941781b7b53ea4e9ef19f52c Mon Sep 17 00:00:00 2001 From: Jeffrey Seyfried Date: Tue, 17 Jan 2017 03:14:42 +0000 Subject: [PATCH 1/8] Remove field `tokens_consumed` of `Parser`. --- src/libsyntax/parse/parser.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index 167fa78d7e075..9ba6d4d17f794 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -185,7 +185,6 @@ pub struct Parser<'a> { /// the previous token kind prev_token_kind: PrevTokenKind, lookahead_buffer: LookaheadBuffer, - pub tokens_consumed: usize, pub restrictions: Restrictions, pub quote_depth: usize, // not (yet) related to the quasiquoter parsing_token_tree: bool, @@ -282,7 +281,6 @@ impl<'a> Parser<'a> { prev_span: syntax_pos::DUMMY_SP, prev_token_kind: PrevTokenKind::Other, lookahead_buffer: Default::default(), - tokens_consumed: 0, restrictions: Restrictions::empty(), quote_depth: 0, parsing_token_tree: false, @@ -902,7 +900,6 @@ impl<'a> Parser<'a> { }; self.span = next.sp; self.token = next.tok; - self.tokens_consumed += 1; self.expected_tokens.clear(); // check after each token self.check_unknown_macro_variable(); From 6466f55ebca18e3795800d8d606622d36f6ee763 Mon Sep 17 00:00:00 2001 From: Jeffrey Seyfried Date: Tue, 17 Jan 2017 01:14:53 +0000 Subject: [PATCH 2/8] Give the `StringReader` a `sess: &ParseSess`. --- src/librustc/hir/print.rs | 13 ++- src/librustc_driver/pretty.rs | 8 +- src/librustc_save_analysis/span_utils.rs | 3 +- src/libsyntax/ext/expand.rs | 2 +- src/libsyntax/parse/lexer/comments.rs | 9 +-- src/libsyntax/parse/lexer/mod.rs | 92 +++++++++++----------- src/libsyntax/parse/lexer/unicode_chars.rs | 6 +- src/libsyntax/parse/mod.rs | 2 +- src/libsyntax/print/pprust.rs | 21 ++--- 9 files changed, 68 insertions(+), 88 deletions(-) diff --git a/src/librustc/hir/print.rs b/src/librustc/hir/print.rs index c06c53810d753..c0818f052866e 100644 --- a/src/librustc/hir/print.rs +++ b/src/librustc/hir/print.rs @@ -13,6 +13,7 @@ pub use self::AnnNode::*; use syntax::abi::Abi; use syntax::ast; use syntax::codemap::{CodeMap, Spanned}; +use syntax::parse::ParseSess; use syntax::parse::lexer::comments; use syntax::print::pp::{self, break_offset, word, space, hardbreak}; use syntax::print::pp::{Breaks, eof}; @@ -21,7 +22,6 @@ use syntax::print::pprust::{self as ast_pp, PrintState}; use syntax::ptr::P; use syntax::symbol::keywords; use syntax_pos::{self, BytePos}; -use errors; use hir; use hir::{PatKind, RegionTyParamBound, TraitTyParamBound, TraitBoundModifier}; @@ -116,7 +116,7 @@ pub const default_columns: usize = 78; /// it can scan the input text for comments and literals to /// copy forward. pub fn print_crate<'a>(cm: &'a CodeMap, - span_diagnostic: &errors::Handler, + sess: &ParseSess, krate: &hir::Crate, filename: String, input: &mut Read, @@ -124,8 +124,7 @@ pub fn print_crate<'a>(cm: &'a CodeMap, ann: &'a PpAnn, is_expanded: bool) -> io::Result<()> { - let mut s = State::new_from_input(cm, span_diagnostic, filename, input, - out, ann, is_expanded); + let mut s = State::new_from_input(cm, sess, filename, input, out, ann, is_expanded); // When printing the AST, we sometimes need to inject `#[no_std]` here. // Since you can't compile the HIR, it's not necessary. 
@@ -137,16 +136,14 @@ pub fn print_crate<'a>(cm: &'a CodeMap,
 impl<'a> State<'a> {
     pub fn new_from_input(cm: &'a CodeMap,
-                          span_diagnostic: &errors::Handler,
+                          sess: &ParseSess,
                           filename: String,
                           input: &mut Read,
                           out: Box<Write + 'a>,
                           ann: &'a PpAnn,
                           is_expanded: bool)
                           -> State<'a> {
-        let (cmnts, lits) = comments::gather_comments_and_literals(span_diagnostic,
-                                                                   filename,
-                                                                   input);
+        let (cmnts, lits) = comments::gather_comments_and_literals(sess, filename, input);
 
         State::new(cm,
                    out,
diff --git a/src/librustc_driver/pretty.rs b/src/librustc_driver/pretty.rs
index afacfb6e3f9fd..3c8a529bdaee8 100644
--- a/src/librustc_driver/pretty.rs
+++ b/src/librustc_driver/pretty.rs
@@ -838,7 +838,7 @@ pub fn print_after_parsing(sess: &Session,
         debug!("pretty printing source code {:?}", s);
         let sess = annotation.sess();
         pprust::print_crate(sess.codemap(),
-                            sess.diagnostic(),
+                            &sess.parse_sess,
                             krate,
                             src_name.to_string(),
                             &mut rdr,
@@ -896,7 +896,7 @@ pub fn print_after_hir_lowering<'tcx, 'a: 'tcx>(sess: &'a Session,
                 debug!("pretty printing source code {:?}", s);
                 let sess = annotation.sess();
                 pprust::print_crate(sess.codemap(),
-                                    sess.diagnostic(),
+                                    &sess.parse_sess,
                                     krate,
                                     src_name.to_string(),
                                     &mut rdr,
@@ -920,7 +920,7 @@ pub fn print_after_hir_lowering<'tcx, 'a: 'tcx>(sess: &'a Session,
                 debug!("pretty printing source code {:?}", s);
                 let sess = annotation.sess();
                 pprust_hir::print_crate(sess.codemap(),
-                                        sess.diagnostic(),
+                                        &sess.parse_sess,
                                         krate,
                                         src_name.to_string(),
                                         &mut rdr,
@@ -945,7 +945,7 @@ pub fn print_after_hir_lowering<'tcx, 'a: 'tcx>(sess: &'a Session,
                 let sess = annotation.sess();
                 let ast_map = annotation.ast_map().expect("--unpretty missing HIR map");
                 let mut pp_state = pprust_hir::State::new_from_input(sess.codemap(),
-                                                                     sess.diagnostic(),
+                                                                     &sess.parse_sess,
                                                                      src_name.to_string(),
                                                                      &mut rdr,
                                                                      box out,
diff --git a/src/librustc_save_analysis/span_utils.rs b/src/librustc_save_analysis/span_utils.rs
index 448bb2e761749..ebfea90527cd0 100644
--- a/src/librustc_save_analysis/span_utils.rs
+++ b/src/librustc_save_analysis/span_utils.rs
@@ -85,8 +85,7 @@ impl<'a> SpanUtils<'a> {
         let filemap = self.sess
             .codemap()
             .new_filemap(String::from(""), None, self.snippet(span));
-        let s = self.sess;
-        lexer::StringReader::new(s.diagnostic(), filemap)
+        lexer::StringReader::new(&self.sess.parse_sess, filemap)
     }
 
     fn span_to_tts(&self, span: Span) -> Vec<TokenTree> {
diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs
index 201e8d69494ac..4b9b6518b4800 100644
--- a/src/libsyntax/ext/expand.rs
+++ b/src/libsyntax/ext/expand.rs
@@ -648,7 +648,7 @@ fn string_to_tts(text: String, parse_sess: &ParseSess) -> Vec<tokenstream::TokenTree> {
     let filemap = parse_sess.codemap()
                             .new_filemap(String::from(""), None, text);
 
-    let lexer = lexer::StringReader::new(&parse_sess.span_diagnostic, filemap);
+    let lexer = lexer::StringReader::new(parse_sess, filemap);
     let mut parser = Parser::new(parse_sess, Box::new(lexer), None, false);
     panictry!(parser.parse_all_token_trees())
 }
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index ba83a55ea7937..8c94cf67bf6e6 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -13,11 +13,10 @@ pub use self::CommentStyle::*;
 use ast;
 use codemap::CodeMap;
 use syntax_pos::{BytePos, CharPos, Pos};
-use errors;
 use parse::lexer::is_block_doc_comment;
 use parse::lexer::{StringReader, TokenAndSpan};
 use parse::lexer::{is_pattern_whitespace, Reader};
-use parse::lexer;
+use parse::{lexer, ParseSess};
 use print::pprust;
 
 use str::char_at;
@@ -346,16 +345,14 @@ pub struct Literal {
 // it appears this function is called only from pprust... that's
 // probably not a good thing.
-pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
-                                    path: String,
-                                    srdr: &mut Read)
+pub fn gather_comments_and_literals(sess: &ParseSess, path: String, srdr: &mut Read)
                                     -> (Vec<Comment>, Vec<Literal>) {
     let mut src = Vec::new();
     srdr.read_to_end(&mut src).unwrap();
     let src = String::from_utf8(src).unwrap();
     let cm = CodeMap::new();
     let filemap = cm.new_filemap(path, None, src);
-    let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
+    let mut rdr = lexer::StringReader::new_raw(sess, filemap);
 
     let mut comments: Vec<Comment> = Vec::new();
     let mut literals: Vec<Literal> = Vec::new();
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 818742e4492f2..f1cb81a4c7de5 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -11,9 +11,9 @@
 use ast::{self, Ident};
 use syntax_pos::{self, BytePos, CharPos, Pos, Span};
 use codemap::CodeMap;
-use errors::{FatalError, Handler, DiagnosticBuilder};
+use errors::{FatalError, DiagnosticBuilder};
 use ext::tt::transcribe::tt_next_token;
-use parse::token;
+use parse::{token, ParseSess};
 use str::char_at;
 use symbol::{Symbol, keywords};
 use std_unicode::property::Pattern_White_Space;
@@ -82,7 +82,7 @@ impl Default for TokenAndSpan {
 }
 
 pub struct StringReader<'a> {
-    pub span_diagnostic: &'a Handler,
+    pub sess: &'a ParseSess,
     /// The absolute offset within the codemap of the next character to read
     pub next_pos: BytePos,
     /// The absolute offset within the codemap of the current character
@@ -181,27 +181,22 @@ impl<'a> Reader for TtReader<'a> {
 
 impl<'a> StringReader<'a> {
     /// For comments.rs, which hackily pokes into next_pos and ch
-    pub fn new_raw<'b>(span_diagnostic: &'b Handler,
-                       filemap: Rc<syntax_pos::FileMap>)
-                       -> StringReader<'b> {
-        let mut sr = StringReader::new_raw_internal(span_diagnostic, filemap);
+    pub fn new_raw<'b>(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
+        let mut sr = StringReader::new_raw_internal(sess, filemap);
         sr.bump();
         sr
     }
 
-    fn new_raw_internal<'b>(span_diagnostic: &'b Handler,
-                            filemap: Rc<syntax_pos::FileMap>)
-                            -> StringReader<'b> {
+    fn new_raw_internal(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
         if filemap.src.is_none() {
-            span_diagnostic.bug(&format!("Cannot lex filemap \
-                                          without source: {}",
-                                         filemap.name)[..]);
+            sess.span_diagnostic.bug(&format!("Cannot lex filemap without source: {}",
+                                              filemap.name));
         }
 
         let source_text = (*filemap.src.as_ref().unwrap()).clone();
 
         StringReader {
-            span_diagnostic: span_diagnostic,
+            sess: sess,
             next_pos: filemap.start_pos,
             pos: filemap.start_pos,
             col: CharPos(0),
@@ -217,10 +212,8 @@ impl<'a> StringReader<'a> {
         }
     }
 
-    pub fn new<'b>(span_diagnostic: &'b Handler,
-                   filemap: Rc<syntax_pos::FileMap>)
-                   -> StringReader<'b> {
-        let mut sr = StringReader::new_raw(span_diagnostic, filemap);
+    pub fn new(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
+        let mut sr = StringReader::new_raw(sess, filemap);
         if let Err(_) = sr.advance_token() {
             sr.emit_fatal_errors();
             panic!(FatalError);
@@ -234,12 +227,12 @@ impl<'a> StringReader<'a> {
 
     /// Report a fatal lexical error with a given span.
     pub fn fatal_span(&self, sp: Span, m: &str) -> FatalError {
-        self.span_diagnostic.span_fatal(sp, m)
+        self.sess.span_diagnostic.span_fatal(sp, m)
     }
 
     /// Report a lexical error with a given span.
    pub fn err_span(&self, sp: Span, m: &str) {
-        self.span_diagnostic.span_err(sp, m)
+        self.sess.span_diagnostic.span_err(sp, m)
     }
 
@@ -274,7 +267,7 @@ impl<'a> StringReader<'a> {
         for c in c.escape_default() {
             m.push(c)
         }
-        self.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
+        self.sess.span_diagnostic.struct_span_fatal(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
     }
 
     /// Report a lexical error spanning [`from_pos`, `to_pos`), appending an
@@ -298,7 +291,7 @@ impl<'a> StringReader<'a> {
         for c in c.escape_default() {
             m.push(c)
         }
-        self.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
+        self.sess.span_diagnostic.struct_span_err(syntax_pos::mk_sp(from_pos, to_pos), &m[..])
     }
 
     /// Report a lexical error spanning [`from_pos`, `to_pos`), appending the
@@ -503,9 +496,8 @@ impl<'a> StringReader<'a> {
     fn scan_comment(&mut self) -> Option<TokenAndSpan> {
         if let Some(c) = self.ch {
             if c.is_whitespace() {
-                self.span_diagnostic.span_err(syntax_pos::mk_sp(self.pos, self.pos),
-                                              "called consume_any_line_comment, but there \
-                                               was whitespace");
+                let msg = "called consume_any_line_comment, but there was whitespace";
+                self.sess.span_diagnostic.span_err(syntax_pos::mk_sp(self.pos, self.pos), msg);
             }
         }
 
@@ -875,7 +867,7 @@ impl<'a> StringReader<'a> {
                     self.scan_unicode_escape(delim) && !ascii_only
                 } else {
                     let span = syntax_pos::mk_sp(start, self.pos);
-                    self.span_diagnostic
+                    self.sess.span_diagnostic
                         .struct_span_err(span, "incorrect unicode escape sequence")
                         .span_help(span,
                                    "format of unicode escape sequences is \
@@ -1701,35 +1693,41 @@ fn ident_continue(c: Option<char>) -> bool {
 mod tests {
     use super::*;
 
-    use ast::Ident;
+    use ast::{Ident, CrateConfig};
     use symbol::Symbol;
     use syntax_pos::{BytePos, Span, NO_EXPANSION};
     use codemap::CodeMap;
     use errors;
+    use feature_gate::UnstableFeatures;
     use parse::token;
+    use std::cell::RefCell;
     use std::io;
     use std::rc::Rc;
 
-    fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
-        // FIXME (#22405): Replace `Box::new` with `box` here when/if possible.
-        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()),
-                                                          Some(cm));
-        errors::Handler::with_emitter(true, false, Box::new(emitter))
+    fn mk_sess(cm: Rc<CodeMap>) -> ParseSess {
+        let emitter = errors::emitter::EmitterWriter::new(Box::new(io::sink()), Some(cm.clone()));
+        ParseSess {
+            span_diagnostic: errors::Handler::with_emitter(true, false, Box::new(emitter)),
+            unstable_features: UnstableFeatures::from_environment(),
+            config: CrateConfig::new(),
+            included_mod_stack: RefCell::new(Vec::new()),
+            code_map: cm,
+        }
     }
 
     // open a string reader for the given string
     fn setup<'a>(cm: &CodeMap,
-                 span_handler: &'a errors::Handler,
+                 sess: &'a ParseSess,
                  teststr: String)
                  -> StringReader<'a> {
         let fm = cm.new_filemap("zebra.rs".to_string(), None, teststr);
-        StringReader::new(span_handler, fm)
+        StringReader::new(sess, fm)
     }
 
     #[test]
     fn t1() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         let mut string_reader = setup(&cm,
                                       &sh,
                                       "/* my source file */ fn main() { println!(\"zebra\"); }\n"
@@ -1781,7 +1779,7 @@ mod tests {
     #[test]
     fn doublecolonparsing() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         check_tokenization(setup(&cm, &sh, "a b".to_string()),
                            vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
     }
@@ -1789,7 +1787,7 @@ mod tests {
     #[test]
     fn dcparsing_2() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         check_tokenization(setup(&cm, &sh, "a::b".to_string()),
                            vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
     }
@@ -1797,7 +1795,7 @@ mod tests {
     #[test]
     fn dcparsing_3() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
                            vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
     }
@@ -1805,7 +1803,7 @@ mod tests {
     #[test]
     fn dcparsing_4() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
                            vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
     }
@@ -1813,7 +1811,7 @@ mod tests {
     #[test]
     fn character_a() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
                    token::Literal(token::Char(Symbol::intern("a")), None));
     }
@@ -1821,7 +1819,7 @@ mod tests {
     #[test]
     fn character_space() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
                    token::Literal(token::Char(Symbol::intern(" ")), None));
     }
@@ -1829,7 +1827,7 @@ mod tests {
     #[test]
     fn character_escaped() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
                    token::Literal(token::Char(Symbol::intern("\\n")), None));
     }
@@ -1837,7 +1835,7 @@ mod tests {
     #[test]
     fn lifetime_name() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
                    token::Lifetime(Ident::from_str("'abc")));
     }
@@ -1845,7 +1843,7 @@ mod tests {
     #[test]
     fn raw_string() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
                        .next_token()
                        .tok,
@@ -1855,7 +1853,7 @@ mod tests {
     #[test]
     fn literal_suffixes() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         macro_rules! test {
             ($input: expr, $tok_type: ident, $tok_contents: expr) => {{
                 assert_eq!(setup(&cm, &sh, format!("{}suffix", $input)).next_token().tok,
@@ -1899,7 +1897,7 @@ mod tests {
     #[test]
     fn nested_block_comments() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
         match lexer.next_token().tok {
             token::Comment => {}
@@ -1912,7 +1910,7 @@ mod tests {
     #[test]
     fn crlf_comments() {
         let cm = Rc::new(CodeMap::new());
-        let sh = mk_sh(cm.clone());
+        let sh = mk_sess(cm.clone());
         let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
         let comment = lexer.next_token();
         assert_eq!(comment.tok, token::Comment);
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index 1e08b20b7e1f4..6da3e5de75cdc 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -243,10 +243,8 @@ pub fn check_for_substitution<'a>(reader: &StringReader<'a>,
                 err.span_help(span, &msg);
             },
             None => {
-                reader
-                    .span_diagnostic
-                    .span_bug_no_panic(span,
-                                       &format!("substitution character not found for '{}'", ch));
+                let msg = format!("substitution character not found for '{}'", ch);
+                reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
             }
         }
     });
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 32b61a88ac17f..74b313ba395a3 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -223,7 +223,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) -> Vec<tokenstream::TokenTree> {
     // it appears to me that the cfg doesn't matter here... indeed,
     // parsing tt's probably shouldn't require a parser at all.
-    let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap);
+    let srdr = lexer::StringReader::new(sess, filemap);
     let mut p1 = Parser::new(sess, Box::new(srdr), None, false);
     panictry!(p1.parse_all_token_trees())
 }
diff --git a/src/libsyntax/print/pprust.rs b/src/libsyntax/print/pprust.rs
index ff77732f5354c..33b4636ce0899 100644
--- a/src/libsyntax/print/pprust.rs
+++ b/src/libsyntax/print/pprust.rs
@@ -18,10 +18,9 @@ use util::parser::AssocOp;
 use attr;
 use codemap::{self, CodeMap};
 use syntax_pos::{self, BytePos};
-use errors;
 use parse::token::{self, BinOpToken, Token};
 use parse::lexer::comments;
-use parse;
+use parse::{self, ParseSess};
 use print::pp::{self, break_offset, word, space, zerobreak, hardbreak};
 use print::pp::{Breaks, eof};
 use print::pp::Breaks::{Consistent, Inconsistent};
@@ -101,20 +100,15 @@ pub const DEFAULT_COLUMNS: usize = 78;
 /// it can scan the input text for comments and literals to
 /// copy forward.
 pub fn print_crate<'a>(cm: &'a CodeMap,
-                       span_diagnostic: &errors::Handler,
+                       sess: &ParseSess,
                        krate: &ast::Crate,
                        filename: String,
                        input: &mut Read,
                        out: Box<Write + 'static>,
                        ann: &'a PpAnn,
                        is_expanded: bool) -> io::Result<()> {
-    let mut s = State::new_from_input(cm,
-                                      span_diagnostic,
-                                      filename,
-                                      input,
-                                      out,
-                                      ann,
-                                      is_expanded);
+    let mut s = State::new_from_input(cm, sess, filename, input, out, ann, is_expanded);
+
     if is_expanded && !std_inject::injected_crate_name(krate).is_none() {
         // We need to print `#![no_std]` (and its feature gate) so that
         // compiling pretty-printed source won't inject libstd again.
@@ -140,16 +134,13 @@ pub fn print_crate<'a>(cm: &'a CodeMap,
 impl<'a> State<'a> {
     pub fn new_from_input(cm: &'a CodeMap,
-                          span_diagnostic: &errors::Handler,
+                          sess: &ParseSess,
                           filename: String,
                           input: &mut Read,
                           out: Box<Write + 'static>,
                           ann: &'a PpAnn,
                           is_expanded: bool) -> State<'a> {
-        let (cmnts, lits) = comments::gather_comments_and_literals(
-            span_diagnostic,
-            filename,
-            input);
+        let (cmnts, lits) = comments::gather_comments_and_literals(sess, filename, input);
 
         State::new(
             cm,
From de46b247585999ae70674f1fa0543d62f2889c7f Mon Sep 17 00:00:00 2001
From: Jeffrey Seyfried
Date: Thu, 12 Jan 2017 23:32:00 +0000
Subject: [PATCH 3/8] Introduce `string_reader.parse_all_token_trees()`.

---
 src/librustc_save_analysis/span_utils.rs |  10 +-
 src/libsyntax/ext/expand.rs              |  10 +-
 src/libsyntax/parse/lexer/comments.rs    |   6 +-
 src/libsyntax/parse/lexer/mod.rs         |  44 +++++++-
 src/libsyntax/parse/lexer/tokentrees.rs  | 138 +++++++++++++++++++++++
 src/libsyntax/parse/mod.rs               |  11 +-
 6 files changed, 192 insertions(+), 27 deletions(-)
 create mode 100644 src/libsyntax/parse/lexer/tokentrees.rs

diff --git a/src/librustc_save_analysis/span_utils.rs b/src/librustc_save_analysis/span_utils.rs
index ebfea90527cd0..89525b27ed36a 100644
--- a/src/librustc_save_analysis/span_utils.rs
+++ b/src/librustc_save_analysis/span_utils.rs
@@ -17,9 +17,9 @@ use std::env;
 use std::path::Path;
 
 use syntax::ast;
-use syntax::parse::lexer::{self, Reader, StringReader};
+use syntax::parse::filemap_to_tts;
+use syntax::parse::lexer::{self, StringReader};
 use syntax::parse::token::{self, Token};
-use syntax::parse::parser::Parser;
 use syntax::symbol::keywords;
 use syntax::tokenstream::TokenTree;
 use syntax_pos::*;
@@ -89,9 +89,9 @@ impl<'a> SpanUtils<'a> {
     }
 
     fn span_to_tts(&self, span: Span) -> Vec<TokenTree> {
-        let srdr = self.retokenise_span(span);
-        let mut p = Parser::new(&self.sess.parse_sess, Box::new(srdr), None, false);
-        p.parse_all_token_trees().expect("Couldn't re-parse span")
+        let filename = String::from("");
+        let filemap = self.sess.codemap().new_filemap(filename, None, self.snippet(span));
+        filemap_to_tts(&self.sess.parse_sess, filemap)
     }
 
     // Re-parses a path and returns the span for the last identifier in the path
diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs
index 4b9b6518b4800..ca4b2caaf552e 100644
--- a/src/libsyntax/ext/expand.rs
+++ b/src/libsyntax/ext/expand.rs
@@ -21,7 +21,7 @@ use ext::base::*;
 use feature_gate::{self, Features};
 use fold;
 use fold::*;
-use parse::{ParseSess, DirectoryOwnership, PResult, lexer};
+use parse::{ParseSess, DirectoryOwnership, PResult, filemap_to_tts};
 use parse::parser::Parser;
 use parse::token;
 use print::pprust;
@@ -645,12 +645,8 @@ fn tts_for_attr(attr: &ast::Attribute, parse_sess: &ParseSess) -> Vec<TokenTree>
 }
 
 fn string_to_tts(text: String, parse_sess: &ParseSess) -> Vec<tokenstream::TokenTree> {
-    let filemap = parse_sess.codemap()
-                            .new_filemap(String::from(""), None, text);
-
-    let lexer = lexer::StringReader::new(parse_sess, filemap);
-    let mut parser = Parser::new(parse_sess, Box::new(lexer), None, false);
-    panictry!(parser.parse_all_token_trees())
+    let filename = String::from("");
+    filemap_to_tts(parse_sess, parse_sess.codemap().new_filemap(filename, None, text))
 }
 
 impl<'a, 'b> Folder for InvocationCollector<'a, 'b> {
diff --git a/src/libsyntax/parse/lexer/comments.rs b/src/libsyntax/parse/lexer/comments.rs
index 8c94cf67bf6e6..c97b8ddf91972 100644
--- a/src/libsyntax/parse/lexer/comments.rs
+++ b/src/libsyntax/parse/lexer/comments.rs
@@ -13,10 +13,8 @@ pub use self::CommentStyle::*;
 
 use ast;
 use codemap::CodeMap;
 use syntax_pos::{BytePos, CharPos, Pos};
-use parse::lexer::is_block_doc_comment;
-use parse::lexer::{StringReader, TokenAndSpan};
-use parse::lexer::{is_pattern_whitespace, Reader};
-use parse::{lexer, ParseSess};
+use parse::lexer::{is_block_doc_comment, is_pattern_whitespace};
+use parse::lexer::{self, ParseSess, StringReader, TokenAndSpan};
 use print::pprust;
 
 use str::char_at;
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index f1cb81a4c7de5..6c6161998d711 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -26,6 +26,7 @@ use std::rc::Rc;
 pub use ext::tt::transcribe::{TtReader, new_tt_reader};
 
 pub mod comments;
+mod tokentrees;
 mod unicode_chars;
 
 pub trait Reader {
@@ -105,9 +106,44 @@ pub struct StringReader<'a> {
     // cache a direct reference to the source text, so that we don't have to
     // retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
     source_text: Rc<String>,
+    /// Stack of open delimiters and their spans. Used for error message.
+    token: token::Token,
+    span: Span,
+    open_braces: Vec<(token::DelimToken, Span)>,
 }
 
-impl<'a> Reader for StringReader<'a> {
+impl<'a> StringReader<'a> {
+    fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
+        let res = self.try_next_token();
+        self.unwrap_or_abort(res)
+    }
+    fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
+        match res {
+            Ok(tok) => tok,
+            Err(_) => {
+                self.emit_fatal_errors();
+                panic!(FatalError);
+            }
+        }
+    }
+    fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
+        let mut t = self.try_next_token()?;
+        loop {
+            match t.tok {
+                token::Whitespace | token::Comment | token::Shebang(_) => {
+                    t = self.try_next_token()?;
+                }
+                _ => break,
+            }
+        }
+        self.token = t.tok.clone();
+        self.span = t.sp;
+        Ok(t)
+    }
+    pub fn real_token(&mut self) -> TokenAndSpan {
+        let res = self.try_real_token();
+        self.unwrap_or_abort(res)
+    }
     fn is_eof(&self) -> bool {
         if self.ch.is_none() {
             return true;
@@ -131,9 +167,6 @@ impl<'a> StringReader<'a> {
     fn fatal(&self, m: &str) -> FatalError {
         self.fatal_span(self.peek_span, m)
     }
-    fn err(&self, m: &str) {
-        self.err_span(self.peek_span, m)
-    }
     fn emit_fatal_errors(&mut self) {
         for err in &mut self.fatal_errs {
             err.emit();
@@ -209,6 +242,9 @@ impl<'a> StringReader<'a> {
             peek_span: syntax_pos::DUMMY_SP,
             source_text: source_text,
             fatal_errs: Vec::new(),
+            token: token::Eof,
+            span: syntax_pos::DUMMY_SP,
+            open_braces: Vec::new(),
         }
     }
diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
new file mode 100644
index 0000000000000..7b6f00e0e8265
--- /dev/null
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -0,0 +1,138 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use print::pprust::token_to_string;
+use parse::lexer::StringReader;
+use parse::{token, PResult};
+use syntax_pos::Span;
+use tokenstream::{Delimited, TokenTree};
+
+use std::rc::Rc;
+
+impl<'a> StringReader<'a> {
+    // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
+    pub fn parse_all_token_trees(&mut self) -> PResult<'a, Vec<TokenTree>> {
+        let mut tts = Vec::new();
+        while self.token != token::Eof {
+            tts.push(self.parse_token_tree()?);
+        }
+        Ok(tts)
+    }
+
+    // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
+    fn parse_token_trees_until_close_delim(&mut self) -> Vec<TokenTree> {
+        let mut tts = vec![];
+        loop {
+            if let token::CloseDelim(..) = self.token {
+                return tts;
+            }
+            match self.parse_token_tree() {
+                Ok(tt) => tts.push(tt),
+                Err(mut e) => {
+                    e.emit();
+                    return tts;
+                }
+            }
+        }
+    }
+
+    fn parse_token_tree(&mut self) -> PResult<'a, TokenTree> {
+        match self.token {
+            token::Eof => {
+                let msg = "this file contains an un-closed delimiter";
+                let mut err = self.sess.span_diagnostic.struct_span_err(self.span, msg);
+                for &(_, sp) in &self.open_braces {
+                    err.span_help(sp, "did you mean to close this delimiter?");
+                }
+                Err(err)
+            },
+            token::OpenDelim(delim) => {
+                // The span for beginning of the delimited section
+                let pre_span = self.span;
+
+                // Parse the open delimiter.
+                self.open_braces.push((delim, self.span));
+                let open_span = self.span;
+                self.real_token();
+
+                // Parse the token trees within the delimiters.
+                // We stop at any delimiter so we can try to recover if the user
+                // uses an incorrect delimiter.
+                let tts = self.parse_token_trees_until_close_delim();
+
+                let close_span = self.span;
+                // Expand to cover the entire delimited token tree
+                let span = Span { hi: close_span.hi, ..pre_span };
+
+                match self.token {
+                    // Correct delimiter.
+                    token::CloseDelim(d) if d == delim => {
+                        self.open_braces.pop().unwrap();
+
+                        // Parse the close delimiter.
+                        self.real_token();
+                    }
+                    // Incorrect delimiter.
+                    token::CloseDelim(other) => {
+                        let token_str = token_to_string(&self.token);
+                        let msg = format!("incorrect close delimiter: `{}`", token_str);
+                        let mut err = self.sess.span_diagnostic.struct_span_err(self.span, &msg);
+                        // This is a conservative error: only report the last unclosed delimiter.
+                        // The previous unclosed delimiters could actually be closed! The parser
+                        // just hasn't gotten to them yet.
+                        if let Some(&(_, sp)) = self.open_braces.last() {
+                            err.span_note(sp, "unclosed delimiter");
+                        };
+                        err.emit();
+
+                        self.open_braces.pop().unwrap();
+
+                        // If the incorrect delimiter matches an earlier opening
+                        // delimiter, then don't consume it (it can be used to
+                        // close the earlier one). Otherwise, consume it.
+                        // E.g., we try to recover from:
+                        //     fn foo() {
+                        //         bar(baz(
+                        //     }  // Incorrect delimiter but matches the earlier `{`
+                        if !self.open_braces.iter().any(|&(b, _)| b == other) {
+                            self.real_token();
+                        }
+                    }
+                    token::Eof => {
+                        // Silently recover, the EOF token will be seen again
+                        // and an error emitted then. Thus we don't pop from
+                        // self.open_braces here.
+                    },
+                    _ => {}
+                }
+
+                Ok(TokenTree::Delimited(span, Rc::new(Delimited {
+                    delim: delim,
+                    open_span: open_span,
+                    tts: tts,
+                    close_span: close_span,
+                })))
+            },
+            token::CloseDelim(_) => {
+                // An unexpected closing delimiter (i.e., there is no
+                // matching opening delimiter).
+                let token_str = token_to_string(&self.token);
+                let msg = format!("unexpected close delimiter: `{}`", token_str);
+                let err = self.sess.span_diagnostic.struct_span_err(self.span, &msg);
+                Err(err)
+            },
+            _ => {
+                let tt = TokenTree::Token(self.span, self.token.clone());
+                self.real_token();
+                Ok(tt)
+            }
+        }
+    }
+}
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 74b313ba395a3..500e8285b4c05 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -219,13 +219,10 @@ fn file_to_filemap(sess: &ParseSess, path: &Path, spanopt: Option<Span>)
 }
 
 /// Given a filemap, produce a sequence of token-trees
-pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
-    -> Vec<tokenstream::TokenTree> {
-    // it appears to me that the cfg doesn't matter here... indeed,
-    // parsing tt's probably shouldn't require a parser at all.
-    let srdr = lexer::StringReader::new(sess, filemap);
-    let mut p1 = Parser::new(sess, Box::new(srdr), None, false);
-    panictry!(p1.parse_all_token_trees())
+pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) -> Vec<tokenstream::TokenTree> {
+    let mut srdr = lexer::StringReader::new(sess, filemap);
+    srdr.real_token();
+    panictry!(srdr.parse_all_token_trees())
 }
 
 /// Given tts and the ParseSess, produce a parser
From debcbf0b8e8fcf6f1d44e8f79cc06c0866d8d1dd Mon Sep 17 00:00:00 2001
From: Jeffrey Seyfried
Date: Fri, 13 Jan 2017 04:49:20 +0000
Subject: [PATCH 4/8] Refactor the parser to consume token trees.

---
 src/librustc/session/config.rs       |   3 +-
 src/librustc_metadata/cstore_impl.rs |  18 +----
 src/libsyntax/ext/base.rs            |   4 +-
 src/libsyntax/ext/tt/macro_parser.rs |   7 +-
 src/libsyntax/ext/tt/macro_rules.rs  |  20 +++--
 src/libsyntax/ext/tt/transcribe.rs   |  11 ++-
 src/libsyntax/parse/lexer/mod.rs     |  74 -----------------
 src/libsyntax/parse/mod.rs           |   5 +-
 src/libsyntax/parse/parser.rs        | 115 +++++----------------------
 src/libsyntax/tokenstream.rs         |  13 +--
 src/test/parse-fail/issue-33569.rs   |   2 +-
 11 files changed, 59 insertions(+), 213 deletions(-)

diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs
index 104c851e057e6..7d8f7fcefe639 100644
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -25,6 +25,7 @@ use lint;
 use middle::cstore;
 
 use syntax::ast::{self, IntTy, UintTy};
+use syntax::parse::token;
 use syntax::parse;
 use syntax::symbol::Symbol;
 use syntax::feature_gate::UnstableFeatures;
@@ -1259,7 +1260,7 @@ pub fn parse_cfgspecs(cfgspecs: Vec<String>) -> ast::CrateConfig {
             let meta_item = panictry!(parser.parse_meta_item());
 
-            if !parser.reader.is_eof() {
+            if parser.token != token::Eof {
                 early_error(ErrorOutputType::default(), &format!("invalid --cfg argument: {}", s))
             } else if meta_item.is_meta_item_list() {
                 let msg =
diff --git a/src/librustc_metadata/cstore_impl.rs b/src/librustc_metadata/cstore_impl.rs
index 3d025e984b040..d962d1175527a 100644
--- a/src/librustc_metadata/cstore_impl.rs
+++ b/src/librustc_metadata/cstore_impl.rs
@@ -29,7 +29,7 @@ use rustc_back::PanicStrategy;
 
 use syntax::ast;
 use syntax::attr;
-use syntax::parse::new_parser_from_source_str;
+use syntax::parse::filemap_to_tts;
 use syntax::symbol::Symbol;
 use syntax_pos::{mk_sp, Span};
 use rustc::hir::svh::Svh;
@@ -395,19 +395,9 @@ impl<'tcx> CrateStore<'tcx> for cstore::CStore {
         let (name, def) = data.get_macro(id.index);
         let source_name = format!("<{} macros>", name);
 
-        // NB: Don't use parse_tts_from_source_str because it parses with quote_depth > 0.
-        let mut parser = new_parser_from_source_str(&sess.parse_sess, source_name, def.body);
-
-        let lo = parser.span.lo;
-        let body = match parser.parse_all_token_trees() {
-            Ok(body) => body,
-            Err(mut err) => {
-                err.emit();
-                sess.abort_if_errors();
-                unreachable!();
-            }
-        };
-        let local_span = mk_sp(lo, parser.prev_span.hi);
+        let filemap = sess.parse_sess.codemap().new_filemap(source_name, None, def.body);
+        let local_span = mk_sp(filemap.start_pos, filemap.end_pos);
+        let body = filemap_to_tts(&sess.parse_sess, filemap);
 
         // Mark the attrs as used
         let attrs = data.get_item_attrs(id.index);
diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs
index 68d261c64f83c..edf74e1fe19f1 100644
--- a/src/libsyntax/ext/base.rs
+++ b/src/libsyntax/ext/base.rs
@@ -615,9 +615,7 @@ impl<'a> ExtCtxt<'a> {
 
     pub fn new_parser_from_tts(&self, tts: &[tokenstream::TokenTree])
                                -> parser::Parser<'a> {
-        let mut parser = parse::tts_to_parser(self.parse_sess, tts.to_vec());
-        parser.allow_interpolated_tts = false; // FIXME(jseyfried) `quote!` can't handle these yet
-        parser
+        parse::tts_to_parser(self.parse_sess, tts.to_vec())
     }
     pub fn codemap(&self) -> &'a CodeMap { self.parse_sess.codemap() }
     pub fn parse_sess(&self) -> &'a parse::ParseSess { self.parse_sess }
diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 2de31166070e7..46ffc93d2ee69 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -82,7 +82,6 @@ use ast::Ident;
 use syntax_pos::{self, BytePos, mk_sp, Span};
 use codemap::Spanned;
 use errors::FatalError;
-use parse::lexer::*; //resolve bug?
 use parse::{Directory, ParseSess};
 use parse::parser::{PathStyle, Parser};
 use parse::token::{DocComment, MatchNt, SubstNt};
@@ -407,9 +406,9 @@ fn inner_parse_loop(cur_eis: &mut SmallVector<Box<MatcherPos>>,
     Success(())
 }
 
-pub fn parse(sess: &ParseSess, rdr: TtReader, ms: &[TokenTree], directory: Option<Directory>)
+pub fn parse(sess: &ParseSess, tts: Vec<TokenTree>, ms: &[TokenTree], directory: Option<Directory>)
              -> NamedParseResult {
-    let mut parser = Parser::new(sess, Box::new(rdr), directory, true);
+    let mut parser = Parser::new(sess, tts, directory, true);
     let mut cur_eis = SmallVector::one(initial_matcher_pos(ms.to_owned(), parser.span.lo));
     let mut next_eis = Vec::new(); // or proceed normally
 
@@ -527,7 +526,7 @@ fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal {
         "ident" => match p.token {
             token::Ident(sn) => {
                 p.bump();
-                token::NtIdent(Spanned::<Ident>{node: sn, span: p.span})
+                token::NtIdent(Spanned::<Ident>{node: sn, span: p.prev_span})
             }
             _ => {
                 let token_str = pprust::token_to_string(&p.token);
diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs
index 3abd24b50ba9a..585232c5462b4 100644
--- a/src/libsyntax/ext/tt/macro_rules.rs
+++ b/src/libsyntax/ext/tt/macro_rules.rs
@@ -16,8 +16,8 @@ use ext::expand::{Expansion, ExpansionKind};
 use ext::tt::macro_parser::{Success, Error, Failure};
 use ext::tt::macro_parser::{MatchedSeq, MatchedNonterminal};
 use ext::tt::macro_parser::{parse, parse_failure_msg};
+use ext::tt::transcribe::new_tt_reader;
 use parse::{Directory, ParseSess};
-use parse::lexer::new_tt_reader;
 use parse::parser::Parser;
 use parse::token::{self, NtTT, Token};
 use parse::token::Token::*;
@@ -113,13 +113,21 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt,
                 _ => cx.span_bug(sp, "malformed macro rhs"),
             };
             // rhs has holes ( `$id` and `$(...)` that need filled)
-            let trncbr =
+            let mut trncbr =
                 new_tt_reader(&cx.parse_sess.span_diagnostic, Some(named_matches), rhs);
+            let mut tts = Vec::new();
+            loop {
+                let tok = trncbr.real_token();
+                if tok.tok == token::Eof {
+                    break
+                }
+                tts.push(TokenTree::Token(tok.sp, tok.tok));
+            }
             let directory = Directory {
                 path: cx.current_expansion.module.directory.clone(),
                 ownership: cx.current_expansion.directory_ownership,
             };
-            let mut p = Parser::new(cx.parse_sess(), Box::new(trncbr), Some(directory), false);
+            let mut p = Parser::new(cx.parse_sess(), tts, Some(directory), false);
             p.root_module_name = cx.current_expansion.module.mod_path.last()
                 .map(|id| (*id.name.as_str()).to_owned());
 
@@ -187,10 +195,8 @@ pub fn compile(sess: &ParseSess, def: &ast::MacroDef) -> SyntaxExtension {
         })),
     ];
 
-    // Parse the macro_rules! invocation (`none` is for no interpolations):
-    let arg_reader = new_tt_reader(&sess.span_diagnostic, None, def.body.clone());
-
-    let argument_map = match parse(sess, arg_reader, &argument_gram, None) {
+    // Parse the macro_rules! invocation
+    let argument_map = match parse(sess, def.body.clone(), &argument_gram, None) {
         Success(m) => m,
         Failure(sp, tok) => {
             let s = parse_failure_msg(tok);
diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs
index 37e329e5d3b29..82f1e18389565 100644
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ b/src/libsyntax/ext/tt/transcribe.rs
@@ -10,7 +10,7 @@
 use self::LockstepIterSize::*;
 use ast::Ident;
-use errors::{Handler, DiagnosticBuilder};
+use errors::Handler;
 use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal};
 use parse::token::{self, MatchNt, SubstNt, Token, NtIdent};
 use parse::lexer::TokenAndSpan;
@@ -44,8 +44,12 @@ pub struct TtReader<'a> {
     /* cached: */
     pub cur_tok: Token,
     pub cur_span: Span,
-    /// Transform doc comments. Only useful in macro invocations
-    pub fatal_errs: Vec<DiagnosticBuilder<'a>>,
+}
+
+impl<'a> TtReader<'a> {
+    pub fn real_token(&mut self) -> TokenAndSpan {
+        tt_next_token(self)
+    }
 }
 
 /// This can do Macro-By-Example transcription. On the other hand, if
 /// `src` contains no `TokenTree::Sequence`s, `MatchNt`s or `SubstNt`s, `interp` can
 /// (and should) be None.
@@ -76,7 +80,6 @@ pub fn new_tt_reader(sp_diag: &Handler,
         /* dummy values, never read: */
         cur_tok: token::Eof,
         cur_span: DUMMY_SP,
-        fatal_errs: Vec::new(),
     };
     tt_next_token(&mut r); /* get cur_tok and cur_span set up */
     r
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 6c6161998d711..12b9130c47439 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -12,7 +12,6 @@
 use ast::{self, Ident};
 use syntax_pos::{self, BytePos, CharPos, Pos, Span};
 use codemap::CodeMap;
 use errors::{FatalError, DiagnosticBuilder};
-use ext::tt::transcribe::tt_next_token;
 use parse::{token, ParseSess};
 use str::char_at;
 use symbol::{Symbol, keywords};
@@ -23,53 +22,10 @@ use std::char;
 use std::mem::replace;
 use std::rc::Rc;
 
-pub use ext::tt::transcribe::{TtReader, new_tt_reader};
-
 pub mod comments;
 mod tokentrees;
 mod unicode_chars;
 
-pub trait Reader {
-    fn is_eof(&self) -> bool;
-    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()>;
-    fn next_token(&mut self) -> TokenAndSpan where Self: Sized {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-    /// Report a fatal error with the current span.
-    fn fatal(&self, &str) -> FatalError;
-    /// Report a non-fatal error with the current span.
-    fn err(&self, &str);
-    fn emit_fatal_errors(&mut self);
-    fn unwrap_or_abort(&mut self, res: Result<TokenAndSpan, ()>) -> TokenAndSpan {
-        match res {
-            Ok(tok) => tok,
-            Err(_) => {
-                self.emit_fatal_errors();
-                panic!(FatalError);
-            }
-        }
-    }
-    fn peek(&self) -> TokenAndSpan;
-    /// Get a token the parser cares about.
-    fn try_real_token(&mut self) -> Result<TokenAndSpan, ()> {
-        let mut t = self.try_next_token()?;
-        loop {
-            match t.tok {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
-                    t = self.try_next_token()?;
-                }
-                _ => break,
-            }
-        }
-        Ok(t)
-    }
-    fn real_token(&mut self) -> TokenAndSpan {
-        let res = self.try_real_token();
-        self.unwrap_or_abort(res)
-    }
-}
 
 #[derive(Clone, PartialEq, Eq, Debug)]
 pub struct TokenAndSpan {
     pub tok: token::Token,
@@ -182,36 +138,6 @@ impl<'a> StringReader<'a> {
     }
 }
 
-impl<'a> Reader for TtReader<'a> {
-    fn is_eof(&self) -> bool {
-        self.peek().tok == token::Eof
-    }
-    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
-        assert!(self.fatal_errs.is_empty());
-        let r = tt_next_token(self);
-        debug!("TtReader: r={:?}", r);
-        Ok(r)
-    }
-    fn fatal(&self, m: &str) -> FatalError {
-        self.sp_diag.span_fatal(self.cur_span, m)
-    }
-    fn err(&self, m: &str) {
-        self.sp_diag.span_err(self.cur_span, m);
-    }
-    fn emit_fatal_errors(&mut self) {
-        for err in &mut self.fatal_errs {
-            err.emit();
-        }
-        self.fatal_errs.clear();
-    }
-    fn peek(&self) -> TokenAndSpan {
-        TokenAndSpan {
-            tok: self.cur_tok.clone(),
-            sp: self.cur_span,
-        }
-    }
-}
-
 impl<'a> StringReader<'a> {
     /// For comments.rs, which hackily pokes into next_pos and ch
     pub fn new_raw<'b>(sess: &'a ParseSess, filemap: Rc<syntax_pos::FileMap>) -> Self {
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index 500e8285b4c05..0937ef15b4d6d 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -45,7 +45,7 @@ pub mod obsolete;
 
 /// Info about a parsing session.
 pub struct ParseSess {
-    pub span_diagnostic: Handler, // better be the same as the one in the reader!
+    pub span_diagnostic: Handler,
     pub unstable_features: UnstableFeatures,
     pub config: CrateConfig,
     /// Used to determine and report recursive mod inclusions
@@ -227,8 +227,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>) -> Vec<tokenstream::TokenTree>
 
 pub fn tts_to_parser<'a>(sess: &'a ParseSess, tts: Vec<tokenstream::TokenTree>) -> Parser<'a> {
-    let trdr = lexer::new_tt_reader(&sess.span_diagnostic, None, tts);
-    let mut p = Parser::new(sess, Box::new(trdr), None, false);
+    let mut p = Parser::new(sess, tts, None, false);
     p.check_unknown_macro_variable();
     p
 }
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 9ba6d4d17f794..608f8688e8810 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -46,7 +46,7 @@ use ext::tt::macro_parser;
 use parse;
 use parse::classify;
 use parse::common::SeqSep;
-use parse::lexer::{Reader, TokenAndSpan};
+use parse::lexer::TokenAndSpan;
 use parse::obsolete::ObsoleteSyntax;
 use parse::token::{self, MatchNt, SubstNt};
 use parse::{new_sub_parser_from_file, ParseSess, Directory, DirectoryOwnership};
@@ -188,14 +188,11 @@ pub struct Parser<'a> {
     pub restrictions: Restrictions,
     pub quote_depth: usize, // not (yet) related to the quasiquoter
     parsing_token_tree: bool,
-    pub reader: Box<Reader+'a>,
     /// The set of seen errors about obsolete syntax. Used to suppress
     /// extra detail when the same error is seen twice
     pub obsolete_set: HashSet<ObsoleteSyntax>,
     /// Used to determine the path to externally loaded source files
     pub directory: Directory,
-    /// Stack of open delimiters and their spans. Used for error message.
-    pub open_braces: Vec<(token::DelimToken, Span)>,
     /// Name of the root module this parser originated from. If `None`, then the
     /// name is not known. This does not change while the parser is descending
     /// into modules, and sub-parsers have new values for this name.
@@ -203,7 +200,6 @@ pub struct Parser<'a> {
     pub expected_tokens: Vec<TokenType>,
     pub tts: Vec<(TokenTree, usize)>,
     pub desugar_doc_comments: bool,
-    pub allow_interpolated_tts: bool,
 }
 
 #[derive(PartialEq, Eq, Clone)]
@@ -269,12 +265,17 @@ impl From<P<Expr>> for LhsExpr {
 
 impl<'a> Parser<'a> {
     pub fn new(sess: &'a ParseSess,
-               rdr: Box<Reader+'a>,
+               tokens: Vec<TokenTree>,
               directory: Option<Directory>,
               desugar_doc_comments: bool)
               -> Self {
+        let tt = TokenTree::Delimited(syntax_pos::DUMMY_SP, Rc::new(Delimited {
+            delim: token::NoDelim,
+            open_span: syntax_pos::DUMMY_SP,
+            tts: tokens,
+            close_span: syntax_pos::DUMMY_SP,
+        }));
         let mut parser = Parser {
-            reader: rdr,
             sess: sess,
             token: token::Underscore,
             span: syntax_pos::DUMMY_SP,
@@ -286,12 +287,10 @@ impl<'a> Parser<'a> {
             parsing_token_tree: false,
             obsolete_set: HashSet::new(),
             directory: Directory { path: PathBuf::new(), ownership: DirectoryOwnership::Owned },
-            open_braces: Vec::new(),
             root_module_name: None,
             expected_tokens: Vec::new(),
-            tts: Vec::new(),
+            tts: if tt.len() > 0 { vec![(tt, 0)] } else { Vec::new() },
             desugar_doc_comments: desugar_doc_comments,
-            allow_interpolated_tts: true,
         };
 
         let tok = parser.next_tok();
@@ -320,7 +319,7 @@ impl<'a> Parser<'a> {
                 continue
             }
         } else {
-            self.reader.real_token()
+            TokenAndSpan { tok: token::Eof, sp: self.span }
         };
 
         loop {
@@ -2688,94 +2687,28 @@ impl<'a> Parser<'a> {
         // whether something will be a nonterminal or a seq
         // yet.
         match self.token {
-            token::Eof => {
-                let mut err: DiagnosticBuilder<'a> =
-                    self.diagnostic().struct_span_err(self.span,
-                                                      "this file contains an un-closed delimiter");
-                for &(_, sp) in &self.open_braces {
-                    err.span_help(sp, "did you mean to close this delimiter?");
-                }
-
-                Err(err)
-            },
             token::OpenDelim(delim) => {
-                if self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) {
+                if self.quote_depth == 0 && self.tts.last().map(|&(_, i)| i == 1).unwrap_or(false) {
                     let tt = self.tts.pop().unwrap().0;
                     self.bump();
-                    return Ok(if self.allow_interpolated_tts {
-                        // avoid needlessly reparsing token trees in recursive macro expansions
-                        TokenTree::Token(tt.span(), token::Interpolated(Rc::new(token::NtTT(tt))))
-                    } else {
-                        tt
-                    });
+                    return Ok(tt);
                 }
 
                 let parsing_token_tree = ::std::mem::replace(&mut self.parsing_token_tree, true);
-                // The span for beginning of the delimited section
-                let pre_span = self.span;
-
-                // Parse the open delimiter.
-                self.open_braces.push((delim, self.span));
                 let open_span = self.span;
                 self.bump();
-
-                // Parse the token trees within the delimiters.
-                // We stop at any delimiter so we can try to recover if the user
-                // uses an incorrect delimiter.
                 let tts = self.parse_seq_to_before_tokens(&[&token::CloseDelim(token::Brace),
                                                             &token::CloseDelim(token::Paren),
                                                             &token::CloseDelim(token::Bracket)],
                                                           SeqSep::none(),
                                                           |p| p.parse_token_tree(),
                                                           |mut e| e.emit());
+                self.parsing_token_tree = parsing_token_tree;
 
                 let close_span = self.span;
-                // Expand to cover the entire delimited token tree
-                let span = Span { hi: close_span.hi, ..pre_span };
-
-                match self.token {
-                    // Correct delimiter.
-                    token::CloseDelim(d) if d == delim => {
-                        self.open_braces.pop().unwrap();
-
-                        // Parse the close delimiter.
-                        self.bump();
-                    }
-                    // Incorrect delimiter.
-                    token::CloseDelim(other) => {
-                        let token_str = self.this_token_to_string();
-                        let mut err = self.diagnostic().struct_span_err(self.span,
-                            &format!("incorrect close delimiter: `{}`", token_str));
-                        // This is a conservative error: only report the last unclosed delimiter.
-                        // The previous unclosed delimiters could actually be closed!
The parser - // just hasn't gotten to them yet. - if let Some(&(_, sp)) = self.open_braces.last() { - err.span_note(sp, "unclosed delimiter"); - }; - err.emit(); - - self.open_braces.pop().unwrap(); - - // If the incorrect delimiter matches an earlier opening - // delimiter, then don't consume it (it can be used to - // close the earlier one). Otherwise, consume it. - // E.g., we try to recover from: - // fn foo() { - // bar(baz( - // } // Incorrect delimiter but matches the earlier `{` - if !self.open_braces.iter().any(|&(b, _)| b == other) { - self.bump(); - } - } - token::Eof => { - // Silently recover, the EOF token will be seen again - // and an error emitted then. Thus we don't pop from - // self.open_braces here. - }, - _ => {} - } + self.bump(); - self.parsing_token_tree = parsing_token_tree; + let span = Span { lo: open_span.lo, ..close_span }; Ok(TokenTree::Delimited(span, Rc::new(Delimited { delim: delim, open_span: open_span, @@ -2783,21 +2716,9 @@ impl<'a> Parser<'a> { close_span: close_span, }))) }, - token::CloseDelim(_) => { - // An unexpected closing delimiter (i.e., there is no - // matching opening delimiter). - let token_str = self.this_token_to_string(); - let err = self.diagnostic().struct_span_err(self.span, - &format!("unexpected close delimiter: `{}`", token_str)); - Err(err) - }, - /* we ought to allow different depths of unquotation */ - token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => { - self.parse_unquoted() - } - _ => { - Ok(TokenTree::Token(self.span, self.bump_and_get())) - } + token::CloseDelim(_) | token::Eof => unreachable!(), + token::Dollar | token::SubstNt(..) if self.quote_depth > 0 => self.parse_unquoted(), + _ => Ok(TokenTree::Token(self.span, self.bump_and_get())), } } diff --git a/src/libsyntax/tokenstream.rs b/src/libsyntax/tokenstream.rs index e352e7853c71c..ab5dc8181e05b 100644 --- a/src/libsyntax/tokenstream.rs +++ b/src/libsyntax/tokenstream.rs @@ -30,7 +30,6 @@ use codemap::{Spanned, combine_spans}; use ext::base; use ext::tt::macro_parser; use parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration}; -use parse::lexer; use parse::{self, Directory}; use parse::token::{self, Token, Lit, Nonterminal}; use print::pprust; @@ -139,7 +138,10 @@ impl TokenTree { if let Nonterminal::NtTT(..) = **nt { 1 } else { 0 } }, TokenTree::Token(_, token::MatchNt(..)) => 3, - TokenTree::Delimited(_, ref delimed) => delimed.tts.len() + 2, + TokenTree::Delimited(_, ref delimed) => match delimed.delim { + token::NoDelim => delimed.tts.len(), + _ => delimed.tts.len() + 2, + }, TokenTree::Sequence(_, ref seq) => seq.tts.len(), TokenTree::Token(..) 
=> 0, } @@ -181,6 +183,9 @@ impl TokenTree { close_span: sp, })) } + (&TokenTree::Delimited(_, ref delimed), _) if delimed.delim == token::NoDelim => { + delimed.tts[index].clone() + } (&TokenTree::Delimited(_, ref delimed), _) => { if index == 0 { return delimed.open_tt(); @@ -215,14 +220,12 @@ impl TokenTree { mtch: &[TokenTree], tts: &[TokenTree]) -> macro_parser::NamedParseResult { - let diag = &cx.parse_sess().span_diagnostic; // `None` is because we're not interpolating - let arg_rdr = lexer::new_tt_reader(diag, None, tts.iter().cloned().collect()); let directory = Directory { path: cx.current_expansion.module.directory.clone(), ownership: cx.current_expansion.directory_ownership, }; - macro_parser::parse(cx.parse_sess(), arg_rdr, mtch, Some(directory)) + macro_parser::parse(cx.parse_sess(), tts.iter().cloned().collect(), mtch, Some(directory)) } /// Check if this TokenTree is equal to the other, regardless of span information. diff --git a/src/test/parse-fail/issue-33569.rs b/src/test/parse-fail/issue-33569.rs index 130278d778ab0..e3c17af82aab4 100644 --- a/src/test/parse-fail/issue-33569.rs +++ b/src/test/parse-fail/issue-33569.rs @@ -13,6 +13,6 @@ macro_rules! foo { { $+ } => { //~ ERROR expected identifier, found `+` $(x)(y) //~ ERROR expected `*` or `+` - //~^ ERROR no rules expected the token `y` + //~^ ERROR no rules expected the token `)` } } From 6a9248fc1525e619d4ffb2b895a8d15c4bf90de8 Mon Sep 17 00:00:00 2001 From: Jeffrey Seyfried Date: Sat, 14 Jan 2017 12:15:26 +0000 Subject: [PATCH 5/8] Clean up `ext::tt::transcribe`. --- src/libsyntax/ext/tt/macro_rules.rs | 13 +--- src/libsyntax/ext/tt/transcribe.rs | 96 ++++++++++------------------- 2 files changed, 35 insertions(+), 74 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 585232c5462b4..f6a25d4aceed7 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -16,7 +16,7 @@ use ext::expand::{Expansion, ExpansionKind}; use ext::tt::macro_parser::{Success, Error, Failure}; use ext::tt::macro_parser::{MatchedSeq, MatchedNonterminal}; use ext::tt::macro_parser::{parse, parse_failure_msg}; -use ext::tt::transcribe::new_tt_reader; +use ext::tt::transcribe::transcribe; use parse::{Directory, ParseSess}; use parse::parser::Parser; use parse::token::{self, NtTT, Token}; @@ -113,16 +113,7 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt, _ => cx.span_bug(sp, "malformed macro rhs"), }; // rhs has holes ( `$id` and `$(...)` that need filled) - let mut trncbr = - new_tt_reader(&cx.parse_sess.span_diagnostic, Some(named_matches), rhs); - let mut tts = Vec::new(); - loop { - let tok = trncbr.real_token(); - if tok.tok == token::Eof { - break - } - tts.push(TokenTree::Token(tok.sp, tok.tok)); - } + let tts = transcribe(&cx.parse_sess.span_diagnostic, Some(named_matches), rhs); let directory = Directory { path: cx.current_expansion.module.directory.clone(), ownership: cx.current_expansion.directory_ownership, diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs index 82f1e18389565..bf6851ec1dc01 100644 --- a/src/libsyntax/ext/tt/transcribe.rs +++ b/src/libsyntax/ext/tt/transcribe.rs @@ -13,7 +13,6 @@ use ast::Ident; use errors::Handler; use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal}; use parse::token::{self, MatchNt, SubstNt, Token, NtIdent}; -use parse::lexer::TokenAndSpan; use syntax_pos::{Span, DUMMY_SP}; use tokenstream::{self, TokenTree}; use util::small_vector::SmallVector; @@ -32,8 +31,8 
+31,8 @@ struct TtFrame {
 }
 
 #[derive(Clone)]
-pub struct TtReader<'a> {
-    pub sp_diag: &'a Handler,
+struct TtReader<'a> {
+    sp_diag: &'a Handler,
     /// the unzipped tree:
     stack: SmallVector<TtFrame>,
     /* for MBE-style macro transcription */
@@ -41,24 +40,15 @@
     repeat_idx: Vec<usize>,
     repeat_len: Vec<usize>,
 
-    /* cached: */
-    pub cur_tok: Token,
-    pub cur_span: Span,
-}
-
-impl<'a> TtReader<'a> {
-    pub fn real_token(&mut self) -> TokenAndSpan {
-        tt_next_token(self)
-    }
 }
 
 /// This can do Macro-By-Example transcription. On the other hand, if
 /// `src` contains no `TokenTree::Sequence`s, `MatchNt`s or `SubstNt`s, `interp` can
 /// (and should) be None.
-pub fn new_tt_reader(sp_diag: &Handler,
-                     interp: Option<HashMap<Ident, Rc<NamedMatch>>>,
-                     src: Vec<tokenstream::TokenTree>)
-                     -> TtReader {
+pub fn transcribe(sp_diag: &Handler,
+                  interp: Option<HashMap<Ident, Rc<NamedMatch>>>,
+                  src: Vec<tokenstream::TokenTree>)
+                  -> Vec<TokenTree> {
     let mut r = TtReader {
         sp_diag: sp_diag,
         stack: SmallVector::one(TtFrame {
@@ -77,12 +67,15 @@ pub fn new_tt_reader(sp_diag: &Handler,
         },
         repeat_idx: Vec::new(),
         repeat_len: Vec::new(),
-        /* dummy values, never read: */
-        cur_tok: token::Eof,
-        cur_span: DUMMY_SP,
     };
-    tt_next_token(&mut r); /* get cur_tok and cur_span set up */
-    r
+
+    let mut tts = Vec::new();
+    let mut prev_span = DUMMY_SP;
+    while let Some(tt) = tt_next_token(&mut r, prev_span) {
+        prev_span = tt.span();
+        tts.push(tt);
+    }
+    tts
 }
 
 fn lookup_cur_matched_by_matched(r: &TtReader, start: Rc<NamedMatch>) -> Rc<NamedMatch> {
@@ -156,38 +149,24 @@ fn lockstep_iter_size(t: &TokenTree, r: &TtReader) -> LockstepIterSize {
 
 /// Return the next token from the TtReader.
 /// EFFECT: advances the reader's token field
-pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
-    // FIXME(pcwalton): Bad copy?
-    let ret_val = TokenAndSpan {
-        tok: r.cur_tok.clone(),
-        sp: r.cur_span.clone(),
-    };
+fn tt_next_token(r: &mut TtReader, prev_span: Span) -> Option<TokenTree> {
     loop {
-        let should_pop = match r.stack.last() {
-            None => {
-                assert_eq!(ret_val.tok, token::Eof);
-                return ret_val;
-            }
-            Some(frame) => {
-                if frame.idx < frame.forest.len() {
-                    break;
-                }
-                !frame.dotdotdoted ||
-                *r.repeat_idx.last().unwrap() == *r.repeat_len.last().unwrap() - 1
-            }
+        let should_pop = if let Some(frame) = r.stack.last() {
+            if frame.idx < frame.forest.len() {
+                break;
+            }
+            !frame.dotdotdoted || *r.repeat_idx.last().unwrap() == *r.repeat_len.last().unwrap() - 1
+        } else {
+            return None;
        };
 
        /* done with this set; pop or repeat?
*/ if should_pop { let prev = r.stack.pop().unwrap(); - match r.stack.last_mut() { - None => { - r.cur_tok = token::Eof; - return ret_val; - } - Some(frame) => { - frame.idx += 1; - } + if let Some(frame) = r.stack.last_mut() { + frame.idx += 1; + } else { + return None; } if prev.dotdotdoted { r.repeat_idx.pop(); @@ -197,8 +176,7 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { *r.repeat_idx.last_mut().unwrap() += 1; r.stack.last_mut().unwrap().idx = 0; if let Some(tk) = r.stack.last().unwrap().sep.clone() { - r.cur_tok = tk; // repeat same span, I guess - return ret_val; + return Some(TokenTree::Token(prev_span, tk)); // repeat same span, I guess } } } @@ -234,7 +212,7 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { } r.stack.last_mut().unwrap().idx += 1; - return tt_next_token(r); + return tt_next_token(r, prev_span); } r.repeat_len.push(len); r.repeat_idx.push(0); @@ -252,9 +230,7 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { r.stack.last_mut().unwrap().idx += 1; match lookup_cur_matched(r, ident) { None => { - r.cur_span = sp; - r.cur_tok = SubstNt(ident); - return ret_val; + return Some(TokenTree::Token(sp, SubstNt(ident))); // this can't be 0 length, just like TokenTree::Delimited } Some(cur_matched) => if let MatchedNonterminal(ref nt) = *cur_matched { @@ -263,15 +239,11 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { // (a) idents can be in lots of places, so it'd be a pain // (b) we actually can, since it's a token. NtIdent(ref sn) => { - r.cur_span = sn.span; - r.cur_tok = token::Ident(sn.node); - return ret_val; + return Some(TokenTree::Token(sn.span, token::Ident(sn.node))); } _ => { - // FIXME(pcwalton): Bad copy. - r.cur_span = sp; - r.cur_tok = token::Interpolated(nt.clone()); - return ret_val; + // FIXME(pcwalton): Bad copy + return Some(TokenTree::Token(sp, token::Interpolated(nt.clone()))); } } } else { @@ -292,11 +264,9 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan { }); // if this could be 0-length, we'd need to potentially recur here } - TokenTree::Token(sp, tok) => { - r.cur_span = sp; - r.cur_tok = tok; + tt @ TokenTree::Token(..) => { r.stack.last_mut().unwrap().idx += 1; - return ret_val; + return Some(tt); } } } From 57c0ed097ce150fa1d684b5b3b5479a5dedd2b7b Mon Sep 17 00:00:00 2001 From: Jeffrey Seyfried Date: Sat, 14 Jan 2017 12:42:00 +0000 Subject: [PATCH 6/8] Avoid interpolated token trees. --- src/libsyntax/ext/tt/macro_parser.rs | 17 +---------------- src/libsyntax/ext/tt/transcribe.rs | 3 ++- src/libsyntax/parse/parser.rs | 26 ++++++-------------------- 3 files changed, 9 insertions(+), 37 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index 46ffc93d2ee69..834ece97af544 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -480,23 +480,8 @@ fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal { match name { "tt" => { p.quote_depth += 1; //but in theory, non-quoted tts might be useful - let mut tt = panictry!(p.parse_token_tree()); + let tt = panictry!(p.parse_token_tree()); p.quote_depth -= 1; - while let TokenTree::Token(sp, token::Interpolated(nt)) = tt { - if let token::NtTT(..) 
---
 src/libsyntax/ext/tt/macro_parser.rs | 17 +----------------
 src/libsyntax/ext/tt/transcribe.rs   |  3 ++-
 src/libsyntax/parse/parser.rs        | 26 ++++++--------------------
 3 files changed, 9 insertions(+), 37 deletions(-)

diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs
index 46ffc93d2ee69..834ece97af544 100644
--- a/src/libsyntax/ext/tt/macro_parser.rs
+++ b/src/libsyntax/ext/tt/macro_parser.rs
@@ -480,23 +480,8 @@ fn parse_nt<'a>(p: &mut Parser<'a>, sp: Span, name: &str) -> Nonterminal {
     match name {
         "tt" => {
             p.quote_depth += 1; //but in theory, non-quoted tts might be useful
-            let mut tt = panictry!(p.parse_token_tree());
+            let tt = panictry!(p.parse_token_tree());
             p.quote_depth -= 1;
-            while let TokenTree::Token(sp, token::Interpolated(nt)) = tt {
-                if let token::NtTT(..) = *nt {
-                    match Rc::try_unwrap(nt) {
-                        Ok(token::NtTT(sub_tt)) => tt = sub_tt,
-                        Ok(_) => unreachable!(),
-                        Err(nt_rc) => match *nt_rc {
-                            token::NtTT(ref sub_tt) => tt = sub_tt.clone(),
-                            _ => unreachable!(),
-                        },
-                    }
-                } else {
-                    tt = TokenTree::Token(sp, token::Interpolated(nt.clone()));
-                    break
-                }
-            }
             return token::NtTT(tt);
         }
         _ => {}
diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs
index bf6851ec1dc01..38becbe7b1d30 100644
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ b/src/libsyntax/ext/tt/transcribe.rs
@@ -12,7 +12,7 @@ use self::LockstepIterSize::*;
 use ast::Ident;
 use errors::Handler;
 use ext::tt::macro_parser::{NamedMatch, MatchedSeq, MatchedNonterminal};
-use parse::token::{self, MatchNt, SubstNt, Token, NtIdent};
+use parse::token::{self, MatchNt, SubstNt, Token, NtIdent, NtTT};
 use syntax_pos::{Span, DUMMY_SP};
 use tokenstream::{self, TokenTree};
 use util::small_vector::SmallVector;
@@ -241,6 +241,7 @@ fn tt_next_token(r: &mut TtReader, prev_span: Span) -> Option<TokenTree> {
                             NtIdent(ref sn) => {
                                 return Some(TokenTree::Token(sn.span, token::Ident(sn.node)));
                             }
+                            NtTT(ref tt) => return Some(tt.clone()),
                             _ => {
                                 // FIXME(pcwalton): Bad copy
                                 return Some(TokenTree::Token(sp, token::Interpolated(nt.clone())));
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index 608f8688e8810..f958cedd286f3 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -306,8 +306,8 @@ impl<'a> Parser<'a> {
     }
 
     fn next_tok(&mut self) -> TokenAndSpan {
-        'outer: loop {
-            let mut tok = if let Some((tts, i)) = self.tts.pop() {
+        loop {
+            let tok = if let Some((tts, i)) = self.tts.pop() {
                 let tt = tts.get_tt(i);
                 if i + 1 < tts.len() {
                     self.tts.push((tts, i + 1));
@@ -322,25 +322,11 @@ impl<'a> Parser<'a> {
                 TokenAndSpan { tok: token::Eof, sp: self.span }
             };
 
-            loop {
-                let nt = match tok.tok {
-                    token::Interpolated(ref nt) => nt.clone(),
-                    token::DocComment(name) if self.desugar_doc_comments => {
-                        self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0));
-                        continue 'outer
-                    }
-                    _ => return tok,
-                };
-                match *nt {
-                    token::NtTT(TokenTree::Token(sp, ref t)) => {
-                        tok = TokenAndSpan { tok: t.clone(), sp: sp };
-                    }
-                    token::NtTT(ref tt) => {
-                        self.tts.push((tt.clone(), 0));
-                        continue 'outer
-                    }
-                    _ => return tok,
+            match tok.tok {
+                token::DocComment(name) if self.desugar_doc_comments => {
+                    self.tts.push((TokenTree::Token(tok.sp, token::DocComment(name)), 0));
                 }
+                _ => return tok,
             }
         }
     }

From 4c98e1bc592841c03b5228787c97c2edf67ccc6c Mon Sep 17 00:00:00 2001
From: Jeffrey Seyfried
Date: Sat, 14 Jan 2017 11:13:45 +0000
Subject: [PATCH 7/8] Remove the lookahead buffer.
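
With the previous patch, the parser pulls tokens straight off a stack of
token-tree frames (`self.tts`), so peeking `dist` tokens ahead no longer
needs to lex tokens into a fixed 8-slot ring: `look_ahead` can index
into the innermost frame. The trade-off is that lookahead becomes
best-effort near frame boundaries, degrading to `Eof` (or to
`OpenDelim`/`Dollar` for nested trees) instead of walking into the next
frame, which the parser's callers tolerate. A self-contained toy model
of the frame-indexing strategy (`Tok` and `Frames` are stand-ins, not
libsyntax types):

    #[derive(Clone, Debug, PartialEq)]
    enum Tok {
        Ident(&'static str),
        Comma,
        Eof,
    }

    // Each frame is a token sequence plus a cursor, like the parser's
    // `(tts, i)` pairs.
    struct Frames {
        stack: Vec<(Vec<Tok>, usize)>,
    }

    impl Frames {
        // Peek `dist` tokens ahead by indexing the innermost frame;
        // anything past its end reads as `Eof` (the approximation).
        fn look_ahead(&self, dist: usize) -> Tok {
            match self.stack.last() {
                Some(&(ref toks, i)) if i + dist < toks.len() => toks[i + dist].clone(),
                _ => Tok::Eof,
            }
        }
    }

    fn main() {
        let frames = Frames {
            stack: vec![(vec![Tok::Ident("a"), Tok::Comma, Tok::Ident("b")], 0)],
        };
        assert_eq!(frames.look_ahead(1), Tok::Comma);
        assert_eq!(frames.look_ahead(2), Tok::Ident("b"));
        assert_eq!(frames.look_ahead(3), Tok::Eof); // past the frame
    }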
---
 src/libsyntax/parse/parser.rs | 51 ++++++++++-------------------------
 1 file changed, 14 insertions(+), 37 deletions(-)

diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index f958cedd286f3..aa5331e4c7d78 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -156,22 +156,6 @@ enum PrevTokenKind {
     Other,
 }
 
-// Simple circular buffer used for keeping few next tokens.
-#[derive(Default)]
-struct LookaheadBuffer {
-    buffer: [TokenAndSpan; LOOKAHEAD_BUFFER_CAPACITY],
-    start: usize,
-    end: usize,
-}
-
-const LOOKAHEAD_BUFFER_CAPACITY: usize = 8;
-
-impl LookaheadBuffer {
-    fn len(&self) -> usize {
-        (LOOKAHEAD_BUFFER_CAPACITY + self.end - self.start) % LOOKAHEAD_BUFFER_CAPACITY
-    }
-}
-
 /* ident is handled by common.rs */
 
 pub struct Parser<'a> {
@@ -184,7 +168,6 @@ pub struct Parser<'a> {
     pub prev_span: Span,
     /// the previous token kind
     prev_token_kind: PrevTokenKind,
-    lookahead_buffer: LookaheadBuffer,
     pub restrictions: Restrictions,
     pub quote_depth: usize, // not (yet) related to the quasiquoter
     parsing_token_tree: bool,
@@ -281,7 +264,6 @@ impl<'a> Parser<'a> {
             span: syntax_pos::DUMMY_SP,
             prev_span: syntax_pos::DUMMY_SP,
             prev_token_kind: PrevTokenKind::Other,
-            lookahead_buffer: Default::default(),
            restrictions: Restrictions::empty(),
             quote_depth: 0,
             parsing_token_tree: false,
@@ -875,14 +857,7 @@ impl<'a> Parser<'a> {
             _ => PrevTokenKind::Other,
         };
 
-        let next = if self.lookahead_buffer.start == self.lookahead_buffer.end {
-            self.next_tok()
-        } else {
-            // Avoid token copies with `replace`.
-            let old_start = self.lookahead_buffer.start;
-            self.lookahead_buffer.start = (old_start + 1) % LOOKAHEAD_BUFFER_CAPACITY;
-            mem::replace(&mut self.lookahead_buffer.buffer[old_start], Default::default())
-        };
+        let next = self.next_tok();
         self.span = next.sp;
         self.token = next.tok;
         self.expected_tokens.clear();
@@ -917,18 +892,20 @@ impl<'a> Parser<'a> {
         F: FnOnce(&token::Token) -> R,
     {
         if dist == 0 {
-            f(&self.token)
-        } else if dist < LOOKAHEAD_BUFFER_CAPACITY {
-            while self.lookahead_buffer.len() < dist {
-                self.lookahead_buffer.buffer[self.lookahead_buffer.end] = self.next_tok();
-                self.lookahead_buffer.end =
-                    (self.lookahead_buffer.end + 1) % LOOKAHEAD_BUFFER_CAPACITY;
-            }
-            let index = (self.lookahead_buffer.start + dist - 1) % LOOKAHEAD_BUFFER_CAPACITY;
-            f(&self.lookahead_buffer.buffer[index].tok)
-        } else {
-            self.bug("lookahead distance is too large");
+            return f(&self.token);
+        }
+        let mut tok = token::Eof;
+        if let Some(&(ref tts, mut i)) = self.tts.last() {
+            i += dist - 1;
+            if i < tts.len() {
+                tok = match tts.get_tt(i) {
+                    TokenTree::Token(_, tok) => tok,
+                    TokenTree::Delimited(_, delimited) => token::OpenDelim(delimited.delim),
+                    TokenTree::Sequence(..) => token::Dollar,
+                };
+            }
         }
+        f(&tok)
     }
 
     pub fn fatal(&self, m: &str) -> DiagnosticBuilder<'a> {
         self.sess.span_diagnostic.struct_span_fatal(self.span, m)
     }

From 0b9e26f390403aa95620d3b813f046732b371fb1 Mon Sep 17 00:00:00 2001
From: Jeffrey Seyfried
Date: Tue, 17 Jan 2017 04:50:46 +0000
Subject: [PATCH 8/8] Fix fallout in `rustdoc`.
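
rustdoc's `Classifier` used to drive the lexer through the `Reader`
trait object; now that it holds a concrete `StringReader`, the inherent
`try_next_token`, `emit_fatal_errors`, and `peek` methods have to become
`pub` to stay reachable from the `rustdoc` crate, and construction takes
the whole `ParseSess` so the highlighter can reach `sess.span_diagnostic`
itself. A self-contained toy of that consumer shape, with a hand-rolled
reader standing in for libsyntax's:

    mod lexer {
        #[derive(Debug)]
        pub struct TokenAndSpan {
            pub tok: String,
            pub sp: (usize, usize),
        }

        pub struct StringReader {
            words: Vec<String>,
            pos: usize,
        }

        impl StringReader {
            pub fn new(src: &str) -> StringReader {
                StringReader {
                    words: src.split_whitespace().map(String::from).collect(),
                    pos: 0,
                }
            }

            // Previously private and reached via the `Reader` trait; a
            // concrete consumer in another module needs it to be `pub`.
            pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
                match self.words.get(self.pos) {
                    Some(word) => {
                        self.pos += 1;
                        Ok(TokenAndSpan { tok: word.clone(), sp: (self.pos - 1, self.pos) })
                    }
                    None => Err(()),
                }
            }
        }
    }

    fn main() {
        // Classifier-style loop: drive the concrete reader directly.
        let mut reader = lexer::StringReader::new("fn main ( )");
        while let Ok(tas) = reader.try_next_token() {
            println!("{:?}", tas);
        }
    }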
---
 src/librustdoc/html/highlight.rs | 16 +++++++---------
 src/libsyntax/parse/lexer/mod.rs |  6 +++---
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/librustdoc/html/highlight.rs b/src/librustdoc/html/highlight.rs
index a031be8b3c2be..0629e93e7ef5d 100644
--- a/src/librustdoc/html/highlight.rs
+++ b/src/librustdoc/html/highlight.rs
@@ -27,7 +27,7 @@ use std::io;
 use std::io::prelude::*;
 
 use syntax::codemap::CodeMap;
-use syntax::parse::lexer::{self, Reader, TokenAndSpan};
+use syntax::parse::lexer::{self, TokenAndSpan};
 use syntax::parse::token;
 use syntax::parse;
 use syntax_pos::Span;
@@ -42,8 +42,7 @@ pub fn render_with_highlighting(src: &str, class: Option<&str>, id: Option<&str>
     let mut out = Vec::new();
     write_header(class, id, &mut out).unwrap();
 
-    let mut classifier = Classifier::new(lexer::StringReader::new(&sess.span_diagnostic, fm),
-                                         sess.codemap());
+    let mut classifier = Classifier::new(lexer::StringReader::new(&sess, fm), sess.codemap());
     if let Err(_) = classifier.write_source(&mut out) {
         return format!("<pre>{}</pre>", src);
     }
@@ -63,8 +62,7 @@ pub fn render_inner_with_highlighting(src: &str) -> io::Result<String> {
     let fm = sess.codemap().new_filemap("<stdin>".to_string(), None, src.to_string());
 
     let mut out = Vec::new();
-    let mut classifier = Classifier::new(lexer::StringReader::new(&sess.span_diagnostic, fm),
-                                         sess.codemap());
+    let mut classifier = Classifier::new(lexer::StringReader::new(&sess, fm), sess.codemap());
     classifier.write_source(&mut out)?;
 
     Ok(String::from_utf8_lossy(&out).into_owned())
@@ -185,10 +183,10 @@ impl<'a> Classifier<'a> {
             Ok(tas) => tas,
             Err(_) => {
                 self.lexer.emit_fatal_errors();
-                self.lexer.span_diagnostic.struct_warn("Backing out of syntax highlighting")
-                    .note("You probably did not intend to render this \
-                           as a rust code-block")
-                    .emit();
+                self.lexer.sess.span_diagnostic
+                    .struct_warn("Backing out of syntax highlighting")
+                    .note("You probably did not intend to render this as a rust code-block")
+                    .emit();
                 return Err(io::Error::new(io::ErrorKind::Other, ""));
             }
        };
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index 12b9130c47439..6bc15115b09d3 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -111,7 +111,7 @@ impl<'a> StringReader<'a> {
         }
     }
     /// Return the next token. EFFECT: advances the string_reader.
-    fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
+    pub fn try_next_token(&mut self) -> Result<TokenAndSpan, ()> {
         assert!(self.fatal_errs.is_empty());
         let ret_val = TokenAndSpan {
             tok: replace(&mut self.peek_tok, token::Underscore),
@@ -123,13 +123,13 @@ fn fatal(&self, m: &str) -> FatalError {
         self.fatal_span(self.peek_span, m)
     }
-    fn emit_fatal_errors(&mut self) {
+    pub fn emit_fatal_errors(&mut self) {
         for err in &mut self.fatal_errs {
             err.emit();
         }
         self.fatal_errs.clear();
     }
-    fn peek(&self) -> TokenAndSpan {
+    pub fn peek(&self) -> TokenAndSpan {
         // FIXME(pcwalton): Bad copy!
         TokenAndSpan {
             tok: self.peek_tok.clone(),