Remove special code-path for handling unknown tokens #63017

Merged: 3 commits, Aug 6, 2019
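All of the diffs below implement one idea: the lexer's fallible `try_next_token() -> Result<Token, ()>` and its side buffer of fatal errors are replaced by an infallible `next_token() -> Token`, with invalid input reported in-band as a new `token::Unknown` kind that consumers skip. A minimal, self-contained sketch of that before/after shape (stand-in types for illustration, not the compiler's actual lexer):

    // Sketch only: `Lexer` and `TokenKind` here are stand-ins, not rustc's.
    #[derive(Debug, PartialEq)]
    enum TokenKind {
        Ident(String),
        Whitespace,
        /// A completely invalid token which consumers should skip.
        Unknown(String),
        Eof,
    }

    struct Lexer<'a> {
        rest: &'a str,
    }

    impl<'a> Lexer<'a> {
        fn new(src: &'a str) -> Self {
            Lexer { rest: src }
        }

        // Always returns a token: invalid input becomes `Unknown` instead of
        // an `Err(())` plus a buffer of diagnostics to drain later.
        fn next_token(&mut self) -> TokenKind {
            let c = match self.rest.chars().next() {
                Some(c) => c,
                None => return TokenKind::Eof,
            };
            self.rest = &self.rest[c.len_utf8()..];
            if c.is_alphabetic() {
                TokenKind::Ident(c.to_string())
            } else if c.is_whitespace() {
                TokenKind::Whitespace
            } else {
                TokenKind::Unknown(c.to_string())
            }
        }
    }

    fn main() {
        let mut lexer = Lexer::new("a ¦ b");
        loop {
            match lexer.next_token() {
                TokenKind::Eof => break,
                tok => println!("{:?}", tok),
            }
        }
    }

Representing garbage as a token keeps every consumer in the same uniform `loop { match next_token() { ... } }` shape, and no caller can forget to drain an error buffer.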
3 changes: 2 additions & 1 deletion src/librustc/ich/impls_syntax.rs
@@ -363,7 +363,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for token::TokenKind {
             }

             token::DocComment(val) |
-            token::Shebang(val) => val.hash_stable(hcx, hasher),
+            token::Shebang(val) |
+            token::Unknown(val) => val.hash_stable(hcx, hasher),
         }
     }
 }
29 changes: 15 additions & 14 deletions src/librustdoc/html/highlight.rs
@@ -44,7 +44,7 @@ pub fn render_with_highlighting(

     let mut highlighted_source = vec![];
     if classifier.write_source(&mut highlighted_source).is_err() {
-        Err(classifier.lexer.buffer_fatal_errors())
+        Err(())
     } else {
         Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
     }
@@ -59,14 +59,9 @@
             }
             write_footer(&mut out).unwrap();
         }
-        Err(errors) => {
-            // If errors are encountered while trying to highlight, cancel the errors and just emit
-            // the unhighlighted source. The errors will have already been reported in the
-            // `check-code-block-syntax` pass.
-            for mut error in errors {
-                error.cancel();
-            }
-
+        Err(()) => {
+            // If errors are encountered while trying to highlight, just emit
+            // the unhighlighted source.
             write!(out, "<pre><code>{}</code></pre>", src).unwrap();
         }
     }
@@ -192,14 +187,20 @@ impl<'a> Classifier<'a> {
         if let Some(token) = self.peek_token.take() {
             return Ok(token);
         }
-        self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
+        let token = self.lexer.next_token();
+        if let token::Unknown(..) = &token.kind {
+            return Err(HighlightError::LexError);
+        }
+        Ok(token)
     }

     fn peek(&mut self) -> Result<&Token, HighlightError> {
         if self.peek_token.is_none() {
-            self.peek_token = Some(
-                self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
-            );
+            let token = self.lexer.next_token();
+            if let token::Unknown(..) = &token.kind {
+                return Err(HighlightError::LexError);
+            }
+            self.peek_token = Some(token);
         }
         Ok(self.peek_token.as_ref().unwrap())
     }

Review thread on the added `return Err(HighlightError::LexError);` line:

[Contributor] Why is `HighlightError::LexError` necessary? Can't the highlighter treat the token as whitespace and continue, similarly to the parser?

[Member, author] Mainly to minimize the diff in code and tests. I think the rustdoc side needs a different approach altogether, to avoid duplicating lexer errors in two passes. But rustdoc can be improved separately.
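On the exchange above: a hunk further down in this file already maps `Unknown` to `Class::None` when classifying tokens, so the reviewer's suggestion would amount to dropping the early `LexError` return and rendering invalid tokens as plain text. A hedged sketch of that alternative, with stand-in types rather than rustdoc's actual `Classifier`:

    // Sketch of the reviewer's suggested recovery: classify an invalid token
    // like whitespace and keep highlighting instead of aborting.
    #[derive(Debug)]
    enum TokenKind {
        Whitespace,
        Comment,
        Unknown(String),
    }

    #[derive(Debug)]
    enum Class {
        None,
        Comment,
    }

    fn classify(kind: &TokenKind) -> Class {
        match kind {
            // Emit invalid tokens as unstyled text rather than failing.
            TokenKind::Whitespace | TokenKind::Unknown(_) => Class::None,
            TokenKind::Comment => Class::Comment,
        }
    }

    fn main() {
        let tokens = vec![
            TokenKind::Comment,
            TokenKind::Unknown("¦".to_string()),
            TokenKind::Whitespace,
        ];
        for t in &tokens {
            println!("{:?} -> {:?}", t, classify(t));
        }
    }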
@@ -237,7 +238,7 @@ impl<'a> Classifier<'a> {
                 return Ok(());
             },

-            token::Whitespace => Class::None,
+            token::Whitespace | token::Unknown(..) => Class::None,
             token::Comment => Class::Comment,
             token::DocComment(..) => Class::DocComment,
32 changes: 9 additions & 23 deletions src/librustdoc/passes/check_code_block_syntax.rs
@@ -32,24 +32,20 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
             dox[code_block.code].to_owned(),
         );

-        let errors = {
+        let has_errors = {
+            let mut has_errors = false;
             let mut lexer = Lexer::new(&sess, source_file, None);
-            while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
-                if kind == token::Eof {
-                    break;
+            loop {
+                match lexer.next_token().kind {
+                    token::Eof => break,
+                    token::Unknown(..) => has_errors = true,
+                    _ => (),
                 }
             }

-            let errors = lexer.buffer_fatal_errors();
-
-            if !errors.is_empty() {
-                Err(errors)
-            } else {
-                Ok(())
-            }
+            has_errors
         };

-        if let Err(errors) = errors {
+        if has_errors {
             let mut diag = if let Some(sp) =
                 super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
             {
@@ -58,11 +54,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
                     .sess()
                     .struct_span_warn(sp, "could not parse code block as Rust code");

-                for mut err in errors {
-                    diag.note(&format!("error from rustc: {}", err.message()));
-                    err.cancel();
-                }
-
                 if code_block.syntax.is_none() && code_block.is_fenced {
                     let sp = sp.from_inner(InnerSpan::new(0, 3));
                     diag.span_suggestion(
@@ -82,11 +73,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
                     "doc comment contains an invalid Rust code block",
                 );

-                for mut err in errors {
-                    // Don't bother reporting the error, because we can't show where it happened.
-                    err.cancel();
-                }
-
                 if code_block.syntax.is_none() && code_block.is_fenced {
                     diag.help("mark blocks that do not contain Rust code as text: ```text");
                 }
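The rewritten pass reduces to a single scan: lex the code block to `Eof` and record whether any `Unknown` token appeared; no diagnostics have to be buffered, cancelled, or replayed. A sketch of that detection loop, reusing the stand-in `Lexer` and `TokenKind` from the first sketch above:

    // Detection loop in the style of the rewritten pass. Assumes the
    // stand-in Lexer/TokenKind defined in the earlier sketch, which only
    // recognizes letters and whitespace.
    fn contains_invalid_tokens(src: &str) -> bool {
        let mut lexer = Lexer::new(src);
        let mut has_errors = false;
        loop {
            match lexer.next_token() {
                TokenKind::Eof => break,
                TokenKind::Unknown(_) => has_errors = true,
                _ => (),
            }
        }
        has_errors
    }

    fn main() {
        assert!(!contains_invalid_tokens("fn main"));
        assert!(contains_invalid_tokens("fn ¦ main"));
    }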
2 changes: 1 addition & 1 deletion src/libsyntax/ext/proc_macro_server.rs
@@ -184,7 +184,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
             }

             OpenDelim(..) | CloseDelim(..) => unreachable!(),
-            Whitespace | Comment | Shebang(..) | Eof => unreachable!(),
+            Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => unreachable!(),
         }
     }
 }
73 changes: 13 additions & 60 deletions src/libsyntax/parse/lexer/mod.rs
@@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind};
 use crate::symbol::{sym, Symbol};
 use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

-use errors::{FatalError, Diagnostic, DiagnosticBuilder};
+use errors::{FatalError, DiagnosticBuilder};
 use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
 use rustc_lexer::Base;
 use rustc_lexer::unescape;
@@ -39,7 +39,6 @@ pub struct StringReader<'a> {
     pos: BytePos,
     /// Stop reading src at this index.
     end_src_index: usize,
-    fatal_errs: Vec<DiagnosticBuilder<'a>>,
     /// Source text to tokenize.
     src: Lrc<String>,
     override_span: Option<Span>,
@@ -62,7 +61,6 @@ impl<'a> StringReader<'a> {
             pos: source_file.start_pos,
             end_src_index: src.len(),
             src,
-            fatal_errs: Vec::new(),
             override_span,
         }
     }
@@ -89,29 +87,17 @@ impl<'a> StringReader<'a> {
         self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
     }

-    fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
-        match res {
-            Ok(tok) => tok,
-            Err(_) => {
-                self.emit_fatal_errors();
-                FatalError.raise();
-            }
-        }
-    }
-
     /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// `Err(())` means that some errors were encountered, which can be
-    /// retrieved using `buffer_fatal_errors`.
-    pub fn try_next_token(&mut self) -> Result<Token, ()> {
-        assert!(self.fatal_errs.is_empty());
-
+    pub fn next_token(&mut self) -> Token {
         let start_src_index = self.src_index(self.pos);
         let text: &str = &self.src[start_src_index..self.end_src_index];

         if text.is_empty() {
             let span = self.mk_sp(self.pos, self.pos);
-            return Ok(Token::new(token::Eof, span));
+            return Token::new(token::Eof, span);
         }

         {
@@ -125,7 +111,7 @@ impl<'a> StringReader<'a> {
                 let kind = token::Shebang(sym);

                 let span = self.mk_sp(start, self.pos);
-                return Ok(Token::new(kind, span));
+                return Token::new(kind, span);
             }
         }
     }
@@ -139,39 +125,10 @@ impl<'a> StringReader<'a> {

         // This could use `?`, but that makes code significantly (10-20%) slower.
         // https://github.com/rust-lang/rust/issues/37939
-        let kind = match self.cook_lexer_token(token.kind, start) {
-            Ok(it) => it,
-            Err(err) => return Err(self.fatal_errs.push(err)),
-        };
+        let kind = self.cook_lexer_token(token.kind, start);

         let span = self.mk_sp(start, self.pos);
-        Ok(Token::new(kind, span))
-    }
-
-    /// Returns the next token, including trivia like whitespace or comments.
-    ///
-    /// Aborts in case of an error.
-    pub fn next_token(&mut self) -> Token {
-        let res = self.try_next_token();
-        self.unwrap_or_abort(res)
-    }
-
-    fn emit_fatal_errors(&mut self) {
-        for err in &mut self.fatal_errs {
-            err.emit();
-        }
-
-        self.fatal_errs.clear();
-    }
-
-    pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
-        let mut buffer = Vec::new();
-
-        for err in self.fatal_errs.drain(..) {
-            err.buffer(&mut buffer);
-        }
-
-        buffer
+        Token::new(kind, span)
     }

     /// Report a fatal lexical error with a given span.
@@ -218,8 +175,8 @@ impl<'a> StringReader<'a> {
         &self,
         token: rustc_lexer::TokenKind,
         start: BytePos,
-    ) -> Result<TokenKind, DiagnosticBuilder<'a>> {
-        let kind = match token {
+    ) -> TokenKind {
+        match token {
             rustc_lexer::TokenKind::LineComment => {
                 let string = self.str_from(start);
                 // comments with only more "/"s are not doc comments
@@ -396,16 +353,12 @@ impl<'a> StringReader<'a> {
                 // this should be inside `rustc_lexer`. However, we should first remove compound
                 // tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
                 // as there will be less overall work to do this way.
-                return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
-                    Some(token) => {
-                        err.emit();
-                        Ok(token)
-                    }
-                    None => Err(err),
-                }
+                let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
+                    .unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
+                err.emit();
+                token
             }
-        };
-        Ok(kind)
+        }
     }

     fn cook_lexer_literal(
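The final hunk above is the heart of the change: where the lexer previously returned `Err(err)` for a character with no known substitution, it now emits the diagnostic immediately (never fatally) and produces `token::Unknown` so lexing can continue. A self-contained sketch of that fallback; the confusable table here is made up for illustration, while the real one lives in `unicode_chars::check_for_substitution`:

    // Sketch of cook_lexer_token's fallback arm: try a Unicode-confusable
    // substitution first, otherwise report the error and return Unknown.
    #[derive(Debug)]
    enum TokenKind {
        Semi,
        Comma,
        Unknown(String),
    }

    // Hypothetical two-entry confusable table, for illustration only.
    fn check_for_substitution(c: char) -> Option<TokenKind> {
        match c {
            '；' => Some(TokenKind::Semi),  // fullwidth semicolon
            '，' => Some(TokenKind::Comma), // fullwidth comma
            _ => None,
        }
    }

    fn cook_unknown_char(c: char) -> TokenKind {
        // The diagnostic is reported either way; it no longer aborts lexing.
        eprintln!("error: unknown start of token: {:?}", c);
        check_for_substitution(c).unwrap_or_else(|| TokenKind::Unknown(c.to_string()))
    }

    fn main() {
        println!("{:?}", cook_unknown_char('；')); // recovers as Semi
        println!("{:?}", cook_unknown_char('¦')); // Unknown("¦")
    }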
2 changes: 1 addition & 1 deletion src/libsyntax/parse/lexer/tokentrees.rs
@@ -217,7 +217,7 @@ impl<'a> TokenTreesReader<'a> {
         loop {
             let token = self.string_reader.next_token();
             match token.kind {
-                token::Whitespace | token::Comment | token::Shebang(_) => {
+                token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
                     self.joint_to_prev = NonJoint;
                 }
                 _ => {
4 changes: 3 additions & 1 deletion src/libsyntax/parse/token.rs
@@ -255,6 +255,8 @@ pub enum TokenKind {
     /// A comment.
     Comment,
     Shebang(ast::Name),
+    /// A completely invalid token which should be skipped.
+    Unknown(ast::Name),

     Eof,
 }
@@ -603,7 +605,7 @@ impl Token {
             DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
             Question | OpenDelim(..) | CloseDelim(..) |
             Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
-            Whitespace | Comment | Shebang(..) | Eof => return None,
+            Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None,
         };

         Some(Token::new(kind, self.span.to(joint.span)))
1 change: 1 addition & 0 deletions src/libsyntax/print/pprust.rs
@@ -288,6 +288,7 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>)
         token::Whitespace => " ".to_string(),
         token::Comment => "/* */".to_string(),
         token::Shebang(s) => format!("/* shebang: {}*/", s),
+        token::Unknown(s) => s.to_string(),

         token::Interpolated(ref nt) => nonterminal_to_string(nt),
     }
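Because `Unknown` carries the original text as an interned name, the pretty-printer can round-trip it verbatim, which is all the one-line hunk above does. A stand-in sketch of that behavior (not pprust's actual types):

    // Sketch: an invalid token prints back as the exact text it was lexed
    // from. Stand-in enum, not pprust's TokenKind.
    #[derive(Debug)]
    enum TokenKind {
        Whitespace,
        Comment,
        Shebang(String),
        Unknown(String),
    }

    fn token_kind_to_string(tok: &TokenKind) -> String {
        match tok {
            TokenKind::Whitespace => " ".to_string(),
            TokenKind::Comment => "/* */".to_string(),
            TokenKind::Shebang(s) => format!("/* shebang: {}*/", s),
            // Round-trip the invalid text verbatim.
            TokenKind::Unknown(s) => s.clone(),
        }
    }

    fn main() {
        let tok = TokenKind::Unknown("¦".to_string());
        assert_eq!(token_kind_to_string(&tok), "¦");
    }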