From 8b932dfda77f8a48f0d134c31c4b33382724a69c Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Mon, 19 Aug 2019 19:00:24 +0300
Subject: [PATCH 1/2] remove composite tokens support from the lexer

---
 src/librustc_lexer/src/lib.rs      | 176 +++--------------------
 src/libsyntax/parse/lexer/mod.rs   |  25 ----
 src/libsyntax/parse/lexer/tests.rs |  32 ++++--
 3 files changed, 34 insertions(+), 199 deletions(-)

diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs
index afef307a0ed3..41b47befaf14 100644
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@@ -23,9 +23,6 @@ pub enum TokenKind {
     Lifetime { starts_with_number: bool },
     Semi,
     Comma,
-    DotDotDot,
-    DotDotEq,
-    DotDot,
     Dot,
     OpenParen,
     CloseParen,
@@ -37,41 +34,19 @@
     Pound,
     Tilde,
     Question,
-    ColonColon,
     Colon,
     Dollar,
-    EqEq,
     Eq,
-    FatArrow,
-    Ne,
     Not,
-    Le,
-    LArrow,
     Lt,
-    ShlEq,
-    Shl,
-    Ge,
     Gt,
-    ShrEq,
-    Shr,
-    RArrow,
     Minus,
-    MinusEq,
     And,
-    AndAnd,
-    AndEq,
     Or,
-    OrOr,
-    OrEq,
-    PlusEq,
     Plus,
-    StarEq,
     Star,
-    SlashEq,
     Slash,
-    CaretEq,
     Caret,
-    PercentEq,
     Percent,
     Unknown,
 }
@@ -135,13 +110,7 @@ impl Cursor<'_> {
             '/' => match self.nth_char(0) {
                 '/' => self.line_comment(),
                 '*' => self.block_comment(),
-                _ => {
-                    if self.eat_assign() {
-                        SlashEq
-                    } else {
-                        Slash
-                    }
-                }
+                _ => Slash,
             },
             c if character_properties::is_whitespace(c) => self.whitespace(),
             'r' => match (self.nth_char(0), self.nth_char(1)) {
@@ -199,22 +168,7 @@ impl Cursor<'_> {
             }
             ';' => Semi,
             ',' => Comma,
-            '.' => {
-                if self.nth_char(0) == '.' {
-                    self.bump();
-                    if self.nth_char(0) == '.' {
-                        self.bump();
-                        DotDotDot
-                    } else if self.nth_char(0) == '=' {
-                        self.bump();
-                        DotDotEq
-                    } else {
-                        DotDot
-                    }
-                } else {
-                    Dot
-                }
-            }
+            '.' => Dot,
             '(' => OpenParen,
             ')' => CloseParen,
             '{' => OpenBrace,
@@ -225,112 +179,19 @@ impl Cursor<'_> {
             '#' => Pound,
             '~' => Tilde,
             '?' => Question,
-            ':' => {
-                if self.nth_char(0) == ':' {
-                    self.bump();
-                    ColonColon
-                } else {
-                    Colon
-                }
-            }
+            ':' => Colon,
             '$' => Dollar,
-            '=' => {
-                if self.nth_char(0) == '=' {
-                    self.bump();
-                    EqEq
-                } else if self.nth_char(0) == '>' {
-                    self.bump();
-                    FatArrow
-                } else {
-                    Eq
-                }
-            }
-            '!' => {
-                if self.nth_char(0) == '=' {
-                    self.bump();
-                    Ne
-                } else {
-                    Not
-                }
-            }
-            '<' => match self.nth_char(0) {
-                '=' => {
-                    self.bump();
-                    Le
-                }
-                '<' => {
-                    self.bump();
-                    if self.eat_assign() { ShlEq } else { Shl }
-                }
-                '-' => {
-                    self.bump();
-                    LArrow
-                }
-                _ => Lt,
-            },
-            '>' => match self.nth_char(0) {
-                '=' => {
-                    self.bump();
-                    Ge
-                }
-                '>' => {
-                    self.bump();
-                    if self.eat_assign() { ShrEq } else { Shr }
-                }
-                _ => Gt,
-            },
-            '-' => {
-                if self.nth_char(0) == '>' {
-                    self.bump();
-                    RArrow
-                } else {
-                    if self.eat_assign() { MinusEq } else { Minus }
-                }
-            }
-            '&' => {
-                if self.nth_char(0) == '&' {
-                    self.bump();
-                    AndAnd
-                } else {
-                    if self.eat_assign() { AndEq } else { And }
-                }
-            }
-            '|' => {
-                if self.nth_char(0) == '|' {
-                    self.bump();
-                    OrOr
-                } else {
-                    if self.eat_assign() { OrEq } else { Or }
-                }
-            }
-            '+' => {
-                if self.eat_assign() {
-                    PlusEq
-                } else {
-                    Plus
-                }
-            }
-            '*' => {
-                if self.eat_assign() {
-                    StarEq
-                } else {
-                    Star
-                }
-            }
-            '^' => {
-                if self.eat_assign() {
-                    CaretEq
-                } else {
-                    Caret
-                }
-            }
-            '%' => {
-                if self.eat_assign() {
-                    PercentEq
-                } else {
-                    Percent
-                }
-            }
+            '=' => Eq,
+            '!' => Not,
+            '<' => Lt,
+            '>' => Gt,
+            '-' => Minus,
+            '&' => And,
+            '|' => Or,
+            '+' => Plus,
+            '*' => Star,
+            '^' => Caret,
+            '%' => Percent,
             '\'' => self.lifetime_or_char(),
             '"' => {
                 let terminated = self.double_quoted_string();
@@ -643,15 +504,6 @@ impl Cursor<'_> {
             self.bump();
         }
     }
-
-    fn eat_assign(&mut self) -> bool {
-        if self.nth_char(0) == '=' {
-            self.bump();
-            true
-        } else {
-            false
-        }
-    }
 }
 
 pub mod character_properties {
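
Note: with the composite arms gone, `rustc_lexer` never needs to look past
the current punctuation character; `::`, `==`, `..=`, `->` and friends now
come out of the lexer as runs of single-character tokens. A minimal sketch
of the new behaviour (not part of this diff; assumes the crate's public
`tokenize` entry point):

    use rustc_lexer::{tokenize, TokenKind};

    // "..=" is now three tokens: Dot, Dot, Eq.
    let kinds: Vec<TokenKind> = tokenize("..=").map(|t| t.kind).collect();
    assert_eq!(kinds, vec![TokenKind::Dot, TokenKind::Dot, TokenKind::Eq]);
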
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs
index bdf468a52bb3..66add869359d 100644
--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -273,9 +273,6 @@ impl<'a> StringReader<'a> {
             }
             rustc_lexer::TokenKind::Semi => token::Semi,
             rustc_lexer::TokenKind::Comma => token::Comma,
-            rustc_lexer::TokenKind::DotDotDot => token::DotDotDot,
-            rustc_lexer::TokenKind::DotDotEq => token::DotDotEq,
-            rustc_lexer::TokenKind::DotDot => token::DotDot,
             rustc_lexer::TokenKind::Dot => token::Dot,
             rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
             rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
@@ -287,42 +284,20 @@
             rustc_lexer::TokenKind::Pound => token::Pound,
             rustc_lexer::TokenKind::Tilde => token::Tilde,
             rustc_lexer::TokenKind::Question => token::Question,
-            rustc_lexer::TokenKind::ColonColon => token::ModSep,
             rustc_lexer::TokenKind::Colon => token::Colon,
             rustc_lexer::TokenKind::Dollar => token::Dollar,
-            rustc_lexer::TokenKind::EqEq => token::EqEq,
             rustc_lexer::TokenKind::Eq => token::Eq,
-            rustc_lexer::TokenKind::FatArrow => token::FatArrow,
-            rustc_lexer::TokenKind::Ne => token::Ne,
             rustc_lexer::TokenKind::Not => token::Not,
-            rustc_lexer::TokenKind::Le => token::Le,
-            rustc_lexer::TokenKind::LArrow => token::LArrow,
             rustc_lexer::TokenKind::Lt => token::Lt,
-            rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl),
-            rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl),
-            rustc_lexer::TokenKind::Ge => token::Ge,
             rustc_lexer::TokenKind::Gt => token::Gt,
-            rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr),
-            rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr),
-            rustc_lexer::TokenKind::RArrow => token::RArrow,
             rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
-            rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus),
             rustc_lexer::TokenKind::And => token::BinOp(token::And),
-            rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And),
-            rustc_lexer::TokenKind::AndAnd => token::AndAnd,
             rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
-            rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or),
-            rustc_lexer::TokenKind::OrOr => token::OrOr,
             rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
-            rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus),
             rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
-            rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star),
             rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
-            rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash),
             rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
-            rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret),
             rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
-            rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent),
             rustc_lexer::TokenKind::Unknown => {
                 let c = self.str_from(start).chars().next().unwrap();
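
Note: `StringReader` now maps only single-character kinds, so composite
`libsyntax` tokens such as `token::ModSep`, `token::EqEq` and
`token::BinOpEq(..)` no longer have a lexer-level source; they are
recreated by gluing when the token stream is built (see the second patch).
The updated tests below spell this out for `::`; in the same style, a
hypothetical extra case (not part of this patch) would read:

    // "a != b" at this layer is Not followed by Eq, not a single Ne.
    check_tokenization(
        setup(&sm, &sh, "a != b".to_string()),
        vec![mk_ident("a"), token::Whitespace, token::Not, token::Eq,
             token::Whitespace, mk_ident("b")],
    );
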
diff --git a/src/libsyntax/parse/lexer/tests.rs b/src/libsyntax/parse/lexer/tests.rs
index 94570140996f..a915aa42fd15 100644
--- a/src/libsyntax/parse/lexer/tests.rs
+++ b/src/libsyntax/parse/lexer/tests.rs
@@ -75,42 +75,50 @@ fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind
 }
 
 #[test]
-fn doublecolonparsing() {
+fn doublecolon_parsing() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a b".to_string()),
-                           vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a b".to_string()),
+            vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_2() {
+fn doublecolon_parsing_2() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a::b".to_string()),
-                           vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a::b".to_string()),
+            vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_3() {
+fn doublecolon_parsing_3() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
-                           vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a ::b".to_string()),
+            vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
+        );
     })
 }
 
 #[test]
-fn dcparsing_4() {
+fn doublecolon_parsing_4() {
     with_default_globals(|| {
         let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
         let sh = mk_sess(sm.clone());
-        check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
-                           vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
+        check_tokenization(
+            setup(&sm, &sh, "a:: b".to_string()),
+            vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
+        );
     })
 }
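
Note: on its own, the first patch would break the parser, which still
expects composite tokens such as `ModSep` and `EqEq`. The second patch
restores them by gluing adjacent tokens while the token stream is built.
Two tokens can only be glued when the first one is marked `Joint`, i.e.
when no whitespace separates them, so the distinction exercised by the
tests above is preserved. An illustrative excerpt (the full table lives in
`Token::glue`):

    (Colon, Joint)    + Colon => ModSep   // "::" glues
    (Lt,    Joint)    + Eq    => Le       // "<=" glues
    (Colon, NonJoint) + Colon => no glue  // "a: :b" stays two Colons
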
From 914e1f456415eae0ae095dd39dc51c115c1ffb5a Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Mon, 19 Aug 2019 19:30:44 +0300
Subject: [PATCH 2/2] glue tokens when building token stream

---
 src/libsyntax/parse/lexer/tokentrees.rs | 40 ++++++++++++++++++++-----
 src/libsyntax/parse/token.rs            |  2 +-
 src/libsyntax/tokenstream.rs            |  2 +-
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/src/libsyntax/parse/lexer/tokentrees.rs b/src/libsyntax/parse/lexer/tokentrees.rs
index 37e67a2729e6..e5ba7e45309d 100644
--- a/src/libsyntax/parse/lexer/tokentrees.rs
+++ b/src/libsyntax/parse/lexer/tokentrees.rs
@@ -39,29 +39,29 @@ struct TokenTreesReader<'a> {
 impl<'a> TokenTreesReader<'a> {
     // Parse a stream of tokens into a list of `TokenTree`s, up to an `Eof`.
     fn parse_all_token_trees(&mut self) -> PResult<'a, TokenStream> {
-        let mut tts = Vec::new();
+        let mut buf = TokenStreamBuilder::default();
         self.real_token();
         while self.token != token::Eof {
-            tts.push(self.parse_token_tree()?);
+            buf.push(self.parse_token_tree()?);
         }
-        Ok(TokenStream::new(tts))
+        Ok(buf.into_token_stream())
     }
 
     // Parse a stream of tokens into a list of `TokenTree`s, up to a `CloseDelim`.
     fn parse_token_trees_until_close_delim(&mut self) -> TokenStream {
-        let mut tts = vec![];
+        let mut buf = TokenStreamBuilder::default();
         loop {
             if let token::CloseDelim(..) = self.token.kind {
-                return TokenStream::new(tts);
+                return buf.into_token_stream();
             }
             match self.parse_token_tree() {
-                Ok(tree) => tts.push(tree),
+                Ok(tree) => buf.push(tree),
                 Err(mut e) => {
                     e.emit();
-                    return TokenStream::new(tts);
+                    return buf.into_token_stream();
                 }
             }
         }
     }
@@ -223,8 +223,32 @@
             _ => {
                 self.token = token;
                 return;
-            },
+            }
         }
     }
 }
+
+#[derive(Default)]
+struct TokenStreamBuilder {
+    buf: Vec<TreeAndJoint>,
+}
+
+impl TokenStreamBuilder {
+    fn push(&mut self, (tree, joint): TreeAndJoint) {
+        if let Some((TokenTree::Token(prev_token), Joint)) = self.buf.last() {
+            if let TokenTree::Token(token) = &tree {
+                if let Some(glued) = prev_token.glue(token) {
+                    self.buf.pop();
+                    self.buf.push((TokenTree::Token(glued), joint));
+                    return;
+                }
+            }
+        }
+        self.buf.push((tree, joint))
+    }
+
+    fn into_token_stream(self) -> TokenStream {
+        TokenStream::new(self.buf)
+    }
+}
diff --git a/src/libsyntax/parse/token.rs b/src/libsyntax/parse/token.rs
index be800b4de66a..1865f925165b 100644
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -551,7 +551,7 @@ impl Token {
         }
     }
 
-    crate fn glue(self, joint: Token) -> Option<Token> {
+    crate fn glue(&self, joint: &Token) -> Option<Token> {
         let kind = match self.kind {
             Eq => match joint.kind {
                 Eq => EqEq,
diff --git a/src/libsyntax/tokenstream.rs b/src/libsyntax/tokenstream.rs
index 6ff8898fe216..09a1b93c7bb1 100644
--- a/src/libsyntax/tokenstream.rs
+++ b/src/libsyntax/tokenstream.rs
@@ -414,7 +414,7 @@ impl TokenStreamBuilder {
         let last_tree_if_joint = self.0.last().and_then(TokenStream::last_tree_if_joint);
         if let Some(TokenTree::Token(last_token)) = last_tree_if_joint {
             if let Some((TokenTree::Token(token), is_joint)) = stream.first_tree_and_joint() {
-                if let Some(glued_tok) = last_token.glue(token) {
+                if let Some(glued_tok) = last_token.glue(&token) {
                     let last_stream = self.0.pop().unwrap();
                     self.push_all_but_last_tree(&last_stream);
                     let glued_tt = TokenTree::Token(glued_tok);
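
Note: `glue` switches from `self`/`Token` to `&self`/`&Token` so that
`TokenStreamBuilder::push` can attempt gluing against a borrowed
`self.buf.last()` without moving tokens out of the buffer. A minimal
sketch of the builder in action (hypothetical spans `sp_lo`/`sp_hi`;
`Joint` and `NonJoint` are the `IsJoint` variants from `tokenstream`):

    let mut builder = TokenStreamBuilder::default();
    // "::" reaches the builder as two Colon tokens, the first marked Joint...
    builder.push((TokenTree::token(token::Colon, sp_lo), Joint));
    builder.push((TokenTree::token(token::Colon, sp_hi), NonJoint));
    // ...and is stored as a single glued ModSep token tree.
    let stream = builder.into_token_stream();
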