From 28c0dd73d3097b3d6f9a8a8d4f158026c4753f3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Sat, 22 Jun 2024 00:07:32 +0900 Subject: [PATCH 1/5] Lexer.next_token --- crates/swc_ecma_parser/src/lexer/state.rs | 224 +++++++++++----------- 1 file changed, 114 insertions(+), 110 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 06134a1e94c9..55dd14973a13 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -192,145 +192,149 @@ impl Tokens for Lexer<'_> { } } -impl<'a> Iterator for Lexer<'a> { - type Item = TokenAndSpan; - - fn next(&mut self) -> Option { - let mut start = self.cur_pos(); +impl Lexer<'_> { + fn next_token(&mut self, mut start: BytePos) -> Result, Error> { + if let Some(start) = self.state.next_regexp { + return Ok(Some(self.read_regexp(start)?)); + } - let res = (|| -> Result, _> { - if let Some(start) = self.state.next_regexp { - return Ok(Some(self.read_regexp(start)?)); - } - - if self.state.is_first { - if let Some(shebang) = self.read_shebang()? { - return Ok(Some(Token::Shebang(shebang))); - } + if self.state.is_first { + if let Some(shebang) = self.read_shebang()? { + return Ok(Some(Token::Shebang(shebang))); } + } - self.state.had_line_break = self.state.is_first; - self.state.is_first = false; + self.state.had_line_break = self.state.is_first; + self.state.is_first = false; - // skip spaces before getting next character, if we are allowed to. - if self.state.can_skip_space() { - self.skip_space::()?; - start = self.input.cur_pos(); - }; + // skip spaces before getting next character, if we are allowed to. + if self.state.can_skip_space() { + self.skip_space::()?; + start = self.input.cur_pos(); + }; - match self.input.cur() { - Some(..) => {} - // End of input. - None => { - if let Some(comments) = self.comments.as_mut() { - let comments_buffer = self.comments_buffer.as_mut().unwrap(); - let last = self.state.prev_hi; - - // move the pending to the leading or trailing - for c in comments_buffer.take_pending_leading() { - // if the file had no tokens and no shebang, then treat any - // comments in the leading comments buffer as leading. - // Otherwise treat them as trailing. - if last == self.start_pos { - comments_buffer.push(BufferedComment { - kind: BufferedCommentKind::Leading, - pos: last, - comment: c, - }); - } else { - comments_buffer.push(BufferedComment { - kind: BufferedCommentKind::Trailing, - pos: last, - comment: c, - }); - } + match self.input.cur() { + Some(..) => {} + // End of input. + None => { + if let Some(comments) = self.comments.as_mut() { + let comments_buffer = self.comments_buffer.as_mut().unwrap(); + let last = self.state.prev_hi; + + // move the pending to the leading or trailing + for c in comments_buffer.take_pending_leading() { + // if the file had no tokens and no shebang, then treat any + // comments in the leading comments buffer as leading. + // Otherwise treat them as trailing. + if last == self.start_pos { + comments_buffer.push(BufferedComment { + kind: BufferedCommentKind::Leading, + pos: last, + comment: c, + }); + } else { + comments_buffer.push(BufferedComment { + kind: BufferedCommentKind::Trailing, + pos: last, + comment: c, + }); } + } - // now fill the user's passed in comments - for comment in comments_buffer.take_comments() { - match comment.kind { - BufferedCommentKind::Leading => { - comments.add_leading(comment.pos, comment.comment); - } - BufferedCommentKind::Trailing => { - comments.add_trailing(comment.pos, comment.comment); - } + // now fill the user's passed in comments + for comment in comments_buffer.take_comments() { + match comment.kind { + BufferedCommentKind::Leading => { + comments.add_leading(comment.pos, comment.comment); + } + BufferedCommentKind::Trailing => { + comments.add_trailing(comment.pos, comment.comment); } } } - - return Ok(None); } - }; - - // println!( - // "\tContext: ({:?}) {:?}", - // self.input.cur().unwrap(), - // self.state.context.0 - // ); - self.state.start = start; + return Ok(None); + } + }; - if self.syntax.jsx() && !self.ctx.in_property_name && !self.ctx.in_type { - //jsx - if self.state.context.current() == Some(TokenContext::JSXExpr) { - return self.read_jsx_token(); - } + // println!( + // "\tContext: ({:?}) {:?}", + // self.input.cur().unwrap(), + // self.state.context.0 + // ); - let c = self.cur(); - if let Some(c) = c { - if self.state.context.current() == Some(TokenContext::JSXOpeningTag) - || self.state.context.current() == Some(TokenContext::JSXClosingTag) - { - if c.is_ident_start() { - return self.read_jsx_word().map(Some); - } + self.state.start = start; - if c == '>' { - unsafe { - // Safety: cur() is Some('>') - self.input.bump(); - } - return Ok(Some(Token::JSXTagEnd)); - } + if self.syntax.jsx() && !self.ctx.in_property_name && !self.ctx.in_type { + //jsx + if self.state.context.current() == Some(TokenContext::JSXExpr) { + return self.read_jsx_token(); + } - if (c == '\'' || c == '"') - && self.state.context.current() == Some(TokenContext::JSXOpeningTag) - { - return self.read_jsx_str(c).map(Some); - } + let c = self.cur(); + if let Some(c) = c { + if self.state.context.current() == Some(TokenContext::JSXOpeningTag) + || self.state.context.current() == Some(TokenContext::JSXClosingTag) + { + if c.is_ident_start() { + return self.read_jsx_word().map(Some); } - if c == '<' && self.state.is_expr_allowed && self.input.peek() != Some('!') { - let had_line_break_before_last = self.had_line_break_before_last(); - let cur_pos = self.input.cur_pos(); - + if c == '>' { unsafe { - // Safety: cur() is Some('<') + // Safety: cur() is Some('>') self.input.bump(); } + return Ok(Some(Token::JSXTagEnd)); + } + + if (c == '\'' || c == '"') + && self.state.context.current() == Some(TokenContext::JSXOpeningTag) + { + return self.read_jsx_str(c).map(Some); + } + } - if had_line_break_before_last && self.is_str("<<<<<< ") { - let span = Span::new(cur_pos, cur_pos + BytePos(7), Default::default()); + if c == '<' && self.state.is_expr_allowed && self.input.peek() != Some('!') { + let had_line_break_before_last = self.had_line_break_before_last(); + let cur_pos = self.input.cur_pos(); - self.emit_error_span(span, SyntaxError::TS1185); - self.skip_line_comment(6); - self.skip_space::()?; - return self.read_token(); - } + unsafe { + // Safety: cur() is Some('<') + self.input.bump(); + } + + if had_line_break_before_last && self.is_str("<<<<<< ") { + let span = Span::new(cur_pos, cur_pos + BytePos(7), Default::default()); - return Ok(Some(Token::JSXTagStart)); + self.emit_error_span(span, SyntaxError::TS1185); + self.skip_line_comment(6); + self.skip_space::()?; + return self.read_token(); } + + return Ok(Some(Token::JSXTagStart)); } } + } - if let Some(TokenContext::Tpl {}) = self.state.context.current() { - let start = self.state.tpl_start; - return self.read_tmpl_token(start).map(Some); - } + if let Some(TokenContext::Tpl {}) = self.state.context.current() { + let start = self.state.tpl_start; + return self.read_tmpl_token(start).map(Some); + } + + self.read_token() + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = TokenAndSpan; + + fn next(&mut self) -> Option { + let start = self.cur_pos(); - self.read_token() - })(); + let res = self.next_token(start); let token = match res.map_err(Token::Error).map_err(Some) { Ok(t) => t, From 5db45065ddbb7711f9ab5b3a077248aeb50e4f41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Sat, 22 Jun 2024 00:11:21 +0900 Subject: [PATCH 2/5] fix --- crates/swc_ecma_parser/src/lexer/state.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 55dd14973a13..87d4d21ec763 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -193,7 +193,7 @@ impl Tokens for Lexer<'_> { } impl Lexer<'_> { - fn next_token(&mut self, mut start: BytePos) -> Result, Error> { + fn next_token(&mut self, start: &mut BytePos) -> Result, Error> { if let Some(start) = self.state.next_regexp { return Ok(Some(self.read_regexp(start)?)); } @@ -210,7 +210,7 @@ impl Lexer<'_> { // skip spaces before getting next character, if we are allowed to. if self.state.can_skip_space() { self.skip_space::()?; - start = self.input.cur_pos(); + *start = self.input.cur_pos(); }; match self.input.cur() { @@ -264,7 +264,7 @@ impl Lexer<'_> { // self.state.context.0 // ); - self.state.start = start; + self.state.start = *start; if self.syntax.jsx() && !self.ctx.in_property_name && !self.ctx.in_type { //jsx @@ -332,9 +332,9 @@ impl<'a> Iterator for Lexer<'a> { type Item = TokenAndSpan; fn next(&mut self) -> Option { - let start = self.cur_pos(); + let mut start = self.cur_pos(); - let res = self.next_token(start); + let res = self.next_token(&mut start); let token = match res.map_err(Token::Error).map_err(Some) { Ok(t) => t, From 3ef77fa8859ec6ea25fedd53fc87ffeaea7e07e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Sat, 22 Jun 2024 00:16:25 +0900 Subject: [PATCH 3/5] #[cold] --- crates/swc_ecma_parser/src/lexer/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index aaaab140a060..d6fc4fc1508f 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -1210,6 +1210,7 @@ impl<'a> Lexer<'a> { Ok(Token::Regex(content, flags)) } + #[cold] fn read_shebang(&mut self) -> LexResult> { if self.input.cur() != Some('#') || self.input.peek() != Some('!') { return Ok(None); From f787f2d4f5a72df9c8a299dd76f9353c3918b940 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Sat, 22 Jun 2024 00:20:24 +0900 Subject: [PATCH 4/5] Small tidy --- crates/swc_ecma_parser/src/lexer/state.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 87d4d21ec763..e7997bb4c1c1 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -787,7 +787,7 @@ impl TokenContext { Self::BraceExpr | Self::TplQuasi | Self::ParenExpr - | Self::Tpl { .. } + | Self::Tpl | Self::FnExpr | Self::ClassExpr | Self::JSXExpr @@ -796,7 +796,7 @@ impl TokenContext { pub(crate) const fn preserve_space(&self) -> bool { match self { - Self::Tpl { .. } | Self::JSXExpr => true, + Self::Tpl | Self::JSXExpr => true, _ => false, } } From e605cf718a1972073a25bcc3d67c8b89e5ace104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EA=B0=95=EB=8F=99=EC=9C=A4=20=28Donny=29?= Date: Sat, 22 Jun 2024 00:23:05 +0900 Subject: [PATCH 5/5] Use smallvec --- crates/swc_ecma_parser/src/lexer/state.rs | 9 +++++---- crates/swc_ecma_parser/src/parser/typescript.rs | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index e7997bb4c1c1..07da5b7bda84 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -1,5 +1,6 @@ use std::mem::take; +use smallvec::{smallvec, SmallVec}; use swc_common::{BytePos, Span}; use tracing::trace; @@ -371,7 +372,7 @@ impl<'a> Iterator for Lexer<'a> { impl State { pub fn new(syntax: Syntax, start_pos: BytePos) -> Self { - let context = TokenContexts(vec![TokenContext::BraceStmt]); + let context = TokenContexts(smallvec![TokenContext::BraceStmt]); State { is_expr_allowed: true, @@ -646,7 +647,7 @@ impl State { } #[derive(Clone, Default)] -pub struct TokenContexts(pub(crate) Vec); +pub struct TokenContexts(pub(crate) SmallVec<[TokenContext; 32]>); impl TokenContexts { /// Returns true if following `LBrace` token is `block statement` according @@ -817,9 +818,9 @@ where let res = f(&mut l); #[cfg(debug_assertions)] - let c = vec![TokenContext::BraceStmt]; + let c = TokenContexts(smallvec![TokenContext::BraceStmt]); #[cfg(debug_assertions)] - debug_assert_eq!(l.state.context.0, c); + debug_assert_eq!(l.state.context.0, c.0); res }) diff --git a/crates/swc_ecma_parser/src/parser/typescript.rs b/crates/swc_ecma_parser/src/parser/typescript.rs index 454534405a5c..3a5a3c1d10b0 100644 --- a/crates/swc_ecma_parser/src/parser/typescript.rs +++ b/crates/swc_ecma_parser/src/parser/typescript.rs @@ -2771,7 +2771,7 @@ impl Parser { let cloned = self.input.token_context().clone(); self.input - .set_token_context(TokenContexts(vec![cloned.0[0]])); + .set_token_context(TokenContexts(smallvec::smallvec![cloned.0[0]])); let res = op(self); self.input.set_token_context(cloned);