Skip to content

Commit

Permalink
Use an enum for keywords and intern them to improve parser performance
Browse files Browse the repository at this point in the history
Currently, keywords are stored in hash sets that are recreated for every
Parser instance, which is quite expensive because macro expansion creates
many parsers. Additionally, the parser functions that look for a keyword
currently accept a string and perform a runtime check to validate that
they actually received a keyword.

By creating an enum for the keywords and inserting them into the
ident interner, we can avoid the creation of the hashsets and get static
checks for the keywords.

For libstd, this cuts the parse+expansion part from ~2.6s to ~1.6s.
  • Loading branch information
dotdash committed May 25, 2013
1 parent b238a08 commit 6c62d77
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 245 deletions.
5 changes: 3 additions & 2 deletions src/libsyntax/ext/trace_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use ext::base::ExtCtxt;
use ext::base;
use parse::lexer::{new_tt_reader, reader};
use parse::parser::Parser;
use parse::token::keywords;

pub fn expand_trace_macros(cx: @ExtCtxt,
sp: span,
Expand All @@ -36,9 +37,9 @@ pub fn expand_trace_macros(cx: @ExtCtxt,
rdr.dup()
);

if rust_parser.is_keyword("true") {
if rust_parser.is_keyword(keywords::True) {
cx.set_trace_macros(true);
} else if rust_parser.is_keyword("false") {
} else if rust_parser.is_keyword(keywords::False) {
cx.set_trace_macros(false);
} else {
cx.span_fatal(sp, "trace_macros! only accepts `true` or `false`")
Expand Down
97 changes: 13 additions & 84 deletions src/libsyntax/parse/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use ast;
use codemap::{BytePos, spanned};
use parse::lexer::reader;
use parse::parser::Parser;
use parse::token::keywords;
use parse::token;

use opt_vec;
Expand Down Expand Up @@ -133,54 +134,15 @@ pub impl Parser {
return if *self.token == *tok { self.bump(); true } else { false };
}

// Storing keywords as interned idents instead of strings would be nifty.

// A sanity check that the word we are asking for is a known keyword
// NOTE: this could be done statically....
fn require_keyword(&self, word: &str) {
if !self.keywords.contains_equiv(&word) {
self.bug(fmt!("unknown keyword: %s", word));
}
}

// return true when this token represents the given string, and is not
// followed immediately by :: .
fn token_is_word(&self, word: &str, tok: &token::Token) -> bool {
match *tok {
token::IDENT(sid, false) => { word == *self.id_to_str(sid) }
_ => { false }
}
}

fn token_is_keyword(&self, word: &str, tok: &token::Token) -> bool {
self.require_keyword(word);
self.token_is_word(word, tok)
}

fn is_keyword(&self, word: &str) -> bool {
self.token_is_keyword(word, &copy *self.token)
}

fn id_is_any_keyword(&self, id: ast::ident) -> bool {
self.keywords.contains(self.id_to_str(id))
}

fn is_any_keyword(&self, tok: &token::Token) -> bool {
match *tok {
token::IDENT(sid, false) => {
self.keywords.contains(self.id_to_str(sid))
}
_ => false
}
fn is_keyword(&self, kw: keywords::Keyword) -> bool {
token::is_keyword(kw, self.token)
}

// the keyword is passed as a `keywords::Keyword` value, so no runtime
// check that it is a known keyword is needed.
// if the next token is the given keyword, eat it and return
// true. Otherwise, return false.
fn eat_keyword(&self, word: &str) -> bool {
self.require_keyword(word);
fn eat_keyword(&self, kw: keywords::Keyword) -> bool {
let is_kw = match *self.token {
token::IDENT(sid, false) => word == *self.id_to_str(sid),
token::IDENT(sid, false) => kw.to_ident().repr == sid.repr,
_ => false
};
if is_kw { self.bump() }
Expand All @@ -190,63 +152,30 @@ pub impl Parser {
// the keyword is passed as a `keywords::Keyword` value, so no runtime
// check that it is a known keyword is needed.
// if the next token is not the given keyword, signal an error.
// otherwise, eat it.
fn expect_keyword(&self, word: &str) {
self.require_keyword(word);
if !self.eat_keyword(word) {
fn expect_keyword(&self, kw: keywords::Keyword) {
if !self.eat_keyword(kw) {
self.fatal(
fmt!(
"expected `%s`, found `%s`",
word,
*self.id_to_str(kw.to_ident()),
self.this_token_to_str()
)
);
}
}

// return true if the given string is a strict keyword
fn is_strict_keyword(&self, word: &str) -> bool {
self.strict_keywords.contains_equiv(&word)
}

// signal an error if the current token is a strict keyword
fn check_strict_keywords(&self) {
match *self.token {
token::IDENT(_, false) => {
let w = token_to_str(self.reader, &copy *self.token);
self.check_strict_keywords_(w);
}
_ => ()
}
}

// signal an error if the given string is a strict keyword
fn check_strict_keywords_(&self, w: &str) {
if self.is_strict_keyword(w) {
fn check_strict_keywords(&self) {
if token::is_strict_keyword(self.token) {
self.span_err(*self.last_span,
fmt!("found `%s` in ident position", w));
fmt!("found `%s` in ident position", self.this_token_to_str()));
}
}

// return true if this is a reserved keyword
fn is_reserved_keyword(&self, word: &str) -> bool {
self.reserved_keywords.contains_equiv(&word)
}

// signal an error if the current token is a reserved keyword
fn check_reserved_keywords(&self) {
match *self.token {
token::IDENT(_, false) => {
let w = token_to_str(self.reader, &copy *self.token);
self.check_reserved_keywords_(w);
}
_ => ()
}
}

// signal an error if the given string is a reserved keyword
fn check_reserved_keywords_(&self, w: &str) {
if self.is_reserved_keyword(w) {
self.fatal(fmt!("`%s` is a reserved keyword", w));
if token::is_reserved_keyword(self.token) {
self.fatal(fmt!("`%s` is a reserved keyword", self.this_token_to_str()));
}
}

Expand Down
6 changes: 3 additions & 3 deletions src/libsyntax/parse/obsolete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use ast::{expr, expr_lit, lit_nil, attribute};
use ast;
use codemap::{span, respan};
use parse::parser::Parser;
use parse::token::Token;
use parse::token::{keywords, Token};
use parse::token;

use core::to_bytes;
Expand Down Expand Up @@ -295,9 +295,9 @@ pub impl Parser {
}

fn try_parse_obsolete_priv_section(&self, attrs: &[attribute]) -> bool {
if self.is_keyword("priv") && self.look_ahead(1) == token::LBRACE {
if self.is_keyword(keywords::Priv) && self.look_ahead(1) == token::LBRACE {
self.obsolete(copy *self.span, ObsoletePrivSection);
self.eat_keyword("priv");
self.eat_keyword(keywords::Priv);
self.bump();
while *self.token != token::RBRACE {
self.parse_single_struct_field(ast::private, attrs.to_owned());
Expand Down
Loading

6 comments on commit 6c62d77

@bors
Copy link
Contributor

@bors bors commented on 6c62d77 May 25, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

saw approval from pcwalton
at dotdash@6c62d77

@bors
Copy link
Contributor

@bors bors commented on 6c62d77 May 25, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

merging dotdash/rust/static_keywords = 6c62d77 into auto

@bors
Copy link
Contributor

@bors bors commented on 6c62d77 May 25, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dotdash/rust/static_keywords = 6c62d77 merged ok, testing candidate = 9d37d03

@bors
Copy link
Contributor

@bors bors commented on 6c62d77 May 25, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bors
Copy link
Contributor

@bors bors commented on 6c62d77 May 25, 2013

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fast-forwarding incoming to auto = 9d37d03

@graydon
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. We used to do this but changed at one point. Happy to revert.

Please sign in to comment.