# Patch summary (preamble text; everything from the first "diff --git" header
# onward is the original patch, left unchanged).
#
# Purpose: defer parsing of function bodies. Instead of parsing a body as soon
# as its '{' is reached, the parser asks the lexer to record the body's tokens
# and replays them after the rest of the top-level parse loop has run.
#
# compiler/lexer.cpp
#   - Lexer::~Lexer(): removes and deletes every TokenCache still linked in
#     token_caches_.
#   - Lexer::lex(): when injected_token_stream_ is non-empty, returns
#     LexInjectedToken() instead of LexNewToken().
#   - Lexer::LexInjectedToken(): pops the front of injected_token_stream_ into
#     the slot returned by advance_token_ptr(). A tMAYBE_LABEL token becomes
#     tLABEL when allow_tags_ is set (the following token is popped as well and
#     asserted to be ':'); otherwise it becomes tSYMBOL and the ':' stays in
#     the stream.
#   - Lexer::LexSymbol(): while caching_tokens_ is set, a symbol followed by a
#     single ':' is emitted as tMAYBE_LABEL without consuming the ':', so the
#     label decision is made at replay time.
#   - Lexer::AssertCleanState(): asserts allow_keywords_, allow_substitutions_,
#     !in_string_continuation_, allow_tags_, and an empty
#     injected_token_stream_.
#   - Lexer::LexFunctionBody(): requires the current token to be '{'. Allocates
#     a TokenCache, copies state_.require_newdecls / state_.need_semicolon into
#     it, moves the '{' token into the cache, then lexes with caching_tokens_
#     set to true, appending each token until lex() returns 0 or the brace
#     balance reaches zero. The cache is linked into token_caches_ and
#     returned.
#   - Lexer::InjectCachedTokens(): moves the cache's tokens into
#     injected_token_stream_, unlinks and deletes the cache, and sets
#     freading_ = true.
#
# compiler/lexer.h
#   - Adds the tMAYBE_LABEL token kind and the TokenCache struct.
#   - Declares the new Lexer members and functions used above.
#   - Changes require_newdecls from int to bool and adds a need_semicolon()
#     accessor.
#
# compiler/parse-node.h
#   - FunctionDecl gains tokens() / set_tokens() and a tokens_ member that
#     defaults to nullptr.
#
# compiler/parser.cpp, compiler/parser.h
#   - Parser::parse_function(): now uses match('{'); on failure it reports 437
#     and returns false. On success it stores the result of LexFunctionBody()
#     on the function and appends the function to delayed_functions_.
#   - Parser::Parse(): drains delayed_functions_; for each function it clears
#     the stored cache pointer, applies the cached require_newdecls /
#     need_semicolon values through ke::SaveAndSet, injects the tokens, calls
#     parse_stmt(false), and stores BlockStmt::WrapStmt(body) as the body.
#   - parse_methodmap_method(): passes `true` instead of `TRUE` for
#     require_newdecls.
#
# NOTE(review): the removed code in parse_function returned false when
#   parse_stmt() produced a null body; the new loop in Parse() hands the result
#   to BlockStmt::WrapStmt unchecked. Confirm WrapStmt tolerates nullptr.
# NOTE(review): if a replayed body stops parsing early and leaves tokens in
#   injected_token_stream_, the next InjectCachedTokens() call would presumably
#   trip AssertCleanState(). Verify the error-recovery path.
# NOTE(review): LexInjectedToken() declares an inner `tok` (the popped ':'
#   token) that shadows the outer `tok` pointer; a distinct name would read
#   more clearly.
# NOTE(review): the tSYMBOL fallback in LexInjectedToken() does not repeat the
#   `cc_.types()->find(atom)` branch that LexSymbol() takes when tags are not
#   allowed; that branch's body lies outside the hunk. Confirm replay
#   classifies such symbols the same way as direct lexing.
# NOTE(review): LexFunctionBody() leaves its loop silently when lex() returns
#   0 with braces still unbalanced. Confirm that end-of-file inside a body is
#   diagnosed elsewhere.
# NOTE(review): TokenCache's bool members have no default initialisers in this
#   patch; they are assigned immediately in LexFunctionBody().
# NOTE(review): ke::SaveAndSet presumably restores the previous value at scope
#   exit. Confirm against the ke library.
# NOTE(review): this copy of the patch has had its line breaks collapsed and
#   its angle-bracket content stripped (e.g. "#include " with no header name,
#   "std::deque tokens"). It is not expected to apply as-is.
#
diff --git a/compiler/lexer.cpp b/compiler/lexer.cpp index ee42e4719..656f064e5 100644 --- a/compiler/lexer.cpp +++ b/compiler/lexer.cpp @@ -1221,6 +1221,14 @@ Lexer::Lexer(CompileContext& cc) } } +Lexer::~Lexer() { + while (!token_caches_.empty()) { + auto node = *token_caches_.begin(); + token_caches_.remove(node); + delete node; + } +} + void Lexer::Init(std::shared_ptr sf) { freading_ = true; EnterFile(std::move(sf), {}); @@ -1342,6 +1350,9 @@ int Lexer::lex() { return current_token()->id; } + if (!injected_token_stream_.empty()) + return LexInjectedToken(); + return LexNewToken(); } @@ -1390,6 +1401,22 @@ int Lexer::LexNewToken() { return tok->id; } +int Lexer::LexInjectedToken() { + auto tok = advance_token_ptr(); + *tok = ke::PopFront(&injected_token_stream_); + + if (tok->id == tMAYBE_LABEL) { + if (allow_tags_) { + tok->id = tLABEL; + [[maybe_unused]] auto tok = ke::PopFront(&injected_token_stream_); + assert(tok.id == ':'); + } else { + tok->id = tSYMBOL; + } + } + return tok->id; +} + void Lexer::FillTokenPos(token_pos_t* pos) { uint32_t offset = state_.pos - state_.start; if (!state_.macro) @@ -1789,7 +1816,9 @@ void Lexer::LexSymbol(full_token_t* tok, sp::Atom* atom) { tok->id = tSYMBOL; if (peek() == ':' && peek2() != ':') { - if (allow_tags_) { + if (caching_tokens_) { + tok->id = tMAYBE_LABEL; + } else if (allow_tags_) { tok->id = tLABEL; advance(); } else if (cc_.types()->find(atom)) { @@ -2571,3 +2600,56 @@ bool Lexer::IsSameSourceFile(const token_pos_t& a, const token_pos_t& b) { return true; return cc_.sources()->IsSameSourceFile(a, b); } + +void Lexer::AssertCleanState() { + assert(allow_keywords_); + assert(allow_substitutions_); + assert(!in_string_continuation_); + assert(allow_tags_); + assert(injected_token_stream_.empty()); +} + +TokenCache* Lexer::LexFunctionBody() { + TokenCache* cache = new TokenCache; + cache->require_newdecls = state_.require_newdecls; + cache->need_semicolon = state_.need_semicolon; + + // To cache tokens we must be 
assured that the lexer state contains no + // surprises, otherwise, the uncached stream may resolve incorrectly. + AssertCleanState(); + + assert(current_token()->id == '{'); + cache->tokens.emplace_back(std::move(*current_token())); + + ke::SaveAndSet caching_tokens(&caching_tokens_, true); + + int brace_balance = 1; + while (freading_) { + int tok = lex(); + if (tok == 0) + break; + cache->tokens.emplace_back(std::move(*current_token())); + + if (tok == '{') { + brace_balance++; + } else if (tok == '}') { + brace_balance--; + if (brace_balance == 0) + break; + } + } + + cache->tokens.shrink_to_fit(); + token_caches_.append(cache); + return cache; +} + +void Lexer::InjectCachedTokens(TokenCache* cache) { + AssertCleanState(); + + injected_token_stream_ = std::move(cache->tokens); + token_caches_.remove(cache); + delete cache; + + freading_ = true; +} diff --git a/compiler/lexer.h b/compiler/lexer.h index 7080fcedf..4ad6df1b8 100644 --- a/compiler/lexer.h +++ b/compiler/lexer.h @@ -19,9 +19,11 @@ // 3. This notice may not be removed or altered from any source distribution. 
#pragma once +#include #include #include #include +#include #include #include "compile-options.h" @@ -213,6 +215,7 @@ enum TokenKind { tEOL, /* newline, only returned by peek_new_line() */ tNEWDECL, /* for declloc() */ tENTERED_MACRO, /* internal lexer command */ + tMAYBE_LABEL, /* internal lexer command, followed by ':' */ tLAST_TOKEN_ID }; @@ -275,12 +278,19 @@ static constexpr int SKIPMODE = 1; /* bit field in "#if" stack */ static constexpr int PARSEMODE = 2; /* bit field in "#if" stack */ static constexpr int HANDLED_ELSE = 4; /* bit field in "#if" stack */ +struct TokenCache : public ke::InlineListNode { + std::deque tokens; + bool require_newdecls; + bool need_semicolon; +}; + class Lexer { friend class MacroProcessor; public: Lexer(CompileContext& cc); + ~Lexer(); int lex(); int lex_same_line(); @@ -306,6 +316,17 @@ class Lexer void LexDefinedKeyword(); bool HasMacro(sp::Atom* atom); + // Lexer must be at a '{' token. Lexes until it reaches a balanced '}' token, + // and returns a pointer to the cached tokens. + // + // The opening '{' token, even though already lexed, will be re-added to the + // stream. This is to avoid significantly changing parse_stmt. + TokenCache* LexFunctionBody(); + + // Consumes a TokenCache. The pointer is deleted after. Consumed tokens will + // be replayed by lex(). 
+ void InjectCachedTokens(TokenCache* cache); + full_token_t lex_tok() { lex(); return *current_token(); @@ -317,7 +338,8 @@ class Lexer const token_pos_t& pos() { return current_token()->start; } std::string& deprecate() { return deprecate_; } bool& allow_tags() { return allow_tags_; } - int& require_newdecls() { return state_.require_newdecls; } + bool& require_newdecls() { return state_.require_newdecls; } + bool& need_semicolon() { return state_.need_semicolon; } bool freading() const { return freading_; } int fcurrent() const { return state_.inpf->sources_index(); } unsigned fline() const { return state_.fline; } @@ -335,6 +357,7 @@ class Lexer void HandleEof(); void HandleSkippedSection(); int LexNewToken(); + int LexInjectedToken(); void LexIntoToken(full_token_t* tok); void LexSymbolOrKeyword(full_token_t* tok); int LexKeywordImpl(sp::Atom* atom); @@ -356,6 +379,7 @@ class Lexer void SkipUtf8Bom(); void PushLexerState(); bool IsSameSourceFile(const token_pos_t& a, const token_pos_t& b); + void AssertCleanState(); full_token_t* advance_token_ptr(); full_token_t* next_token(); @@ -470,7 +494,7 @@ class Lexer int tokline = 0; bool need_semicolon = false; bool is_line_start = false; - int require_newdecls = 0; + bool require_newdecls = false; size_t entry_preproc_if_stack_size = 0; const unsigned char* start = nullptr; const unsigned char* end = nullptr; @@ -485,4 +509,7 @@ class Lexer LexerState state_; tr::vector prev_state_; + ke::InlineList token_caches_; + std::deque injected_token_stream_; + bool caching_tokens_ = false; }; diff --git a/compiler/parse-node.h b/compiler/parse-node.h index 3bea599d3..0f88d436c 100644 --- a/compiler/parse-node.h +++ b/compiler/parse-node.h @@ -1566,6 +1566,9 @@ class FunctionDecl : public Decl Stmt* body() const { return body_; } void set_body(Stmt* body) { body_ = body; } + TokenCache* tokens() const { return tokens_; } + void set_tokens(TokenCache* tokens) { tokens_ = tokens; } + void set_name(sp::Atom* name) { name_ = 
name; } // The undecorated name. @@ -1631,6 +1634,7 @@ class FunctionDecl : public Decl ke::Maybe this_tag_; sp::Atom* alias_ = nullptr; PoolString* deprecate_ = nullptr; + TokenCache* tokens_ = nullptr; bool analyzed_ SP_BITFIELD(1); bool analyze_result_ SP_BITFIELD(1); bool is_public_ SP_BITFIELD(1); diff --git a/compiler/parser.cpp b/compiler/parser.cpp index 2172a1db6..49377314f 100644 --- a/compiler/parser.cpp +++ b/compiler/parser.cpp @@ -189,6 +189,25 @@ Parser::Parse() add_to_end.pop_front(); } + while (!delayed_functions_.empty()) { + auto fun = ke::PopFront(&delayed_functions_); + + auto tokens = fun->tokens(); + fun->set_tokens(nullptr); + + // Technically this is not good enough, as the lexer state could have + // changed in the middle of a function. But that's fairly complex to + // handle and pretty ridiculous as far as use cases go. + ke::SaveAndSet change_newdecls(&lexer_->require_newdecls(), + tokens->require_newdecls); + ke::SaveAndSet change_need_semicolon(&lexer_->need_semicolon(), + tokens->need_semicolon); + + lexer_->InjectCachedTokens(tokens); + auto body = parse_stmt(false); + fun->set_body(BlockStmt::WrapStmt(body)); + } + auto list = new StmtList(token_pos_t{}, stmts); return new ParseTree(list); } @@ -1808,15 +1827,16 @@ Parser::parse_function(FunctionDecl* fun, int tokid, bool has_this) break; } - if (!lexer_->peek('{')) + if (!lexer_->match('{')) { report(437); - - Stmt* body = parse_stmt(false); - if (!body) return false; + } - fun->set_body(BlockStmt::WrapStmt(body)); + auto cache = lexer_->LexFunctionBody(); + fun->set_tokens(cache); fun->set_end_pos(lexer_->pos()); + delayed_functions_.emplace_back(fun); + return true; } @@ -1987,7 +2007,7 @@ Parser::parse_methodmap_method(MethodmapDecl* map) if (ret_type.type.ident != 0 && !is_static) has_this = true; - ke::SaveAndSet require_newdecls(&lexer_->require_newdecls(), TRUE); + ke::SaveAndSet require_newdecls(&lexer_->require_newdecls(), true); if (!parse_function(fun, is_native ? 
tMETHODMAP : 0, has_this)) return nullptr; diff --git a/compiler/parser.h b/compiler/parser.h index 3fc1fb8fa..eefe92a06 100644 --- a/compiler/parser.h +++ b/compiler/parser.h @@ -136,4 +136,5 @@ class Parser std::vector static_scopes_; std::shared_ptr lexer_; TypeDictionary* types_ = nullptr; + std::deque delayed_functions_; };