diff --git a/AMBuildScript b/AMBuildScript index 28d8f0be1..a2d4f20ce 100644 --- a/AMBuildScript +++ b/AMBuildScript @@ -108,10 +108,10 @@ class Config(object): cxx.postlink += ['-lpthread', '-lrt'] elif cxx.target.platform == 'mac': cxx.linkflags.remove('-lstdc++') - cxx.cflags += ['-mmacosx-version-min=10.7'] + cxx.cflags += ['-mmacosx-version-min=10.15'] cxx.cflags += ['-stdlib=libc++'] cxx.linkflags += ['-stdlib=libc++'] - cxx.linkflags += ['-mmacosx-version-min=10.7'] + cxx.linkflags += ['-mmacosx-version-min=10.15'] elif cxx.target.platform == 'windows': cxx.defines += ['WIN32', '_WINDOWS'] cxx.cxxdefines += ['NOMINMAX'] @@ -224,6 +224,7 @@ class Config(object): '/EHsc', '/GR-', '/TP', + '/std:c++17', ] cxx.linkflags += [ 'kernel32.lib', diff --git a/compiler/compile-context.cpp b/compiler/compile-context.cpp index b46d7c2e5..d5302cb07 100644 --- a/compiler/compile-context.cpp +++ b/compiler/compile-context.cpp @@ -40,7 +40,7 @@ CompileContext::CompileContext() reports_ = std::make_unique(*this); options_ = std::make_unique(); - sources_ = std::make_unique(*this); + sources_ = std::make_unique(*this); types_ = std::make_unique(*this); types_->init(); } diff --git a/compiler/compile-context.h b/compiler/compile-context.h index 49897207a..fafc5ff9a 100644 --- a/compiler/compile-context.h +++ b/compiler/compile-context.h @@ -36,12 +36,15 @@ class Lexer; class ReportManager; class SemaContext; -class SourceManager; class SymbolScope; class TypeDictionary; struct CompileOptions; struct symbol; +namespace sp { +class SourceManager; +} // namespace sp + // The thread-safe successor to scvars. class CompileContext final { @@ -66,7 +69,7 @@ class CompileContext final const std::shared_ptr& lexer() const { return lexer_; } ReportManager* reports() const { return reports_.get(); } CompileOptions* options() const { return options_.get(); } - SourceManager* sources() const { return sources_.get(); } + sp::SourceManager* sources() const { return sources_.get(); } TypeDictionary* types() const { return types_.get(); } sp::StringPool* atoms() { return &atoms_; } @@ -102,8 +105,8 @@ class CompileContext final std::string& outfname() { return outfname_; } void set_outfname(const std::string& value) { outfname_ = value; } - std::shared_ptr inpf_org() const { return inpf_org_; } - void set_inpf_org(std::shared_ptr sf) { inpf_org_ = sf; } + std::shared_ptr inpf_org() const { return inpf_org_; } + void set_inpf_org(std::shared_ptr sf) { inpf_org_ = sf; } bool must_abort() const { return must_abort_; } void set_must_abort() { must_abort_ = true; } @@ -135,8 +138,8 @@ class CompileContext final std::unique_ptr options_; std::string outfname_; std::string errfname_; - std::unique_ptr sources_; - std::shared_ptr inpf_org_; + std::unique_ptr sources_; + std::shared_ptr inpf_org_; std::unique_ptr types_; sp::StringPool atoms_; diff --git a/compiler/errors.cpp b/compiler/errors.cpp index c279f0a68..d0268fb60 100644 --- a/compiler/errors.cpp +++ b/compiler/errors.cpp @@ -170,7 +170,10 @@ MessageBuilder::~MessageBuilder() ErrorReport report; report.number = number_; - report.fileno = cc.sources()->GetSourceFileIndex(where_); + if (where_.valid()) + report.fileno = cc.sources()->GetSourceFileIndex(where_); + else + report.fileno = 0; report.lineno = std::max(where_.line, 1); if (report.fileno < 0) report.fileno = cc.lexer()->fcurrent(); diff --git a/compiler/lexer.cpp b/compiler/lexer.cpp index ef936a26c..a5a88cfff 100644 --- a/compiler/lexer.cpp +++ b/compiler/lexer.cpp @@ -70,30 +70,32 @@ static constexpr int kLitcharUtf8 = 0x1; // Do not error, because the characters are being ignored. static constexpr int kLitcharSkipping = 0x2; -bool -Lexer::PlungeQualifiedFile(const char* name) -{ +bool Lexer::PlungeQualifiedFile(const char* name) { auto fp = OpenFile(name); if (!fp) return false; + assert(!IsSkipping()); assert(skiplevel_ == ifstack_.size()); /* these two are always the same when "parsing" */ + + auto pos = current_token()->start; + state_.entry_preproc_if_stack_size = ifstack_.size(); PushLexerState(); - EnterFile(std::move(fp)); + EnterFile(std::move(fp), pos); return true; } std::shared_ptr Lexer::OpenFile(const std::string& name) { AutoCountErrors detect_errors; - if (auto sf = cc_.sources()->Open(name, pos())) + if (auto sf = cc_.sources()->Open(name)) return sf; static const std::vector extensions = {".inc", ".p", ".pawn"}; for (const auto& extension : extensions) { auto alt_name = name + extension; - if (auto sf = cc_.sources()->Open(alt_name, pos())) + if (auto sf = cc_.sources()->Open(alt_name)) return sf; if (!detect_errors.ok()) return nullptr; @@ -486,6 +488,7 @@ void Lexer::HandleDirectives() { { ke::SaveAndSet no_macros(&allow_substitutions_, false); ke::SaveAndSet no_keywords(&allow_keywords_, false); + if (!needsymbol(&symbol)) break; } @@ -494,6 +497,8 @@ void Lexer::HandleDirectives() { break; } + auto macro_pos = current_token()->start; + ke::Maybe> args; if (match_char('(')) { ke::SaveAndSet no_macros(&allow_substitutions_, false); @@ -546,11 +551,12 @@ void Lexer::HandleDirectives() { macro->pattern = symbol; macro->documentation = std::move(deprecate_); macro->deprecated = !macro->documentation.empty(); + macro->pos = macro_pos; tr::vector* arg_positions = nullptr; if (macro->args) arg_positions = ¯o->arg_positions; - macro->substitute = SkimUntilEndOfLine(arg_positions); + macro->substitute = cc_.atom(SkimUntilEndOfLine(arg_positions)); macros_[symbol] = std::move(macro); break; @@ -1230,7 +1236,7 @@ Lexer::Lexer(CompileContext& cc) void Lexer::Init(std::shared_ptr sf) { freading_ = true; - EnterFile(std::move(sf)); + EnterFile(std::move(sf), {}); } void Lexer::Start() { @@ -1326,8 +1332,7 @@ Lexer::PushSynthesizedToken(TokenKind kind, const token_pos_t& pos) auto tok = current_token(); tok->id = kind; tok->atom = nullptr; - tok->start.line = state_.tokline; - tok->start.file_ = state_.inpf->sources_index(); + tok->start = token_pos_t(pos, state_.tokline); lexpush(); return tok; } @@ -1365,6 +1370,9 @@ int Lexer::LexNewToken() { FillTokenPos(&tok->start); return tok->id = tEOL; } + + // Always fill a valid location. + FillTokenPos(&tok->start); return 0; } @@ -1396,8 +1404,12 @@ int Lexer::LexNewToken() { } void Lexer::FillTokenPos(token_pos_t* pos) { - pos->line = state_.tokline; - pos->file_ = state_.inpf->sources_index(); + uint32_t offset = state_.pos - state_.start; + if (!state_.macro) + *pos = token_pos_t(state_.loc_range.FilePos(offset), state_.tokline); + else + *pos = token_pos_t(state_.loc_range.MacroPos(offset), state_.tokline); + assert(pos->valid()); } void Lexer::LexIntoToken(full_token_t* tok) { @@ -1927,20 +1939,16 @@ void Lexer::NeedTokenError(int token, int got) { // If the next token is on the current line, return that token. Otherwise, // return tNEWLINE. -int -Lexer::peek_same_line() -{ +int Lexer::peek_same_line() { // We should not call this without having parsed at least one token. assert(token_buffer_->num_tokens > 0); - auto sm = cc_.sources(); - // If there's tokens pushed back, then |fline| is the line of the furthest // token parsed. If fline == current token's line, we are guaranteed any // buffered token is still on the same line. if (token_buffer_->depth > 0 && current_token()->start.line == state_.fline && - sm->IsSameSourceFile(current_token()->start, next_token()->start)) + IsSameSourceFile(current_token()->start, next_token()->start)) { return next_token()->id ? next_token()->id : tEOL; } @@ -1955,7 +1963,7 @@ Lexer::peek_same_line() // If the next token starts on the line the last token ends, then the next // token is considered on the same line. if (next.start.line == current_token()->start.line && - sm->IsSameSourceFile(current_token()->start, next_token()->start)) + IsSameSourceFile(current_token()->start, next_token()->start)) { return next.id; } @@ -2237,7 +2245,7 @@ void Lexer::AddMacro(const char* pattern, const char* subst) { auto atom = cc_.atom(pattern); auto macro = std::make_shared(); macro->pattern = atom; - macro->substitute = subst; + macro->substitute = cc_.atom(subst); macro->deprecated = false; macros_[atom] = std::move(macro); @@ -2306,11 +2314,11 @@ Lexer::NeedSemicolon() return state_.need_semicolon; } -void Lexer::EnterFile(std::shared_ptr&& sf) { +void Lexer::EnterFile(std::shared_ptr&& sf, const token_pos_t& from) { auto& cc = CompileContext::get(); state_.inpf = std::move(sf); - state_.inpf_loc = cc_.sources()->GetLocationRangeEntryForFile(state_.inpf); + state_.loc_range = cc_.sources()->EnterFile(state_.inpf, from); state_.need_semicolon = cc.options()->need_semicolon; state_.require_newdecls = cc.options()->require_newdecls; state_.fline = 1; @@ -2424,6 +2432,8 @@ bool Lexer::EnterMacro(std::shared_ptr macro) { ke::SaveAndSet no_eof(&allow_end_of_file_, false); + auto expansion_pos = current_token()->start; + if (macros_in_use_.count(macro.get())) return false; @@ -2452,20 +2462,26 @@ bool Lexer::EnterMacro(std::shared_ptr macro) { PushLexerState(); + Atom* text = nullptr; if (macro->args) { - state_.pattern = PerformMacroSubstitution(macro.get(), macro_args); - state_.start = reinterpret_cast(state_.pattern.c_str()); - state_.end = state_.start + state_.pattern.size(); + // Atomization is important here since it keeps the macro text alive + // during lexing, since we do not pre-lex its tokens. + // + // We used to not atomize here, in which case it was stored on the + // lexer state. + text = cc_.atom(PerformMacroSubstitution(macro.get(), macro_args)); } else { - state_.start = reinterpret_cast(macro->substitute.c_str()); - state_.end = state_.start + macro->substitute.size(); + text = macro->substitute; } + state_.start = reinterpret_cast(text->chars()); + state_.end = state_.start + text->length(); state_.line_start = state_.start; state_.pos = state_.start; state_.macro = macro; state_.inpf = prev_state_.back().inpf; state_.fline = prev_state_.back().fline; state_.tokline = prev_state_.back().tokline; + state_.loc_range = cc_.sources()->EnterMacro(macro->pos, expansion_pos, text); // Save any tokens we peeked ahead. state_.token_buffer = token_buffer_; @@ -2533,25 +2549,26 @@ std::string Lexer::PerformMacroSubstitution(MacroEntry* macro, std::string out; size_t last_start = 0; + const auto& substitute = macro->substitute->str(); for (const auto& pos : macro->arg_positions) { assert(pos >= last_start); - assert(macro->substitute[pos] == '%'); - assert(IsDigit(macro->substitute[pos + 1])); + assert(substitute[pos] == '%'); + assert(IsDigit(substitute[pos + 1])); - out += macro->substitute.substr(last_start, pos - last_start); + out += substitute.substr(last_start, pos - last_start); last_start = pos + 2; - char arg_pos = macro->substitute[pos + 1] - '0'; + char arg_pos = substitute[pos + 1] - '0'; auto iter = args.find(arg_pos); if (iter == args.end()) { - out.push_back(macro->substitute[pos]); - out.push_back(macro->substitute[pos + 1]); + out.push_back(substitute[pos]); + out.push_back(substitute[pos + 1]); continue; } out += iter->second; } - out += macro->substitute.substr(last_start); + out += substitute.substr(last_start); return out; } @@ -2559,3 +2576,11 @@ void Lexer::SkipUtf8Bom() { if (state_.pos[0] == 0xef && state_.pos[1] == 0xbb && state_.pos[2] == 0xbf) state_.pos += 3; } + +bool Lexer::IsSameSourceFile(const token_pos_t& a, const token_pos_t& b) { + // Almost always, we'll be looking at the most recent location. peek_same_line + // is extremely hot so keep this fast-path fast. + if (state_.loc_range.owns(a) && state_.loc_range.owns(b)) + return true; + return cc_.sources()->IsSameSourceFile(a, b); +} diff --git a/compiler/lexer.h b/compiler/lexer.h index 6f859c306..5c79f37fd 100644 --- a/compiler/lexer.h +++ b/compiler/lexer.h @@ -296,10 +296,10 @@ class Lexer void lexpush(); void lexclr(int clreol); - void Init(std::shared_ptr sf); + void Init(std::shared_ptr sf); void Start(); bool PlungeFile(const char* name, int try_currentpath, int try_includepaths); - std::shared_ptr OpenFile(const std::string& name); + std::shared_ptr OpenFile(const std::string& name); bool NeedSemicolon(); void AddMacro(const char* pattern, const char* subst); void LexStringContinuation(); @@ -323,7 +323,7 @@ class Lexer bool freading() const { return freading_; } int fcurrent() const { return state_.inpf->sources_index(); } unsigned fline() const { return state_.fline; } - SourceFile* inpf() const { return state_.inpf.get(); } + sp::SourceFile* inpf() const { return state_.inpf.get(); } unsigned char const* char_stream() const { return state_.pos; } unsigned char const* line_start() const { return state_.line_start; } @@ -348,7 +348,7 @@ class Lexer full_token_t* PushSynthesizedToken(TokenKind kind, const token_pos_t& pos); void SynthesizeIncludePathToken(); void SetFileDefines(std::string file); - void EnterFile(std::shared_ptr&& fp); + void EnterFile(std::shared_ptr&& fp, const token_pos_t& from); void FillTokenPos(token_pos_t* pos); void SkipLineWhitespace(); std::string SkimUntilEndOfLine(tr::vector* macro_args = nullptr); @@ -357,6 +357,7 @@ class Lexer void NeedTokenError(int expected, int got); void SkipUtf8Bom(); void PushLexerState(); + bool IsSameSourceFile(const token_pos_t& a, const token_pos_t& b); full_token_t* advance_token_ptr(); full_token_t* next_token(); @@ -416,11 +417,12 @@ class Lexer private: struct MacroEntry { - sp::Atom* pattern; + sp::Atom* pattern = nullptr; ke::Maybe> args; tr::vector arg_positions; - std::string substitute; + sp::Atom* substitute = nullptr; std::string documentation; + token_pos_t pos; bool deprecated; }; std::shared_ptr FindMacro(sp::Atom* atom); @@ -464,8 +466,8 @@ class Lexer void operator =(const LexerState &) = delete; LexerState& operator =(LexerState&&) = default; - std::shared_ptr inpf; - LREntry inpf_loc; + std::shared_ptr inpf; + sp::LocationRange loc_range; // Visual line in the file. int fline = 0; // Line # for token processing. @@ -483,7 +485,6 @@ class Lexer // Macro specific. std::shared_ptr macro; - std::string pattern; }; LexerState state_; diff --git a/compiler/main.cpp b/compiler/main.cpp index e9f595c50..f627ff742 100644 --- a/compiler/main.cpp +++ b/compiler/main.cpp @@ -132,7 +132,7 @@ int RunCompiler(int argc, char** argv, CompileContext& cc) { assert(options->source_files.size() == 1); { - auto sf = cc.sources()->Open(options->source_files[0], {}); + auto sf = cc.sources()->Open(options->source_files[0]); if (!sf) { report(417) << options->source_files[0]; goto cleanup; diff --git a/compiler/source-file.cpp b/compiler/source-file.cpp index c6d9cf01c..6282a88ee 100644 --- a/compiler/source-file.cpp +++ b/compiler/source-file.cpp @@ -29,6 +29,8 @@ # include #endif +namespace sp { + SourceFile::SourceFile() : pos_(0) { @@ -116,3 +118,5 @@ SourceFile::Eof() { return pos_ == data_.size(); } + +} // namespace sp diff --git a/compiler/source-file.h b/compiler/source-file.h index d546f28f7..5c93f5c25 100644 --- a/compiler/source-file.h +++ b/compiler/source-file.h @@ -27,7 +27,9 @@ #include #include "stl/stl-string.h" -class SourceFile +namespace sp { + +class SourceFile : public std::enable_shared_from_this { friend class SourceManager; @@ -55,12 +57,12 @@ class SourceFile return reinterpret_cast(data_.data()); } + std::shared_ptr to_shared() { return shared_from_this(); } + private: bool Open(const std::string& file_name); void set_sources_index(uint32_t sources_index) { sources_index_ = ke::Some(sources_index); } - uint32_t location_index() const { return location_index_.get(); } - void set_location_index(uint32_t location_index) { location_index_ = ke::Some(location_index); } private: std::string name_; @@ -68,5 +70,6 @@ class SourceFile size_t pos_; bool is_main_file_ = false; ke::Maybe sources_index_; - ke::Maybe location_index_; }; + +} // namespace sp diff --git a/compiler/source-location.h b/compiler/source-location.h index 017170cba..37e39c6d0 100644 --- a/compiler/source-location.h +++ b/compiler/source-location.h @@ -19,6 +19,10 @@ #include +namespace sp { + +class SourceManager; + // An encoded referece to a location in a source file. We keep this structure // as small as feasible since our average script can have hundreds of thousands // of source locations. @@ -27,7 +31,7 @@ class SourceLocation friend class MacroLexer; friend class SourceFile; friend class SourceManager; - friend struct LREntry; + friend struct LocationRange; friend struct Macro; static const uint32_t kInMacro = 0x80000000; @@ -49,8 +53,9 @@ class SourceLocation : id_(0) { } + SourceLocation(const SourceLocation&) = default; - bool IsSet() const { + bool valid() const { return id_ != 0; } bool operator ==(const SourceLocation& other) { @@ -76,3 +81,5 @@ class SourceLocation private: uint32_t id_; }; + +} // namespace sp diff --git a/compiler/source-manager.cpp b/compiler/source-manager.cpp index 32d9f33d3..d25dedbd3 100644 --- a/compiler/source-manager.cpp +++ b/compiler/source-manager.cpp @@ -17,45 +17,73 @@ // SourcePawn. If not, see http://www.gnu.org/licenses/. #include "source-manager.h" +#include + #include #include "errors.h" #include "lexer.h" +using namespace sp; + +namespace sp { + SourceManager::SourceManager(CompileContext& cc) : cc_(cc) { + loc_ranges_.emplace_back(); } -std::shared_ptr SourceManager::Open(const std::string& path, - const token_pos_t& from) -{ +std::shared_ptr SourceManager::Open(const std::string& path) { + for (const auto& other : opened_files_) { + std::error_code ec; + if (std::filesystem::equivalent(path, other->path(), ec)) + return other; + } + auto file = std::make_shared(); if (!file->Open(path)) return nullptr; + file->set_sources_index(opened_files_.size()); + opened_files_.emplace_back(file); + + return file; +} + +LocationRange SourceManager::EnterFile(std::shared_ptr file, const token_pos_t& from) { size_t loc_index; if (!TrackExtents(file->size(), &loc_index)) { report(from, 422); - return nullptr; + return {}; } if (opened_files_.size() >= UINT_MAX) { report(from, 422); - return nullptr; + return {}; } - file->set_sources_index(opened_files_.size()); - opened_files_.emplace_back(file); + loc_ranges_[loc_index].init(from, file.get()); + return loc_ranges_[loc_index]; +} - // :TODO: fix - locations_[loc_index].init({}, file); - file->set_location_index(loc_index); - return file; +LocationRange SourceManager::EnterMacro(const token_pos_t& from, SourceLocation expansion_loc, + Atom* text) +{ + assert(expansion_loc.valid()); + + size_t lr_index; + if (!TrackExtents(text->length(), &lr_index)) { + report(from, 422); + return {}; + } + + loc_ranges_[lr_index].init(from, expansion_loc, text); + return loc_ranges_[lr_index]; } bool SourceManager::TrackExtents(uint32_t length, size_t* index) { // We allocate an extra 2 so we can refer to the end-of-file position without - // colling with the next range. + // colliding with the next range. uint32_t next_source_id; if (!ke::TryUint32Add(next_source_id_, length, &next_source_id) || !ke::TryUint32Add(next_source_id, 2, &next_source_id) || @@ -64,16 +92,59 @@ bool SourceManager::TrackExtents(uint32_t length, size_t* index) { return false; } - *index = locations_.size(); + *index = loc_ranges_.size(); - LREntry tracker; + LocationRange tracker; tracker.id = next_source_id_; - locations_.push_back(tracker); + loc_ranges_.push_back(tracker); next_source_id_ = next_source_id; return true; } -LREntry SourceManager::GetLocationRangeEntryForFile(const std::shared_ptr& file) { - return locations_[file->location_index()]; +bool SourceManager::IsSameSourceFile(const SourceLocation& a, const SourceLocation& b) { + return GetSourceFileIndex(a) == GetSourceFileIndex(b); } + +size_t SourceManager::FindLocRangeSlow(const SourceLocation& loc) { + assert(loc.valid()); + + if (loc_ranges_[last_lr_lookup_].owns(loc)) + return last_lr_lookup_; + + size_t lower = 1; + size_t upper = loc_ranges_.size(); + while (lower < upper) { + size_t mid = (lower + upper) / 2; + const auto& range = loc_ranges_[mid]; + if (loc.offset() < range.id) { + upper = mid; + } else if (loc.offset() > range.id + range.length() + 1) { + // Note +1 for the terminal offset. + lower = mid + 1; + } else { + assert(range.owns(loc)); + last_lr_lookup_ = mid; + return mid; + } + } + + assert(false); + return 0; +} + +uint32_t SourceManager::GetSourceFileIndex(const SourceLocation& loc) { + size_t lr_index = FindLocRange(loc); + if (lr_index == 0) + return 0; + + while (lr_index && loc_ranges_[lr_index].is_macro()) + lr_index = FindLocRange(loc_ranges_[lr_index].expansion_loc()); + + if (!loc_ranges_[lr_index].file()) + return 0; + + return loc_ranges_[lr_index].file()->sources_index(); +} + +} // namespace sp diff --git a/compiler/source-manager.h b/compiler/source-manager.h index 567d27b15..26de4936d 100644 --- a/compiler/source-manager.h +++ b/compiler/source-manager.h @@ -1,6 +1,6 @@ -// vim: set ts=2 sw=2 tw=99 et: +// vim: set ts=4 sw=4 tw=99 et: // -// Copyright (C) 2022 David Anderson +// Copyright (C) 2022 AlliedModders LLC // // This file is part of SourcePawn. // @@ -19,8 +19,10 @@ #include +#include #include +#include "shared/string-pool.h" #include "stl/stl-unordered-map.h" #include "stl/stl-vector.h" #include "source-file.h" @@ -28,22 +30,30 @@ class CompileContext; -struct token_pos_t { - int file_ = 0; +// Of course, we'd love if tokens could just be SourceLocations. But peek_same_line() +// is an extremely hot function, and line checks need to be fast, so we track +// it explicitly. +struct token_pos_t : public sp::SourceLocation { int line = 0; + + token_pos_t() {} + token_pos_t(const SourceLocation& loc, int line) + : SourceLocation(loc), + line(line) + {} }; -// An LREntry is created each time we register a range of locations (it is -// short for LocationRangeEntry). For a file, an LREntry covers each character +namespace sp { + +// Location Range. Design is taken from LLVM. +// +// An LocationRange is created each time we register a range of locations (it is +// short for LocationRangeEntry). For a file, an LocationRange covers each character // in the file, including a position for EOF. For macros, it covers the number // of characters in its token stream, with a position for EOF. // -// LREntries are allocated by calling trackFile() or trackMacro() in the -// SourceManager. -// -// LREntries are not malloc'd, so references must not be held past calls to -// trackFile() or trackMacro(). -struct LREntry +// LREntries are allocated by calling TrackExtents. +struct LocationRange { // Starting id for this source range. uint32_t id; @@ -52,15 +62,22 @@ struct LREntry // If we're creating a range from an #include, this is the location in the // parent file we were #included from, if any. // - // If we're creating a range for macro insertion, this is where we started - // inserting tokens. + // If we're creating a range for macro insertion, this is where the macro was + // defined. SourceLocation parent_; - // If we included from a file, this is where we included. - std::shared_ptr file_; + // If we're creating a range from a macro, this is the insertion point. + SourceLocation expansion_loc_; + + // If we included from a file, this is where we included. No refcount + // needed since SourceFiles are held open by CompileContext. + union { + SourceFile* file_; + Atom* text_; + }; public: - LREntry() + LocationRange() : id(0) {} @@ -68,23 +85,40 @@ struct LREntry return id != 0; } - void init(const SourceLocation& parent, std::shared_ptr file) { + void init(const SourceLocation& parent, SourceFile* file) { this->parent_ = parent; - this->file_ = std::move(file); + this->file_ = file; } - std::shared_ptr file() const { - return file_; + void init(SourceLocation parent, SourceLocation expansion_loc, Atom* text) { + this->parent_ = parent; + this->expansion_loc_ = expansion_loc; + this->text_ = text; } - const SourceLocation& parent() const { - return parent_; + + std::shared_ptr file() const { + if (!is_file()) + return nullptr; + return file_->to_shared(); } + const SourceLocation& parent() const { return parent_; } + const SourceLocation& expansion_loc() const { return expansion_loc_; } + + bool is_macro() const { return expansion_loc_.valid(); } + bool is_file() const { return !expansion_loc_.valid(); } + uint32_t length() const { + if (!valid()) + return 0; + if (is_macro()) + return text_->length(); return file_->size(); } bool owns(const SourceLocation& loc) const { + if (!loc.valid()) + return false; if (loc.offset() >= id && loc.offset() <= id + length()) return true; return false; @@ -95,6 +129,16 @@ struct LREntry assert(offset <= file_->size()); return SourceLocation::FromFile(id, offset); } + + SourceLocation MacroPos(uint32_t offset) const { + assert(text_); + assert(offset <= text_->length()); + return SourceLocation::FromMacro(id, offset); + } + + bool operator ==(const LocationRange& other) const { + return id == other.id; + } }; class SourceManager final @@ -102,32 +146,48 @@ class SourceManager final public: explicit SourceManager(CompileContext& cc); - std::shared_ptr Open(const std::string& path, const token_pos_t& from); + std::shared_ptr Open(const std::string& path); + LocationRange EnterFile(std::shared_ptr file, const token_pos_t& from); - LREntry GetLocationRangeEntryForFile(const std::shared_ptr& file); + // Return a location range for a macro. If the macro has unique text, the + // location range will not be cached. + LocationRange EnterMacro(const token_pos_t& from, SourceLocation expansion_loc, + Atom* text); // For a given token location, retrieve the nearest source file index it maps to. - uint32_t GetSourceFileIndex(const token_pos_t& pos) { - return pos.file_; - } - bool IsSameSourceFile(const token_pos_t& current, const token_pos_t& next) { - return current.file_ == next.file_; - } + uint32_t GetSourceFileIndex(const SourceLocation& loc); + + // Checks whether two tokens are in the same file. Runtime is O(log n) for + // n = # of files opened. + bool IsSameSourceFile(const SourceLocation& a, const SourceLocation& b); const tr::vector>& opened_files() const { return opened_files_; } + // Find the index of the owning LocationRange. 0 is an invalid range. + size_t FindLocRange(const SourceLocation& loc) { + if (loc_ranges_[last_lr_lookup_].owns(loc)) + return last_lr_lookup_; + return FindLocRangeSlow(loc); + } + private: bool TrackExtents(uint32_t length, size_t* index); + size_t FindLocRangeSlow(const SourceLocation& loc); private: CompileContext& cc_; tr::vector> opened_files_; - tr::vector locations_; + tr::vector loc_ranges_; // Source ids start from 1. The source file id is 1 + len(source) + 1. This // lets us store source locations as a single integer, as we can always // bisect to a particular file, and from there, to a line number and column. uint32_t next_source_id_ = 1; + + // One-entry cache for SL lookup. + size_t last_lr_lookup_ = 0; }; + +} // namespace sp