diff --git a/compiler/lexer.cpp b/compiler/lexer.cpp index 6d42542e2..4c6709509 100644 --- a/compiler/lexer.cpp +++ b/compiler/lexer.cpp @@ -251,7 +251,7 @@ Lexer::SynthesizeIncludePathToken() if (!open_c) open_c = '"'; - tok->data = ke::StringPrintf("%c%s", open_c, name); + tok->atom = cc_.atom(ke::StringPrintf("%c%s", open_c, name)); } /* ftoi @@ -469,7 +469,7 @@ void Lexer::HandleDirectives() { } auto tok = PushSynthesizedToken(tSYN_PRAGMA_UNUSED, col); - tok->data = ke::Join(parts, ","); + tok->atom = cc_.atom(ke::Join(parts, ",")); } else { error(207); /* unknown #pragma */ } @@ -1002,6 +1002,7 @@ void Lexer::HandleMultiLineComment() { } void Lexer::packedstring(full_token_t* tok, char term) { + std::string data; while (true) { char c = peek(); if (c == term || c == 0) @@ -1012,19 +1013,20 @@ void Lexer::packedstring(full_token_t* tok, char term) { } if (IsNewline(c)) break; - packedstring_char(tok); + packedstring_char(&data); } + tok->atom = cc_.atom(data); } -void Lexer::packedstring_char(full_token_t* tok) { +void Lexer::packedstring_char(std::string* data) { bool is_codepoint; cell ch = litchar(kLitcharUtf8, &is_codepoint); if (ch < 0) return; if (is_codepoint) - UnicodeCodepointToUtf8(ch, &tok->data); + UnicodeCodepointToUtf8(ch, data); else - tok->data.push_back(static_cast(ch)); + data->push_back(static_cast(ch)); } /* lex(lexvalue,lexsym) Lexical Analysis @@ -1325,7 +1327,6 @@ Lexer::PushSynthesizedToken(TokenKind kind, int col) auto tok = current_token(); tok->id = kind; tok->value = 0; - tok->data.clear(); tok->atom = nullptr; tok->start.line = state_.tokline; tok->start.col = col; @@ -1681,7 +1682,6 @@ bool Lexer::lex_number(full_token_t* tok) { void Lexer::LexStringLiteral(full_token_t* tok, int flags) { tok->id = tSTRING; - tok->data.clear(); tok->atom = nullptr; tok->value = -1; // Catch consumers expecting automatic litadd(). @@ -1693,7 +1693,11 @@ void Lexer::LexStringLiteral(full_token_t* tok, int flags) { error(37); } else { advance(); - packedstring_char(tok); + + std::string data; + packedstring_char(&data); + tok->atom = cc_.atom(data); + /* invalid char declaration */ if (!match_char('\'')) error(27); /* invalid character constant (must be one character) */ @@ -2369,24 +2373,29 @@ cell Lexer::get_utf8_char() { } void Lexer::LexStringContinuation() { + ke::SaveAndSet stop_recursion(&in_string_continuation_, true); + + if (!peek(tELLIPS)) + return; + auto initial = std::move(*current_token()); assert(initial.id == tSTRING); - ke::SaveAndSet stop_recursion(&in_string_continuation_, true); - + std::string data = initial.data(); while (match(tELLIPS)) { if (match(tCHAR_LITERAL)) { - initial.data.push_back(current_token()->value); + data.push_back(current_token()->value); continue; } if (!need(tSTRING)) { lexpush(); break; } - initial.data += current_token()->data; + data += current_token()->data(); } *current_token() = std::move(initial); + current_token()->atom = cc_.atom(data); } bool Lexer::HasMacro(sp::Atom* atom) { diff --git a/compiler/lexer.h b/compiler/lexer.h index e8ed736f0..81d9cdf11 100644 --- a/compiler/lexer.h +++ b/compiler/lexer.h @@ -41,10 +41,12 @@ struct token_pos_t { struct full_token_t { int id = 0; int value = 0; - std::string data; sp::Atom* atom = nullptr; token_pos_t start; token_pos_t end; + const std::string& data() const { + return atom->str(); + } }; #define MAX_TOKEN_DEPTH 4 @@ -366,7 +368,7 @@ class Lexer void lex_float(full_token_t* tok, cell whole); cell litchar(int flags, bool* is_codepoint = nullptr); void packedstring(full_token_t* tok, char term); - void packedstring_char(full_token_t* tok); + void packedstring_char(std::string* data); bool IsSkipping() const { return skiplevel_ > 0 && (ifstack_[skiplevel_ - 1] & SKIPMODE) == SKIPMODE; diff --git a/compiler/parse-node.h b/compiler/parse-node.h index 6e571aa3f..3bea599d3 100644 --- a/compiler/parse-node.h +++ b/compiler/parse-node.h @@ -258,7 +258,7 @@ class ContinueStmt : public Stmt class StaticAssertStmt : public Stmt { public: - explicit StaticAssertStmt(const token_pos_t& pos, Expr* expr, PoolString* text) + explicit StaticAssertStmt(const token_pos_t& pos, Expr* expr, sp::Atom* text) : Stmt(StmtKind::StaticAssertStmt, pos), expr_(expr), text_(text) @@ -270,11 +270,11 @@ class StaticAssertStmt : public Stmt static bool is_a(Stmt* node) { return node->kind() == StmtKind::StaticAssertStmt; } Expr* expr() const { return expr_; } - PoolString* text() const { return text_; } + sp::Atom* text() const { return text_; } private: Expr* expr_; - PoolString* text_; + sp::Atom* text_; }; class Decl : public Stmt @@ -1161,21 +1161,21 @@ class FloatExpr final : public TaggedValueExpr class StringExpr final : public Expr { public: - StringExpr(const token_pos_t& pos, const char* str, size_t len) + StringExpr(const token_pos_t& pos, sp::Atom* atom) : Expr(ExprKind::StringExpr, pos), - text_(new PoolString(str, len)) + text_(atom) {} void ProcessUses(SemaContext&) override {} static bool is_a(Expr* node) { return node->kind() == ExprKind::StringExpr; } - PoolString* text() const { + sp::Atom* text() const { return text_; } private: - PoolString* text_; + sp::Atom* text_; }; class NewArrayExpr final : public Expr diff --git a/compiler/parser.cpp b/compiler/parser.cpp index 092b74cc2..bf0dcb342 100644 --- a/compiler/parser.cpp +++ b/compiler/parser.cpp @@ -149,7 +149,7 @@ Parser::Parse() case tpTRYINCLUDE: { if (!lexer_->need(tSYN_INCLUDE_PATH)) break; - auto name = lexer_->current_token()->data; + auto name = lexer_->current_token()->data(); auto result = lexer_->PlungeFile(name.c_str() + 1, (name[0] != '<'), TRUE); if (!result && tok != tpTRYINCLUDE) { report(417) << name.substr(1); @@ -601,7 +601,7 @@ Parser::parse_pragma_unused() { auto pos = lexer_->pos(); - auto data = std::move(lexer_->current_token()->data); + const auto& data = lexer_->current_token()->data(); std::vector raw_names = ke::Split(data, ","); std::vector names; for (const auto& raw_name : raw_names) @@ -1061,8 +1061,8 @@ Parser::constant() case tRATIONAL: return new FloatExpr(cc_, pos, lexer_->current_token()->value); case tSTRING: { - const auto& str = lexer_->current_token()->data; - return new StringExpr(pos, str.c_str(), str.size()); + const auto& atom = lexer_->current_token()->atom; + return new StringExpr(pos, atom); } case tTRUE: return new TaggedValueExpr(lexer_->pos(), cc_.types()->tag_bool(), 1); @@ -1177,8 +1177,8 @@ Parser::struct_init() Expr* expr = nullptr; switch (lexer_->lex()) { case tSTRING: { - const auto& str = lexer_->current_token()->data; - expr = new StringExpr(pos, str.c_str(), str.size()); + const auto& atom = lexer_->current_token()->atom; + expr = new StringExpr(pos, atom); break; } case tCHAR_LITERAL: @@ -1215,10 +1215,10 @@ Parser::parse_static_assert() if (!expr) return nullptr; - PoolString * text = nullptr; + sp::Atom* text = nullptr; if (lexer_->match(',') && lexer_->need(tSTRING)) { auto tok = lexer_->current_token(); - text = new PoolString(tok->data.c_str(), tok->data.size()); + text = tok->atom; } lexer_->need(')'); @@ -1261,7 +1261,7 @@ Parser::var_init(int vclass) if (lexer_->match(tSTRING)) { auto tok = lexer_->current_token(); - return new StringExpr(tok->start, tok->data.c_str(), tok->data.size()); + return new StringExpr(tok->start, tok->atom); } // We'll check const or symbol-ness for non-sLOCALs in the semantic pass. diff --git a/compiler/semantics.cpp b/compiler/semantics.cpp index 7607f625d..e0b94acd0 100644 --- a/compiler/semantics.cpp +++ b/compiler/semantics.cpp @@ -226,7 +226,7 @@ bool Semantics::CheckPstructDecl(VarDeclBase* decl) { if (ps->args[i]->type.ident == iREFARRAY) { assert(ps->args[i]->type.tag() == types_->tag_string()); - auto expr = new StringExpr(decl->pos(), "", 0); + auto expr = new StringExpr(decl->pos(), cc_.atom("")); init->fields().push_back(new StructInitFieldExpr(ps->args[i]->name, expr, decl->pos())); }