Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Atomize token data. #897

Merged
merged 2 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 22 additions & 13 deletions compiler/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ Lexer::SynthesizeIncludePathToken()

if (!open_c)
open_c = '"';
tok->data = ke::StringPrintf("%c%s", open_c, name);
tok->atom = cc_.atom(ke::StringPrintf("%c%s", open_c, name));
}

/* ftoi
Expand Down Expand Up @@ -469,7 +469,7 @@ void Lexer::HandleDirectives() {
}

auto tok = PushSynthesizedToken(tSYN_PRAGMA_UNUSED, col);
tok->data = ke::Join(parts, ",");
tok->atom = cc_.atom(ke::Join(parts, ","));
} else {
error(207); /* unknown #pragma */
}
Expand Down Expand Up @@ -1002,6 +1002,7 @@ void Lexer::HandleMultiLineComment() {
}

void Lexer::packedstring(full_token_t* tok, char term) {
std::string data;
while (true) {
char c = peek();
if (c == term || c == 0)
Expand All @@ -1012,19 +1013,20 @@ void Lexer::packedstring(full_token_t* tok, char term) {
}
if (IsNewline(c))
break;
packedstring_char(tok);
packedstring_char(&data);
}
tok->atom = cc_.atom(data);
}

void Lexer::packedstring_char(full_token_t* tok) {
void Lexer::packedstring_char(std::string* data) {
bool is_codepoint;
cell ch = litchar(kLitcharUtf8, &is_codepoint);
if (ch < 0)
return;
if (is_codepoint)
UnicodeCodepointToUtf8(ch, &tok->data);
UnicodeCodepointToUtf8(ch, data);
else
tok->data.push_back(static_cast<char>(ch));
data->push_back(static_cast<char>(ch));
}

/* lex(lexvalue,lexsym) Lexical Analysis
Expand Down Expand Up @@ -1325,7 +1327,6 @@ Lexer::PushSynthesizedToken(TokenKind kind, int col)
auto tok = current_token();
tok->id = kind;
tok->value = 0;
tok->data.clear();
tok->atom = nullptr;
tok->start.line = state_.tokline;
tok->start.col = col;
Expand Down Expand Up @@ -1681,7 +1682,6 @@ bool Lexer::lex_number(full_token_t* tok) {

void Lexer::LexStringLiteral(full_token_t* tok, int flags) {
tok->id = tSTRING;
tok->data.clear();
tok->atom = nullptr;
tok->value = -1; // Catch consumers expecting automatic litadd().

Expand All @@ -1693,7 +1693,11 @@ void Lexer::LexStringLiteral(full_token_t* tok, int flags) {
error(37);
} else {
advance();
packedstring_char(tok);

std::string data;
packedstring_char(&data);
tok->atom = cc_.atom(data);

/* invalid char declaration */
if (!match_char('\''))
error(27); /* invalid character constant (must be one character) */
Expand Down Expand Up @@ -2369,24 +2373,29 @@ cell Lexer::get_utf8_char() {
}

void Lexer::LexStringContinuation() {
ke::SaveAndSet<bool> stop_recursion(&in_string_continuation_, true);

if (!peek(tELLIPS))
return;

auto initial = std::move(*current_token());
assert(initial.id == tSTRING);

ke::SaveAndSet<bool> stop_recursion(&in_string_continuation_, true);

std::string data = initial.data();
while (match(tELLIPS)) {
if (match(tCHAR_LITERAL)) {
initial.data.push_back(current_token()->value);
data.push_back(current_token()->value);
continue;
}
if (!need(tSTRING)) {
lexpush();
break;
}
initial.data += current_token()->data;
data += current_token()->data();
}

*current_token() = std::move(initial);
current_token()->atom = cc_.atom(data);
}

bool Lexer::HasMacro(sp::Atom* atom) {
Expand Down
6 changes: 4 additions & 2 deletions compiler/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ struct token_pos_t {
struct full_token_t {
int id = 0;
int value = 0;
std::string data;
sp::Atom* atom = nullptr;
token_pos_t start;
token_pos_t end;
const std::string& data() const {
return atom->str();
}
};

#define MAX_TOKEN_DEPTH 4
Expand Down Expand Up @@ -366,7 +368,7 @@ class Lexer
void lex_float(full_token_t* tok, cell whole);
cell litchar(int flags, bool* is_codepoint = nullptr);
void packedstring(full_token_t* tok, char term);
void packedstring_char(full_token_t* tok);
void packedstring_char(std::string* data);

bool IsSkipping() const {
return skiplevel_ > 0 && (ifstack_[skiplevel_ - 1] & SKIPMODE) == SKIPMODE;
Expand Down
14 changes: 7 additions & 7 deletions compiler/parse-node.h
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ class ContinueStmt : public Stmt
class StaticAssertStmt : public Stmt
{
public:
explicit StaticAssertStmt(const token_pos_t& pos, Expr* expr, PoolString* text)
explicit StaticAssertStmt(const token_pos_t& pos, Expr* expr, sp::Atom* text)
: Stmt(StmtKind::StaticAssertStmt, pos),
expr_(expr),
text_(text)
Expand All @@ -270,11 +270,11 @@ class StaticAssertStmt : public Stmt
static bool is_a(Stmt* node) { return node->kind() == StmtKind::StaticAssertStmt; }

Expr* expr() const { return expr_; }
PoolString* text() const { return text_; }
sp::Atom* text() const { return text_; }

private:
Expr* expr_;
PoolString* text_;
sp::Atom* text_;
};

class Decl : public Stmt
Expand Down Expand Up @@ -1161,21 +1161,21 @@ class FloatExpr final : public TaggedValueExpr
class StringExpr final : public Expr
{
public:
StringExpr(const token_pos_t& pos, const char* str, size_t len)
StringExpr(const token_pos_t& pos, sp::Atom* atom)
: Expr(ExprKind::StringExpr, pos),
text_(new PoolString(str, len))
text_(atom)
{}

void ProcessUses(SemaContext&) override {}

static bool is_a(Expr* node) { return node->kind() == ExprKind::StringExpr; }

PoolString* text() const {
sp::Atom* text() const {
return text_;
}

private:
PoolString* text_;
sp::Atom* text_;
};

class NewArrayExpr final : public Expr
Expand Down
18 changes: 9 additions & 9 deletions compiler/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ Parser::Parse()
case tpTRYINCLUDE: {
if (!lexer_->need(tSYN_INCLUDE_PATH))
break;
auto name = lexer_->current_token()->data;
auto name = lexer_->current_token()->data();
auto result = lexer_->PlungeFile(name.c_str() + 1, (name[0] != '<'), TRUE);
if (!result && tok != tpTRYINCLUDE) {
report(417) << name.substr(1);
Expand Down Expand Up @@ -601,7 +601,7 @@ Parser::parse_pragma_unused()
{
auto pos = lexer_->pos();

auto data = std::move(lexer_->current_token()->data);
const auto& data = lexer_->current_token()->data();
std::vector<std::string> raw_names = ke::Split(data, ",");
std::vector<sp::Atom*> names;
for (const auto& raw_name : raw_names)
Expand Down Expand Up @@ -1061,8 +1061,8 @@ Parser::constant()
case tRATIONAL:
return new FloatExpr(cc_, pos, lexer_->current_token()->value);
case tSTRING: {
const auto& str = lexer_->current_token()->data;
return new StringExpr(pos, str.c_str(), str.size());
const auto& atom = lexer_->current_token()->atom;
return new StringExpr(pos, atom);
}
case tTRUE:
return new TaggedValueExpr(lexer_->pos(), cc_.types()->tag_bool(), 1);
Expand Down Expand Up @@ -1177,8 +1177,8 @@ Parser::struct_init()
Expr* expr = nullptr;
switch (lexer_->lex()) {
case tSTRING: {
const auto& str = lexer_->current_token()->data;
expr = new StringExpr(pos, str.c_str(), str.size());
const auto& atom = lexer_->current_token()->atom;
expr = new StringExpr(pos, atom);
break;
}
case tCHAR_LITERAL:
Expand Down Expand Up @@ -1215,10 +1215,10 @@ Parser::parse_static_assert()
if (!expr)
return nullptr;

PoolString * text = nullptr;
sp::Atom* text = nullptr;
if (lexer_->match(',') && lexer_->need(tSTRING)) {
auto tok = lexer_->current_token();
text = new PoolString(tok->data.c_str(), tok->data.size());
text = tok->atom;
}

lexer_->need(')');
Expand Down Expand Up @@ -1261,7 +1261,7 @@ Parser::var_init(int vclass)

if (lexer_->match(tSTRING)) {
auto tok = lexer_->current_token();
return new StringExpr(tok->start, tok->data.c_str(), tok->data.size());
return new StringExpr(tok->start, tok->atom);
}

// We'll check const or symbol-ness for non-sLOCALs in the semantic pass.
Expand Down
2 changes: 1 addition & 1 deletion compiler/semantics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ bool Semantics::CheckPstructDecl(VarDeclBase* decl) {
if (ps->args[i]->type.ident == iREFARRAY) {
assert(ps->args[i]->type.tag() == types_->tag_string());

auto expr = new StringExpr(decl->pos(), "", 0);
auto expr = new StringExpr(decl->pos(), cc_.atom(""));
init->fields().push_back(new StructInitFieldExpr(ps->args[i]->name, expr,
decl->pos()));
}
Expand Down