Skip to content

Commit

Permalink
WIP: Generate an IR (aka int64 part 1/4).
Browse files Browse the repository at this point in the history
The compiler has come a long way, but we still can't do int64, because
it requires a non-cell storage size. There's no (sane) way to express
conversions between int32 and int64 within the AST, because we have no
uniform way of inserting conversion nodes. This is already a deep
problem that has been hacked around for operator overloads and property
accessors, and it doesn't scale.

The solution is obvious: transform the AST into an IR. That's what we
should have done from the beginning but didn't. Unfortunately it
requires a *lot* of refactoring and a ton of boilerplate. So far, I have
most of the boilerplate done, but the refactoring is only halfway there.
CodeGenerator has not been ported to the IR yet.

Once this gigantic patch is done, we'll have the following changes:

- `struct value` will be eliminated, and good riddance.
- The AST will be immutable after parsing.
- The semantic analysis phase will output a new IR tree.
- CodeGenerator will generate off the IR instead. Since the IR is a
  transformation of the AST, I'm expecting minimal changes to the end
  result.

I'm calling this part 1 of 4, since roughly the steps toward `int64` are:

1. Introduce an IR.
2. Eliminate matchtag and have TypeChecker::Coerce insert conversion
   nodes.
3. Refactor the VM to support wide ALU ops (either by adding stack-based ops
   or by making PRI/ALT dynamically sized).
4. Introduce new 64-bit wide types.
  • Loading branch information
dvander committed Jul 30, 2024
1 parent fd82555 commit 1a7a3fc
Show file tree
Hide file tree
Showing 18 changed files with 1,768 additions and 1,176 deletions.
1 change: 1 addition & 0 deletions compiler/AMBuilder
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ module.sources += [
'data-queue.cpp',
'errors.cpp',
'expressions.cpp',
'ir.cpp',
'lexer.cpp',
'main.cpp',
'name-resolution.cpp',
Expand Down
53 changes: 53 additions & 0 deletions compiler/ast-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
// 3. This notice may not be removed or altered from any source distribution.
#pragma once

#include <stdint.h>

#define AST_STMT_TYPE_LIST(FOR_EACH) \
FOR_EACH(StmtList) \
FOR_EACH(BlockStmt) \
Expand Down Expand Up @@ -79,6 +81,47 @@
FOR_EACH(StructExpr) \
FOR_EACH(StructInitFieldExpr)

// X-macro listing the IR node type names. FOR_EACH is expanded once per name,
// in the order written below; `enum class IrKind` uses this list to generate
// one enumerator per entry. The Decls / Statements / Values comments only
// group the entries for readers and have no effect on the expansion.
#define IR_NODE_TYPE_LIST(FOR_EACH) \
/* Decls */ \
FOR_EACH(Variable) \
/* Statements */ \
FOR_EACH(Return) \
FOR_EACH(ValueInsn) \
FOR_EACH(Exit) \
FOR_EACH(Break) \
FOR_EACH(Continue) \
FOR_EACH(Assert) \
FOR_EACH(If) \
FOR_EACH(DoWhile) \
FOR_EACH(Delete) \
FOR_EACH(ForLoop) \
FOR_EACH(Switch) \
FOR_EACH(FunctionDef) \
/* Values */ \
FOR_EACH(ConstVal) \
FOR_EACH(CharArrayLiteral) \
FOR_EACH(VariableRef) \
FOR_EACH(TypeRef) \
FOR_EACH(FunctionRef) \
FOR_EACH(IndexOp) \
FOR_EACH(Load) \
FOR_EACH(TernaryOp) \
FOR_EACH(BinaryOp) \
FOR_EACH(Array) \
FOR_EACH(CommaOp) \
FOR_EACH(CallOp) \
FOR_EACH(TempRef) \
FOR_EACH(PropertyRef) \
FOR_EACH(FieldRef) \
FOR_EACH(UnaryOp) \
FOR_EACH(CallUserOp) \
FOR_EACH(IncDecOp) \
FOR_EACH(Store) \
FOR_EACH(ThisRef)

namespace sp {
namespace cc {

enum class ExprKind : uint8_t
{
#define _(Name) Name,
Expand All @@ -92,3 +135,13 @@ enum class StmtKind : uint8_t
AST_STMT_TYPE_LIST(_)
#undef _
};

// Enumerates the IR node kinds. The enumerators are generated from
// IR_NODE_TYPE_LIST, one per entry and in list order, with uint8_t as the
// underlying type. The helper macro `_` is defined only for the expansion and
// undefined again immediately afterwards.
enum class IrKind : uint8_t
{
#define _(Name) Name,
IR_NODE_TYPE_LIST(_)
#undef _
};

} // namespace cc
} // namespace sp
4 changes: 2 additions & 2 deletions compiler/code-generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ CodeGenerator::EmitExpr(Expr* expr)
EmitNewArrayExpr(expr->to<NewArrayExpr>());
break;
case ExprKind::NamedArgExpr:
EmitExpr(expr->to<NamedArgExpr>()->expr);
EmitExpr(expr->to<NamedArgExpr>()->expr());
break;

default:
Expand Down Expand Up @@ -768,7 +768,7 @@ CodeGenerator::EmitBinaryInner(int oper_tok, const UserOperation& in_user_op, Ex
// ALT if it can't be re-evaluated.
bool must_save_lhs = oper_tok || !left_val.canRematerialize();
if (right_val.ident == iCONSTEXPR) {
if (commutative(oper_tok)) {
if (IsOperTokenCommutative(oper_tok)) {
__ const_alt(right_val.constval());
user_op.swapparams ^= true;
} else {
Expand Down
7 changes: 4 additions & 3 deletions compiler/compile-context.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include <memory>
#include <string>
#include <string_view>
#include <unordered_set>

#include "array-data.h"
Expand Down Expand Up @@ -74,15 +75,15 @@ class CompileContext final
TypeManager* types() const { return types_.get(); }
StringPool* atoms() { return &atoms_; }

// atom() overload set: each overload forwards its argument(s) unchanged to
// atoms_.add() and returns the resulting Atom*.

// Overload taking a std::string by const reference.
Atom* atom(const std::string& str) {
return atoms_.add(str);
}
// Overload taking a character pointer plus an explicit length.
Atom* atom(const char* str, size_t length) {
return atoms_.add(str, length);
}
// Overload taking a character pointer with no length
// (presumably NUL-terminated -- confirm against StringPool::add).
Atom* atom(const char* str) {
return atoms_.add(str);
}
// Overload taking a std::string_view.
Atom* atom(std::string_view sv) {
return atoms_.add(sv);
}

const std::string& default_include() const { return default_include_; }
void set_default_include(const std::string& file) { default_include_ = file; }
Expand Down
166 changes: 1 addition & 165 deletions compiler/expressions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,168 +40,6 @@
namespace sp {
namespace cc {

/* Token values of the binary operators, listed under the `hierN` labels
 * (presumably the expression parser's precedence levels -- confirm).
 *
 * find_userop() searches this table for its `oper` argument and uses the
 * matching index to read its local `binoperstr` and `binoper_savepri` tables,
 * so the order and length (17) of this array must stay in sync with both.
 */
static const int op1[17] = {
// hier3
'*', '/', '%',
// hier4
'+', '-',
// hier5
tSHL, tSHR, tSHRU,
// hier6
'&',
// hier7
'^',
// hier8
'|',
// hier9
tlLE, tlGE, '<', '>',
// hier10
tlEQ, tlNE
};

static inline bool MatchOperator(int oper, FunctionDecl* fun, Type* type1, Type* type2,
int numparam)
{
if (!oper)
numparam = 1;

const auto& args = fun->args();
if (args.size() != size_t(numparam))
return false;

assert(numparam == 1 || numparam == 2);
Type* types[2] = { type1, type2 };

for (int i = 0; i < numparam; i++) {
if (args[i]->type_info().is_varargs)
return false;
if (args[i]->type_info().type != types[i])
return false;
}

if (!oper && fun->type() != type2)
return false;
return true;
}

// Looks for a user-defined operator function matching `oper` and the operand
// types, and on success describes it in `*op`.
//
// sc        Semantic-analysis context; used for the preprocessor check, atom
//           creation, symbol lookup and the "current function" comparison.
// oper      Operator token. 0 means the assignment operator (only valid with
//           numparam == 2).
// type1     Type of the first operand; a reference type is unwrapped to its
//           inner type before matching.
// type2     Type of the second operand (may be null when numparam == 1);
//           also unwrapped if it is a reference.
// numparam  Number of operands, 1 or 2 (asserted).
// lval      Only consulted for assignment: if non-null and its ident is
//           iARRAYCELL or iARRAYCHAR, `savealt` is set in the result.
// op        Output; written only when the function returns true.
//
// Returns false when in the preprocessor, when the operands are plain ints,
// when the operator has no overloadable name, or when no declared function
// matches. Returns true after filling `*op` otherwise.
bool find_userop(SemaContext& sc, int oper, Type* type1, Type* type2, int numparam,
const value* lval, UserOperation* op)
{
// Spelling of each binary operator, indexed in parallel with the file-level
// `op1` table. An empty string (the shift and bitwise operators) leaves
// `opername` empty below, so those operators are never resolved here.
static const char* binoperstr[] = {"*", "/", "%", "+", "-", "", "", "", "",
"", "", "<=", ">=", "<", ">", "==", "!="};
// Parallel to `binoperstr`: value copied into op->savepri for that operator
// (true only for <=, >=, < and >).
static const bool binoper_savepri[] = {false, false, false, false, false, false, false, false,
false, false, false, true, true, true, true, false,
false};
// Unary operator spellings and their tokens; the two tables are parallel.
static const char* unoperstr[] = {"!", "-", "++", "--"};
static const int unopers[] = {'!', '-', tINC, tDEC};

// Longest name copied in is two characters plus the terminator.
char opername[4] = "";
size_t i;
bool savepri, savealt;

if (type1->isReference())
type1 = type1->inner();
if (type2 && type2->isReference())
type2 = type2->inner();

/* since user-defined operators on untagged operands are forbidden, we have
* a quick exit.
*/
assert(numparam == 1 || numparam == 2);
if (sc.cc().in_preprocessor())
return false;
// NOTE(review): type2 is dereferenced here without a null check when
// numparam == 2, although it is null-checked above -- confirm callers always
// pass a non-null type2 for binary operators.
if (type1->isInt() && (numparam == 1 || type2->isInt()))
return false;

savepri = savealt = false;
/* find the name with the operator */
if (numparam == 2) {
if (oper == 0) {
/* assignment operator: a special case */
strcpy(opername, "=");
if (lval != NULL && (lval->ident == iARRAYCELL || lval->ident == iARRAYCHAR))
savealt = true;
} else {
assert((sizeof binoperstr / sizeof binoperstr[0]) == (sizeof op1 / sizeof op1[0]));
for (i = 0; i < sizeof op1 / sizeof op1[0]; i++) {
if (oper == op1[i]) {
strcpy(opername, binoperstr[i]);
savepri = binoper_savepri[i];
break;
}
}
}
} else {
assert(oper);
assert(numparam == 1);
/* try a select group of unary operators */
assert((sizeof unoperstr / sizeof unoperstr[0]) == (sizeof unopers / sizeof unopers[0]));
// opername has not been written on this path, so this condition always holds.
if (opername[0] == '\0') {
for (i = 0; i < sizeof unopers / sizeof unopers[0]; i++) {
if (oper == unopers[i]) {
strcpy(opername, unoperstr[i]);
break;
}
}
}
}
/* if not found, quit */
if (opername[0] == '\0')
return false;

// :TODO: restrict this to globals.
auto opername_atom = sc.cc().atom(opername);
Decl* chain = FindSymbol(sc, opername_atom);
if (!chain)
return false;

// Walk the declaration chain for this operator name and take the first
// function whose signature matches. `swapparams` is assigned only together
// with `decl`, and is read only after the `!decl` early return below.
FunctionDecl* decl = nullptr;
bool swapparams;
bool is_commutative = commutative(oper);
for (auto iter = chain; iter; iter = iter->next) {
auto fun = iter->as<FunctionDecl>();
if (!fun)
continue;
fun = fun->canonical();

bool matched = MatchOperator(oper, fun, type1, type2, numparam);
bool swapped = false;
// For a commutative operator with differing operand types, retry with the
// operand types exchanged and remember that the arguments must be swapped.
if (!matched && is_commutative && type1 != type2 && oper) {
matched = MatchOperator(oper, fun, type2, type1, numparam);
swapped = true;
}
if (matched) {
decl = fun;
swapparams = swapped;
break;
}
}

if (!decl)
return false;

/* we don't want to use the redefined operator in the function that
* redefines the operator itself, otherwise the snippet below gives
* an unexpected recursion:
* fixed:operator+(fixed:a, fixed:b)
* return a + b
*/
// Diagnostic 408 is reported, but execution continues and the operator is
// still returned to the caller.
if (decl == sc.func()) {
report(408);
}

markusage(decl, uREAD);

op->sym = decl;
op->oper = oper;
// Assignment (oper == 0) records one passed parameter; otherwise numparam.
op->paramspassed = (oper == 0) ? 1 : numparam;
op->savepri = savepri;
op->savealt = savealt;
op->swapparams = swapparams;
return true;
}

bool checktag_string(Type* type, const value* sym1) {
if (sym1->type()->isArray())
return false;
Expand Down Expand Up @@ -577,9 +415,7 @@ bool checktag(Type* type, Type* expr_type) {
* precautionary "push" of the primary register is scrapped and the constant
* is read into the secondary register immediately.
*/
int
commutative(int oper)
{
bool IsOperTokenCommutative(int oper) {
switch (oper) {
case '+':
case '*':
Expand Down
4 changes: 1 addition & 3 deletions compiler/expressions.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,8 @@ int NextExprOp(Lexer* lexer, int* opidx, int* list);
struct UserOperation;
bool find_userop(SemaContext& sc, int oper, Type* type1, Type* type2, int numparam,
const value* lval, UserOperation* op);
bool find_userop(SemaContext& sc, int oper, int tag1, int tag2, int numparam,
const value* lval, UserOperation* op);

int commutative(int oper);
bool IsOperTokenCommutative(int oper);
cell calc(cell left, int oper_tok, cell right, char* boolresult);
bool IsValidIndexType(Type* type);
bool matchtag(int formaltag, int actualtag, int flags);
Expand Down
Loading

0 comments on commit 1a7a3fc

Please sign in to comment.