From 0077c0372584ce50f655fabcf2ffc417d8e3a7d7 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Thu, 28 Jul 2022 23:00:14 +0100 Subject: [PATCH 001/395] WIP: a re-implementation of the compiler backend --- compiler/front/main.nim | 11 +- compiler/sem/sighashes.nim | 12 +- compiler/vm/README.md | 170 +++ compiler/vm/cbackend2.nim | 372 +++++++ compiler/vm/cgen2.nim | 1301 +++++++++++++++++++++++ compiler/vm/irdbg.nim | 95 ++ compiler/vm/irgen.nim | 1498 ++++++++++++++++++++++++++ compiler/vm/irpasses.nim | 1175 +++++++++++++++++++++ compiler/vm/irtypes.nim | 143 +++ compiler/vm/vm_enums.nim | 2 + compiler/vm/vmir.nim | 2056 ++++++++++++++++++++++++++++++++++++ lib/system.nim | 4 +- 12 files changed, 6829 insertions(+), 10 deletions(-) create mode 100644 compiler/vm/README.md create mode 100644 compiler/vm/cbackend2.nim create mode 100644 compiler/vm/cgen2.nim create mode 100644 compiler/vm/irdbg.nim create mode 100644 compiler/vm/irgen.nim create mode 100644 compiler/vm/irpasses.nim create mode 100644 compiler/vm/irtypes.nim create mode 100644 compiler/vm/vmir.nim diff --git a/compiler/front/main.nim b/compiler/front/main.nim index 0f9137eac78..99e00fe2ea3 100644 --- a/compiler/front/main.nim +++ b/compiler/front/main.nim @@ -62,6 +62,8 @@ import compiler/ic/[ ] from compiler/ic/ic import rodViewer +from compiler/vm/cbackend2 import nil + when not defined(leanCompiler): import compiler/backend/jsgen, @@ -146,7 +148,7 @@ proc commandCompileToC(graph: ModuleGraph) = let conf = graph.config extccomp.initVars(conf) semanticPasses(graph) - if conf.symbolFiles == disabledSf: + if conf.symbolFiles == disabledSf and not isDefined(graph.config, "cbackend2"): registerPass(graph, cgenPass) if {optRun, optForceFullMake} * conf.globalOptions == {optRun} or isDefined(conf, "nimBetterRun"): @@ -154,6 +156,9 @@ proc commandCompileToC(graph: ModuleGraph) = # nothing changed graph.config.notes = graph.config.mainPackageNotes return + else: + graph.config.exc = excGoto # only goto exceptions are support for the new cbackend + registerPass(graph, cbackend2.cgen2Pass) if not extccomp.ccHasSaneOverflow(conf): conf.defineSymbol("nimEmulateOverflowChecks") @@ -161,7 +166,9 @@ proc commandCompileToC(graph: ModuleGraph) = compileProject(graph) if graph.config.errorCounter > 0: return # issue #9933 - if conf.symbolFiles == disabledSf: + if isDefined(graph.config, "cbackend2"): + cbackend2.generateCode(graph) + elif conf.symbolFiles == disabledSf: cgenWriteModules(graph.backend, conf) else: if isDefined(conf, "nimIcIntegrityChecks"): diff --git a/compiler/sem/sighashes.nim b/compiler/sem/sighashes.nim index 0f7081b965a..dc3954a19f3 100644 --- a/compiler/sem/sighashes.nim +++ b/compiler/sem/sighashes.nim @@ -51,9 +51,9 @@ type CoDistinct CoHashTypeInsideNode -proc hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) +func hashType(c: var MD5Context, t: PType; flags: set[ConsiderFlag]) -proc hashSym(c: var MD5Context, s: PSym) = +func hashSym(c: var MD5Context, s: PSym) = if sfAnon in s.flags or s.kind == skGenericParam: c &= ":anon" else: @@ -63,7 +63,7 @@ proc hashSym(c: var MD5Context, s: PSym) = c &= "." it = it.owner -proc hashTypeSym(c: var MD5Context, s: PSym) = +func hashTypeSym(c: var MD5Context, s: PSym) = if sfAnon in s.flags or s.kind == skGenericParam: c &= ":anon" else: @@ -76,7 +76,7 @@ proc hashTypeSym(c: var MD5Context, s: PSym) = c &= "." it = it.owner -proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]) = +func hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]) = if n == nil: c &= "\255" return @@ -103,7 +103,7 @@ proc hashTree(c: var MD5Context, n: PNode; flags: set[ConsiderFlag]) = else: for i in 0.. 1) order by hash; -proc hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash = +func hashType*(t: PType; flags: set[ConsiderFlag] = {CoType}): SigHash = var c: MD5Context md5Init c hashType c, t, flags+{CoOwnerSig} diff --git a/compiler/vm/README.md b/compiler/vm/README.md new file mode 100644 index 00000000000..15057e0f01f --- /dev/null +++ b/compiler/vm/README.md @@ -0,0 +1,170 @@ + +All of the IR related work is currently located in the `vm` directory, since the IR originally started out as an IR for only the VM. + +Things to consider when reading the code: +* more or less everything is still a work in progress +* there's a lot of debug code lying around +* there's lots of outdated and stale code +* most outdated or unfinished parts aren't marked as such +* most of the existing documentation is unfinished or outdated +* some comments (general ones, `XXX` and `TODO`) are outdated and don't apply anymore + +The general approach for the new compiler backend is the following: +* take the `PNode`-AST that comes out of `transf` and translate it into a dedicated intermediate representation used until code generation +* apply transformations not specific to a target to the IR +* apply target-specific transformations to the IR +* pass the IR to the target's code-generator + +Some future directions and missing pieces are also documented here, but not all of them. + +## Overview of the added modules + +### `vmir.nim` + +The module still has it's original name, but the IR is fully decoupled from the VM. A lot of old code from previous iterations is in here. + +The relevant parts are: +* `IrNode3`: the type of the IR's node structure. Currently a variant object, but planned to move to a more generalized representation. +* `IrStore3`: stores the nodes together with the extra data needed (referenced symbols, join target, etc.). Generally refered to as **the** IR. A simple mechanism for tracking where in the compiler each `IrNode3` was added (or modified). This currently just uses stack-traces, but is planned to be expanded into a more proper facility to track node modifications. +* `BuiltinCall`: meant as an extension to magics, except that they're only needed in the backend. Introduced in order to not require changes to the `TMagic` enum. +* `IrCursor`: API to modify the IR. It first records all modifications and then applies them. Not very efficient right now. This might replace the older `irXXX`-procedure based IR generation (I'm not sure yet) + +Other parts: +* `genCodeV3Exec`: a very early prototype of a code-generator for the VM, taking the IR as input and producing VM bytecode +* `IrStore`, `IrNode2`: old attempts at the IR. Still kept around since there are some parts that might be reused +* `computeInlining`: an early, non-working prototype of computing the most memory efficient IR generation order in the context of procedure inlining + +#### IR overview: +* the IR is a linear node-based representation +* nodes reference each other via indices. A node can only reference nodes coming before it; reference cycles are forbidden +* it's still undecided if a node may be referenced multiple times +* control-flow is represented via gotos and joins. Instead of storing the index of the corresponding `join` target, a `goto` stores an index (`JoinPoint`) into a list storing the actual IR indices. This is aimed at making IR modification simpler, by removing the need to patch goto targets in the IR directly. +* there exist a few special experimental gotos (goto-link-back, goto-with-continuation, goto-active-continuation) meant for more efficient `finally` handling +* the implementation is currently not very data-oriented, but the plan is to move it there eventually. + + +### `irgen.nim` + +A heavily modified copy of `vmgen.nim`. Takes a post-`transf` procedure's `PNode`-AST as the input and translates it to the IR. Traverses the input AST and emits IR instructions via the various `irXXX` procedures. + +`irXXX` procedures defined in `irgen` are experimental and don't have a corresponding IR instruction (yet), but use a combination of other IR instructions. + +### `irpasses.nim` + +Implements the existing IR transformations. This currently includes general transformations as well as target specific ones. The target-specific passes will be moved to their own modules later on. Most transformations are exposed via the `LinearPass` interface. + +None of existing passes are finished yet. + +`runV2` implements the base for static-control-flow-based analysis (e.g. alias analysis, last-read-write analysis, escape analysis, etc.). + +An overview of the current passes: + +#### `computeAliases` + +Old, unused, and defunct early prototype of the alias analysis. + +#### `computeDestructors` + +Old, unused, and defunct early prototype of the destructor injection pass. + +#### `hookPass` + +Meant to replace assignments and `mDestroy` calls with calls to the `=copy`, `=sink` and `=destroy` hooks (if they are present). This is a general pass that applies to all targets and garbage collectors. + +Introduced before `LinearPass2` existed and is thus missing the `mDestroy` patching. + +Currently also ignores whether or not a hook is trivial and thus replaces the assignment for types that don't actually need/use a `=copy` hook. + +#### `refcPass` + +Used when the `refc` GC is enabled. Transforms `ref` (and erroneously also `seq` and `string`) assignments; lifts GC visit procs; lowers `new`, etc + +The GC visit procedure lifting is not yet implemented. + +Doing the lifting in a separate pass would allow for the `refcPass` to also be applicable to the `markAndSweep` and `boehm` GCs. With some additional adjustments the `go` could also be included in that list. + +#### `seqsV1Pass` + +Lowers `seq`s to `PGenericSeq` based operations. Meant for the C-like targets and used when `optSeqDestructors` (implied by ARC/ORC) is not enabled. + +All references to `seq` types need to be rewritten to use the `PGenericSeq` based type. Since seqs are generic, the new types need to be created as part of the pass (implemented). + +The types of locals, globals an parametes are currently adjusted (in a rather in-elegant manner), but field types also need to adjusted! This is not easily doable with the current surrounding architecture and a rewrite of how the types are adjusted is planned. + +A better approach for the type rewriting part would be to introduce a new kind of pass that operates only on types. `cbackend2` is then responsible for collecting all used types (this makes sense in general). The types could then also be run through a unification step, since most of the time, multiple `PType` instances exist for the exact same type (the VM also does this unification). + +As a further improvement, the IR should use it's own representation of types and symbols (a very early prototype exists in `irtypes.nim`). + +#### `seqsV2Pass` + +Same as `seqsV1Pass`, but uses `NimSeqV2`. Also meant for the C-like targets and used when `optSeqDestructors` is enabled. + + +#### `typeV1Pass` + +Creates globals to hold the used RTTI and also generates their initialization logic. Transforms usage of `mGetTypeInfo` to use the introduced globals. Meant for the C-like and JS targets. + +The initialization logic generation is not implemented yet. + + +#### `lowerTestError` + +Injects the pieces necessary for the `exceptions:goto` implementation on the C-like and JS targets. Not needed for the VM. + +#### `lowerRangeCheckPass` + +Transforms `bcRangeCheck` (coming from `nkRangeChck`) into comparisons plus `raiseRangeErrorXXX` calls. Meant to be a general pass, applying to all targets (except maybe the VM, since it currently does the checks at the instruction level). + +#### `lowerSetsPass` + +Lowers `set` operations into bit operations on integers and arrays. Meant for the C-like targets. + +The required replacing of `set` types is missing. + +### `irtypes.nim` + +Very early and not yet used prototype of a type representation for the backend. + +Using a dedicated type (symbols too) representation for the backend stage would mean that `PType` (and `PSym`) no longer has to accomodate for backend specific needs. + +It also allows for using a more linear, data-oriented approach for storing the types without having to adjust all of the compiler. Since the backend is written with a data-oriented approach in mind, it would greatly benefit from this. + + +### `cbackend2.nim` + +A copy of `vmbacked.nim` with the VM related bits removed. Adjusted to use the IR and `cgen2`. + +Points of interest: +* `generateCode`: orchestrates IR generation for all alive procedures (`method` handling is missing) and calls the code-generator (`cgen2`) + +**Note**: the DCE implementation currently runs before any IR transformations took place and thus doesn't know about used compilerprocs and some magics. + +Semantic analysis and backend processing happen separate from each other. That is, first the semantic analysis for the whole program is performed and only then is the backend executed. + +Pros: +* makes it easier to reason about the compiler +* compilerprocs in `system.nim` can be declared in any order, since once the backend is reached, all of them are available +* whole-program optimizations become possible + +Cons: +* higher memory usage, since the bodies of all semantically analysed procedures need to be kept alive until the backend stage +* errors occuring in the back-end are only reported much later (but these error should only be internal ones, this shouldn't be a problem in practice) + +For what it's worth, the VM backend (`vmbackend`) and the `PackedNode`-based backend (`cbackend`) meant for IC both also use the approach described here. + +The backend only supports running it against the whole program right now, but it's written in a way that makes it easy to support smaller, more granular working sets. + +### `cgen2.nim` + +The code-generator for the C target. It takes the IR for all procedures in a module, translates them to a simple AST, and then emits the latter to the given output file. + +The first iteration concatenated strings together with the plan to move to an AST-based approach later on, but I quickly figured that doing the switch already would make thing much simpler (and it did!). Some remnants of the original approach are still visible however. + +Types currently use their own IR in order to make the whole dependency discovery easier. I'd consider the whole approach to type handling here wrong however. Figuring out the order in which types need to be emitted is not a problem specific to the C target and should thus be done outside of the C code-generator (this is also what's planned). + +Instead of always writing to a file, it might make sense to pass `emitModuleToFile` a `Stream` instead and let the caller decide on where the output should go. + +Points of interest: +* `genCode`: translates the input IR to the simple C AST +* `genCTypeDecl`: translates `PType` to a `CDecl` +* `emitModuleToFile`: the main entry point into the code generator. Orchestrates the C AST and type declaration generation and then emits everything in the correct order. \ No newline at end of file diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim new file mode 100644 index 00000000000..d11f131ee07 --- /dev/null +++ b/compiler/vm/cbackend2.nim @@ -0,0 +1,372 @@ +import + std/[ + intsets, + tables + ], + compiler/ast/[ + ast, + ast_types, + astalgo, # for `getModule` + idents, + reports + ], + compiler/backend/[ + extccomp + ], + compiler/front/[ + msgs, + options + ], + compiler/modules/[ + magicsys, + modulegraphs + ], + compiler/sem/[ + passes, + transf + ], + compiler/utils/[ + pathutils + ], + compiler/vm/[ + irgen, + vmir, + cgen2, + irpasses, + irdbg + ], + experimental/[ + results + ] + +import std/options as stdoptions + +type + CodeFragment = object + ## The state required for generating code in multiple steps. + ## `CodeFragment` helps when generating code for multiple procedures in + ## an interleaved manner. + prc: PProc + irs: IrStore3 + + Module = object + stmts: seq[PNode] ## top level statements in the order they were parsed + sym: PSym ## module symbol + + initGlobalsCode: CodeFragment ## the bytecode of `initGlobalsProc`. Each + ## encountered `{.global.}`'s init statement gets code-gen'ed into the + ## `initGlobalCode` of the module that owns it + initGlobalsProc: (PSym, IrStore3) ## the proc that initializes `{.global.}` + ## variables + initProc: (PSym, IrStore3) ## the module init proc (top-level statements) + + ModuleListRef = ref ModuleList + ModuleList = object of RootObj + modules: seq[Module] + modulesClosed: seq[int] ## indices into `modules` in the order the modules + ## were closed. The first closed module comes + ## first, then the next, etc. + moduleMap: Table[int, int] ## module sym-id -> index into `modules` + + ModuleRef = ref object of TPassContext + ## The pass context for the VM backend. Represents a reference to a + ## module in the module list + list: ModuleListRef + index: int + +func growBy[T](x: var seq[T], n: Natural) {.inline.} = + x.setLen(x.len + n) + +iterator cpairs[T](s: seq[T]): (int, lent T) = + ## Continous pair iterator. Supports `s` growing during iteration + var i = 0 + while i < s.len: + yield (i, s[i]) + inc i + +func collectRoutineSyms(ast: PNode, syms: var seq[PSym]) = + ## Traverses the `ast`, collects all symbols that are of routine kind and + ## appends them to `syms` + if ast.kind == nkSym: + if ast.sym.kind in routineKinds: + syms.add(ast.sym) + + return + + for i in 0.. 1: newTree(nkStmtList, module.stmts) + elif module.stmts.len == 1: module.stmts[0] + else: newNode(nkEmpty) + + let tn = transformStmt(c.graph, c.idgen, c.module, ast) + let r = c.genStmt(tn) + + if unlikely(r.isErr): + config.localReport(r.takeErr) + + c.endProc() + + # the `initProc` symbol is missing a valid `ast` field + module.initProc[0] = newSym(skProc, getIdent(c.graph.cache, "init"), nextSymId c.idgen, module.sym, module.sym.info) + module.initProc[1] = c.irs + +proc generateCodeForProc(c: var TCtx, s: PSym): IrGenResult = + assert s != nil + #debugEcho s.name.s, "(", s.kind, "): ", c.config.toFileLineCol(s.info) + let body = transformBody(c.graph, c.idgen, s, cache = false) + c.irs.reset() + result = genProc(c, s, body) + +proc unwrap[T](c: TCtx, r: Result[T, SemReport]): T = + if r.isErr: + c.config.localReport(r.takeErr) + else: + result = r.unsafeGet + +proc generateGlobalInit(c: var TCtx, f: var CodeFragment, defs: openArray[PNode]) = + ## Generates and emits code for the given `{.global.}` initialization + ## statements (`nkIdentDefs` in this case) into `f` + template swapState() = + #swap(c.code, f.code) + #swap(c.debug, f.debug) + swap(c.prc, f.prc) + + # In order to generate code into the fragment, the fragment's state is + # swapped with the `TCtx`'s one + swapState() + + for def in defs.items: + assert def.kind == nkIdentDefs + for i in 0.. 0: + for it in nextProcs.items: + let mIdx = it.itemId.module + let realIdx = mlist.moduleMap[it.getModule().id] + + if g.getBody(it).kind == nkEmpty: + # a quick fix to not run `irgen` for 'importc'ed procs + moduleProcs[realIdx].add((it, IrStore3())) + continue + + let ir = generateCodeForProc(c, it) + collectRoutineSyms(c.unwrap ir, nextProcs2, seenProcs) + + #doAssert mIdx == realIdx + moduleProcs[realIdx].add((it, c.unwrap ir)) + + nextProcs.setLen(0) + swap(nextProcs, nextProcs2) + + let entryPoint = + generateMain(c, g.getModule(conf.projectMainIdx), mlist[]) + + var lpCtx = LiftPassCtx(graph: g, idgen: g.idgen, cache: g.cache) + + for i in 0.. CAst, CDecl +## * emit: write CAst and CDecl to file + +import + std/[ + hashes, + packedsets, + sets, + strformat, + tables + ], + + compiler/ast/[ + ast_types, + ast_query, + types, + trees + ], + compiler/front/[ + options, + msgs + ], + compiler/ic/[ + bitabs + ], + compiler/sem/[ + sighashes + ], + compiler/utils/[ + int128, + pathutils + ], + compiler/vm/[ + vmir, + irdbg + ] + +from compiler/modules/modulegraphs import `$` +from compiler/vm/vmdef import unreachable +from compiler/vm/vmaux import getEnvParam + +from compiler/vm/irpasses import computeTypes, PassError + +type + TypeKey = distinct PType + + TypeSet = Table[int, PType] # TODO: only store unique types? + SymSet = Table[int, PSym] # TODO: use a `HashSet` + + LocalGenCtx = object + ## Mutable environmental state that only applies to the current item + ## (routine) that is being code-gen'ed + + ModuleCtx = object + ## Environmental state that is local to the module the IR being processed + ## lies in. + + # TODO: use a data structure more efficient than `TIdTable`. Maybe + # translating to unique type, similar to how types in the VM + # are handled, make sense here + types: OrderedTable[int, PType] # all used type for the module + + syms: Table[int, PSym] ## symbol-id -> symbol. All used symbols that need to be declared in the C code. # TODO: should be a SymSet + + # TODO: header paths can be very regular. Maybe a CritBitTree[void} would make sense here? + # TODO: the header includes are currently emitted in an arbitrary order, is that okay? (check the old cgen) + headers: HashSet[string] ## all headers the module depends on + + # TODO: rename + FuncId = distinct uint32 + + + CAstNodeKind = enum + ## Syntactic node + # TODO: reorder + cnkError # XXX: temporary. used to encode missing code-generator logic in the output + cnkStmtList + + cnkIf + cnkWhile + cnkSwitch + cnkCase + cnkCall + cnkReturn + cnkGoto + + cnkBraced # braced anonymous initializer + + cnkCast + + cnkLabel # a "label x" + + cnkDotExpr + + cnkStrLit # string literal + cnkIntLit + + cnkDef + cnkType + cnkTernary + cnkInfix + cnkPrefix + cnkIdent + cnkBracket + + # XXX: hmm, 3 byte wasted for padding + CAst = seq[tuple[kind: CAstNodeKind, a, b: uint32]] + + # TODO: use the correct names for the syntax constructs + CDeclAstNodeKind = enum + ## Basic AST for describing both the content of structs and types in C. + ## Not really a syntax tree. + cdnkStruct + cdnkField # type-decl + ident + cdnkUnion + + cdnkEmpty + + cdnkType # references another type via a ``CTypeId`` + cdnkWeakType # a "weak" reference to a type, meaning that a definition of + # the type doesn't have to be present in the translation unit + + cdnkIntLit # a unsigned integer literal (`a` encodes the high and `b` the low bits) + + cdnkFuncPtr # function-ptr type decl + cdnkPtr # XXX: strictly speaking, the `*` is part of the declarator and not of the specifier + cdnkBracket + cdnkIdent + + CDecl = seq[tuple[kind: CDeclAstNodeKind, a, b: uint32]] + + CTypeDesc = distinct CDecl + + + CTypeId = distinct uint32 + + CIdent = LitId ## An identifier in the generated code + + CTypeInfo = object + decl: CDecl + name: CIdent # + + ProcHeader = object + returnType: CTypeId + args: seq[tuple[typ: CTypeId, name: CIdent]] + + IdentCache = BiTable[string] + + CTypeNode = object + # TODO: encode whether the type is a pointer via the `name` field + case isPtr: bool + of true: nil + of false: + name: CIdent + + isImmutable: bool # TODO: merge both bools into a `set` + isVolatile: bool + + GlobalGenCtx = object + ## Environment state that applies to all to all code, independent from + ## which routine or module the code is in. + + # TODO: BiTable might not be the best choice. It recomputes the hash for the + # key on each entry comparision, since it doesn't store key hashes. + # `idents.IdentCache` could be used here, but it's very tailored to Nim (and + # also a `ref` type). Maybe a hash-table overlay (see + # ``vmdef.TypeTable``) is a better choice here + idents: IdentCache # identifiers used in the generated C-code + strings: BiTable[string] + + rttiV1: Table[TypeKey, CIdent] # run-time-type-information requested by the IR being processed. + rttiV2: Table[TypeKey, CIdent] + + funcMap: Table[int, int] ## symbol-id -> index into `procs` # TODO: a table is maybe the wrong data structure here. + funcs: seq[ProcHeader] + + ctypeMap: Table[TypeKey, CTypeId] # + ctypes: seq[CTypeInfo] # + + defered: seq[(PType, CTypeId)] + + CAstBuilder = object + ast: CAst + +const VoidCType = CTypeId(0) +const StringCType = CTypeId(1) + +const InvalidCIdent = CIdent(0) # warning: this depends on a implementation detail of `BiTable` + +func hash(a: TypeKey): Hash = + hash(PType(a).itemId) + +func `==`(a, b: TypeKey): bool = + a.PType.itemId == b.PType.itemId + +func `==`(a, b: CTypeId): bool {.borrow.} + +func mangledName(sym: PSym): string = + # TODO: cache the mangled names (and don't use TLoc for it!) + # TODO: implement + sym.name.s + +const BaseName = "Sub" ## the name of the field for the base type + +func add(decl: var CDecl, k: CDeclAstNodeKind; a, b: uint32 = 0) = + decl.add((k, a, 0'u32)) + +func addField(decl: var CDecl, typ: CTypeId, name: CIdent) = + decl.add cdnkField + decl.add cdnkType, typ.uint32 + decl.add cdnkIdent, name.uint32 + +func addField(decl: var CDecl, cache: var IdentCache, typ: CTypeId, name: sink string) {.inline.} = + decl.addField(typ, cache.getOrIncl(name)) + +func addIntLit(decl: var CDecl, i: uint64) {.inline.} = + decl.add cdnkIntLit, uint32(i shl 32), uint32(i and 0xFFFFFFFF'u64) + +type CTypeMap = Table[TypeKey, CTypeId] + +type TypeGenCtx = object + # inherited state + tm: CTypeMap # mutated + ctypes: seq[CTypeInfo] # mutated + cache: IdentCache # mutated + + # non-inherited state + weakTypes: set[TTypeKind] # the set of types that can be turned into forward declarations when declared as a pointer + + forwardBegin: int + forwarded: seq[PType] ## types who's creation was defered. THe first entry + ## has an ID of `forwardBegin`, the second + ## `forwardBegin + 1`, etc. + +func requestType(c: var TypeGenCtx, t: PType): CTypeId = + ## Requests the type-id for `t`. If the c-type for `t` doesn't exist yet, a + ## slot for it is reserved and it's added to the `c.forwared` list + let next = c.ctypes.len.CTypeId + result = c.tm.mgetOrPut(t.TypeKey, next) + if result == next: + # type wasn't generated yet + assert c.forwardBegin + c.forwarded.len == next.int + c.ctypes.setLen(c.ctypes.len + 1) + c.forwarded.add(t) + +func requestFuncType(c: var TypeGenCtx, t: PType): CTypeId = + # XXX: this is going to be tricky + discard + +func genRecordNode(c: var TypeGenCtx, decl: var CDecl, n: PNode): int = + case n.kind + of nkSym: + let s = n.sym + decl.addField(c.cache, c.requestType(s.typ), s.name.s) + result = 1 + of nkRecList: + for it in n.sons: + discard genRecordNode(c, decl, it) + + result = n.len + + of nkRecCase: + # TODO: properly name the generated fields, unions, and structs + decl.addField(c.cache, c.requestType(n[0].sym.typ), n[0].sym.name.s) + decl.add cdnkUnion, uint32(n.len-1) + decl.add cdnkEmpty + for i in 1.. 0 + +func getTypeName(c: var IdentCache, typ: PType): CIdent = + # TODO: not finished + if typ.sym != nil: + c.getOrIncl(mangledName(typ.sym)) + else: + let h = hashType(typ) + c.getOrIncl(fmt"{typ.kind}_{h}") + + +func genForwarded(c: var TypeGenCtx) = + ## Generates the `CTypeInfo` for all forwarded types (and also for their + ## dependencies) + var i = 0 + # note: ``genCTypeDecl`` may add to ``forwarded`` + while i < c.forwarded.len: + let fwd = c.forwarded[i] + # XXX: forwarded could be cleared when ``i == forwarded.high`` in + # order to cut down on allocations + let decl = genCTypeDecl(c, c.forwarded[i].skipTypes(abstractInst)) + c.ctypes[c.forwardBegin + i] = CTypeInfo(decl: decl, name: getTypeName(c.cache, fwd)) + inc i + + c.forwarded.setLen(0) + c.forwardBegin = c.ctypes.len # prepare for following ``requestType`` calls + +func genCType(dest: var CDecl, cache: var IdentCache, t: PType) = + template addIdentNode(n: string) = + dest.add cdnkIdent, cache.getOrIncl(n).uint32 + + const + NumericalTypeToStr: array[tyInt..tyUInt64, string] = [ + "NI", "NI8", "NI16", "NI32", "NI64", + "NF", "NF32", "NF64", "NF128", + "NU", "NU8", "NU16", "NU32", "NU64"] + + case t.kind + of tyVoid: addIdentNode("void") + of tyPointer, tyNil: + dest.add cdnkPtr + addIdentNode("void") + of tyInt..tyUInt64: + addIdentNode(NumericalTypeToStr[t.kind]) + of tyCstring: + addIdentNode("NIM_CHAR") + of tyBool: + addIdentNode("NIM_BOOL") + else: + addIdentNode(fmt"genCType_missing_{t.kind}") + +func genCType(cache: var IdentCache, t: PType): CTypeInfo = + # TODO: name handling is unfinished + genCType(result.decl, cache, t) + result.name = getTypeName(cache, t) + + +func useFunction(c: var ModuleCtx, s: PSym) = + ## + if lfHeader in s.loc.flags: + c.headers.incl getStr(s.annex.path) + elif lfNoDecl notin s.loc.flags: + discard c.syms.mgetOrPut(s.id, s) + +func useType(c: var ModuleCtx, t: PType) = + c.types[t.id] = t + +#[ +func useTypeWeak(c: var ModuleCtx, t: PType): CTypeId= + c.types + +func useType(c: var ModuleCtx, t: PType): CTypeId = +]# + +func requestFunction(c: var GlobalGenCtx, s: PSym): int = + ## Requests the ID of the C-function `s` maps to + assert s.kind in routineKinds + let nextId = c.funcs.len + result = c.funcMap.mgetOrPut(s.id, nextId) + if result != nextId: + assert result < nextId + # the header's content is generated later; we just reserve the slot here + c.funcs.setLen(c.funcs.len + 1) + + +func requestTypeName(c: var GlobalGenCtx, t: PType): CIdent = + # TODO: not finished + if t.sym != nil: + c.idents.getOrIncl(mangledName(t.sym)) + else: + c.idents.getOrIncl(fmt"requestTypeName_missing_{t.kind}") + +type GenCtx = object + f: File + tmp: int + sym: PSym + + names: seq[CAst] # IRIndex -> expr + types: seq[PType] + config: ConfigRef + + gl: GlobalGenCtx # XXX: temporary + m: ModuleCtx # XXX: temporary + +func gen(c: GenCtx, irs: IrStore3, n: IRIndex): CAst = + c.names[n] + #"gen_MISSING" + +func mapTypeV3(c: var GlobalGenCtx, t: PType): CTypeId + +func mapTypeV2(c: var GenCtx, t: PType): CTypeId = + # TODO: unfinished + c.m.useType(t) # mark the type as used + +func mapTypeV3(c: var GlobalGenCtx, t: PType): CTypeId = + let k = t.TypeKey + result = c.ctypeMap[k] + +func genProcHeader(c: var GlobalGenCtx, t: PType): ProcHeader = + assert t.kind == tyProc + + result.returnType = + if t[0].isEmptyType(): VoidCType + else: mapTypeV3(c, t[0]) + + result.args.newSeq(t.len - 1) + for i in 1.. 0 and t[0] != nil: + result = nthField(t[0].skipTypes(skipPtrs), pos) + +func safeKind(t: PType): TTypeKind {.inline.} = + if t == nil: tyVoid + else: t.kind + +func genLit(c: var GenCtx, lit: PNode): CAst = + case lit.kind + of nkIntLit: + start().intLit(lit.intVal).fin() + of nkStrLit: + if lit.typ == nil: + # XXX: some passes insert string literals without type information. It's supported for now + # treat as cstring + start().strLit(c.gl.strings, lit.strVal).fin() + else: + case lit.typ.kind + of tyString, tyDistinct: + # XXX: the string lit handling is probably too late here and should be + # done as part of the `seq` lowering passes instead + # XXX: this currently only takes the old GC-based strings into account + if lit.strVal.len == 0: + # XXX: yeah, this is bad. The lowering needs to happen at the IR level + start().add(cnkCast).ident(c.gl.idents, "NimStringDesc*").ident(c.gl.idents, "NIM_NIL").fin() + else: + genError(c, fmt"missing lit: non-empty tyString") + of tyCstring: + start().strLit(c.gl.strings, lit.strVal).fin() + else: + unreachable(lit.typ.kind) + of nkNilLit: + start().ident(c.gl.idents, "NIM_NIL").fin() + else: + genError(c, fmt"missing lit: {lit.kind}") + +template testNode(cond: bool, i: IRIndex) = + if not cond: + debugEcho astToStr(cond), " failed" + debugEcho "node: ", i + printIr(irs, exprs) + for e in irs.traceFor(i).items: + debugEcho e + if irs.at(i).kind == ntkLocal: + debugEcho "trace for local:" + for e in irs.traceForLocal(irs.getLocalIdx(i)).items: + debugEcho e + doAssert false + +proc genCode(c: var GenCtx, irs: IrStore3): CAst = + var i = 0 + template names: untyped = c.names + template types: untyped = c.types + template f: untyped = c.f + + var numStmts = 0 + result.add cnkStmtList + + var tmp = 0 + for typ, sym in irs.locals: + if sym != nil: + if lfHeader in sym.loc.flags: + let str = getStr(sym.annex.path) + continue + elif lfNoDecl in sym.loc.flags: + continue + + result.add cnkDef + result.add cnkType, mapTypeV2(c, typ).uint32 + if sym != nil: # TODO: don't test for temps like this + result.add c.gl.ident mangledName(sym) + + else: + result.add c.gl.ident(fmt"_tmp{tmp}") + inc tmp + + inc numStmts + + let exprs = calcStmt(irs) + names.newSeq(irs.len) + + for n in irs.nodes: + case n.kind + of ntkSym: + let sym = irs.sym(n) + # TODO: refactor + if sym.kind in routineKinds and sym.magic == mNone: + useFunction(c.m, sym) + elif sym.kind in {skVar, skLet} and sfGlobal in sym.flags: + c.m.syms[sym.id] = sym + #discard mapTypeV3(c.gl, sym.typ) # XXX: temporary + + if sym.kind notin routineKinds and sym.typ != nil: + useType(c.m, sym.typ) + + names[i] = start().ident(c.gl.idents, mangledName(sym)).fin() + of ntkLocal: + let (kind, typ, sym) = irs.getLocal(i) + if sym == nil: + names[i] = start().ident(c.gl.idents, "_tmp" & $c.tmp).fin() + inc c.tmp + else: + names[i] = start().ident(c.gl.idents, mangledName(sym)).fin() + + of ntkCall: + if n.isBuiltIn: + let (name, typ) = genBuiltin(c, irs, n.builtin, n) + names[i] = name + else: + let callee = irs.at(n.callee) + if callee.kind == ntkSym and irs.sym(callee).magic != mNone: + names[i] = genMagic(c, irs, irs.sym(callee).magic, n) + else: + var res = start().add(cnkCall, n.argCount.uint32).add(names[n.callee]) + for it in n.args: + discard res.add names[it] + names[i] = res.fin() + + # TODO: we're missing a proper way to check whether a call is a statement + if not exprs[i]:#irs.isStmt(n): + result.add names[i] + inc numStmts + of ntkAddr: + names[i] = start().emitAddr(c.gl.idents).add(names[n.addrLoc]).fin() + of ntkDeref: + let t = types[n.addrLoc].skipTypes(abstractInst) + testNode t.kind in {tyPtr, tyRef, tyVar, tyLent, tySink}, n.addrLoc + names[i] = start().emitDeref(c.gl.idents).add(names[n.addrLoc]).fin() + of ntkAsgn: + result.add start().add(cnkInfix).add(names[n.wrLoc]).ident(c.gl.idents, "=").add(names[n.srcLoc]).fin() + inc numStmts + of ntkPathObj: + let typ = types[n.srcLoc].skipTypes(abstractInst) + let src = names[n.srcLoc] + let idx = n.fieldIdx + var ast = start().add(cnkDotExpr).add(src) + case typ.kind + of tyObject: + let f = typ.nthField(n.fieldIdx) + discard ast.ident(c.gl.idents, mangledName(f)) + of tyTuple: + if typ.n != nil: + discard ast.ident(c.gl.idents, typ.n[idx].sym.mangledName()) + else: + # annonymous tuple + discard ast.ident(c.gl.idents, fmt"Field{idx}") + + else: + testNode false, n.srcLoc + + names[i] = ast.fin() + + of ntkPathArr: + names[i] = start().add(cnkBracket).add(names[n.srcLoc]).add(names[n.arrIdx]).fin() + of ntkLit: + names[i] = genLit(c, irs.getLit(n)) + of ntkUse: + names[i] = names[n.srcLoc] + of ntkBranch: + result.add cnkIf + result.add names[n.cond] + result.add cnkStmtList, 1 + result.add cnkGoto, c.gl.idents.getOrIncl(fmt"label{n.target}").uint32 + inc numStmts + of ntkJoin: + if irs.isLoop(n.joinPoint): + result.add genError(c, "loop impl missing") + discard#f.writeLine "while (true) {" + else: + result.add cnkLabel, c.gl.idents.getOrIncl(fmt"label{n.joinPoint}").uint32 + inc numStmts + of ntkGoto: + if irs.isLoop(n.target): + # there exists only one `goto loop` and it's at the end of the loop + # XXX: very brittle + result.add genError(c, "loop impl missing") + else: + result.add cnkGoto, c.gl.idents.getOrIncl(fmt"label{n.target}").uint32 + + inc numStmts + else: + names[i] = genError(c, fmt"missing impl: {n.kind}") + if not exprs[i]: + result.add names[i] + inc numStmts + + inc i + + # exit + if c.sym.typ.n[0].typ.isEmptyType(): + result.add cnkReturn + else: + result.add cnkReturn, 1 + result.add cnkIdent, c.gl.idents.getOrIncl("result").uint32 + inc numStmts + + echo numStmts + result[0].a = numStmts.uint32 + +proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl) + +proc emitType(f: File, c: GlobalGenCtx, t: CTypeId) = + let info {.cursor.} = c.ctypes[t.int] + if info.name != InvalidCIdent: + f.write c.idents[info.name] + else: + # the declaration is emitted directly if a type has no name + emitCDecl(f, c, info.decl) + +proc emitCAst(f: File, c: GlobalGenCtx, ast: CAst, pos: var int) + +proc emitAndEscapeIf(f: File, c: GlobalGenCtx, ast: CAst, pos: var int, notSet: set[CAstNodeKind]) = + if ast[pos].kind in notSet: + emitCAst(f, c, ast, pos) + else: + f.write "(" + emitCAst(f, c, ast, pos) + f.write ")" + + +proc emitCAst(f: File, c: GlobalGenCtx, ast: CAst, pos: var int) = + if pos >= ast.len: + for it in ast: + echo it + + let n = ast[pos] + inc pos + + case n.kind + of cnkError: + f.write "GEN_ERROR(\"" + f.write c.strings[n.a.LitId] + f.write "\")" + of cnkStmtList: + for _ in 0.. 0: + f.write ", " + + emitCAst(f, c, ast, pos) + + f.write ")" + + of cnkIf: + f.write "if (" + emitCAst(f, c, ast, pos) # condition + f.writeLine ") {" + emitCAst(f, c, ast, pos) # stmt list + f.write "}" + + of cnkReturn: + f.write "return" + if n.a == 1: + emitCAst(f, c, ast, pos) + + of cnkLabel: + f.write c.idents[n.a.LitId] + f.writeLine ":" + of cnkGoto: + f.write "goto " + f.write c.idents[n.a.LitId] + of cnkDotExpr: + f.write "(" + emitCAst(f, c, ast, pos) + f.write ")." + emitCAst(f, c, ast, pos) + + of cnkStrLit: + f.write '"' + f.write c.strings[n.a.LitId] + f.write '"' + + of cnkIntLit: + f.write (n.a.uint64 shl 64) or n.b.uint64 + + of cnkType: + emitType(f, c, n.a.CTypeId) + + of cnkCast: + f.write "(" + emitCAst(f, c, ast, pos) + f.write ") (" + emitCAst(f, c, ast, pos) + f.write ")" + + of cnkBraced: + f.write "{" + for i in 0.. 0: + f.write ", " + emitCAst(f, c, ast, pos) + f.write "}" + + else: + f.write "EMIT_ERROR(\"missing " & $n.kind & "\")" + +proc emitCAst(f: File, c: GlobalGenCtx, ast: CAst) = + var pos = 0 + while pos < ast.len: + emitCAst(f, c, ast, pos) + + +proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl, pos: var int) + +proc emitFuncDecl(f: File, c: GlobalGenCtx, decl: CDecl, ident: CIdent, L: int, pos: var int) = + emitCDecl(f, c, decl, pos) # return type + if ident != InvalidCIdent: + f.write "(*" + f.write c.idents[ident] + f.write ")(" + else: + f.write "(*)(" + for i in 0.. 0: + f.write ", " + + emitCDecl(f, c, decl, pos) + + f.write ")" + +proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl, pos: var int) = + if pos >= decl.len: + for it in decl: + echo it + let n = decl[pos] + inc pos + + case n.kind + of cdnkStruct, cdnkUnion: + f.write: + if n.kind == cdnkStruct: "struct " + else: "union " + + emitCDecl(f, c, decl, pos) + f.writeLine "{" + for _ in 0.. 0, c.idents[info.name] + + let kind = info.decl[0].kind + case kind + of cdnkStruct, cdnkUnion: + emitCDecl(f, c, info.decl, pos) + of cdnkBracket: + f.write "typedef " + pos = 1 + emitCDecl(f, c, info.decl, pos) + f.write " " + f.write c.idents[info.name] + f.write "[" + emitCDecl(f, c, info.decl, pos) # the array size + f.write "]" + of cdnkFuncPtr: + f.write "typedef " + pos = 1 + emitFuncDecl(f, c, info.decl, info.name, info.decl[0].a.int, pos) + else: + f.write "typedef " + emitCDecl(f, c, info.decl, pos) + f.write " " + f.write c.idents[info.name] + + f.writeLine ";" + + assert pos == info.decl.len + +proc writeDecl(f: File, c: GlobalGenCtx, h: ProcHeader, name: PSym) = + emitType(f, c, h.returnType) + f.write(" ") + f.write(mangledName(name)) + f.write("(") + for i, it in h.args.pairs: + if i > 0: + f.write ", " + + emitType(f, c, it.typ) + + f.writeLine(");") + +proc writeDef(f: File, c: GlobalGenCtx, h: ProcHeader, name: PSym) = + emitType(f, c, h.returnType) + f.write(" ") + f.write(mangledName(name)) + f.write("(") + for i, it in h.args.pairs: + if i > 0: + f.write ", " + + emitType(f, c, it.typ) + f.write " " + f.write c.idents[it.name] + + f.writeLine(") {") + +proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray[(PSym, IrStore3)]) = + let f = open(filename.string, fmWrite) + defer: f.close() + + echo "Here: ", filename.string + + var + ctx: GlobalGenCtx + mCtx: ModuleCtx + asts: seq[CAst] + + tgc = TypeGenCtx(weakTypes: {tyObject, tyTuple}) + + template swapTypeCtx() = + swap(tgc.tm, ctx.ctypeMap) + swap(tgc.ctypes, ctx.ctypes) + swap(tgc.cache, ctx.idents) + + ctx.ctypes.add(CTypeInfo(name: ctx.idents.getOrIncl("void"))) # the `VoidCType` + # XXX: we need the `NimStringDesc` PType here + #ctx.ctypes.add(CTypeInfo(name: ctx.idents.getOrIncl("NimString"))) # XXX: wrong, see above + mCtx.headers.incl("\"nimbase.h\"") + + tgc.forwardBegin = ctx.ctypes.len + + for sym, irs in procs.items: + useFunction(mCtx, sym) + + if sfImportc in sym.flags: + asts.add(default(CAst)) + continue + + echo "genFor: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) + var c = GenCtx(f: f, config: conf, sym: sym) + # doing a separate pass for the type computation instead of doing it in + # `genCode` is probably a bit less efficient, but it's also simpler; + # requires less code duplication; and is also good for modularity + c.types = computeTypes(irs) + + swapTypeCtx() + + # request all types used inside the IR to be setup. Note that this only + # means that the C-type equivalents are created, not that the declarations + # are also emitted in the output file + for t in c.types.items: + if t != nil: + discard tgc.requestType(t) + + swapTypeCtx() + + swap(c.gl, ctx) + swap(c.m, mCtx) + asts.add genCode(c, irs) + swap(c.m, mCtx) + swap(c.gl, ctx) + + swapTypeCtx() + # XXX: this might lead to an ordering problem, since we're not registering + # the types on the first occurence + # mark the types used in routine signatures as used + for sym in mCtx.syms.values: + case sym.kind + of routineKinds: + for it in sym.typ.sons: + if it != nil: + discard tgc.requestType(it) + mCtx.useType(it) + else: + discard tgc.requestType(sym.typ) + mCtx.useType(sym.typ) + + tgc.genForwarded() + + swapTypeCtx() + + var used: seq[CTypeId] + + block: + for typ in mCtx.types.values: + used.add ctx.ctypeMap[typ.TypeKey] + + for i, t in ctx.ctypes.pairs: + assert t.name != InvalidCIdent, $i + + f.writeLine "#define NIM_INTBITS 64" # TODO: don't hardcode + + # headers + for h in mCtx.headers.items: + f.writeLine fmt"#include {h}" + + # type section + + proc emitWithDeps(f: File, c: GlobalGenCtx, t: CTypeId, + marker: var PackedSet[CTypeId]) = + let info {.cursor.} = c.ctypes[t.int] + if info.name == InvalidCIdent or marker.containsOrIncl(t): + # nothing to do + return + + # scan the type's body for non-weak dependencies and emit them first + for n in info.decl.items: + if n.kind == cdnkType: + emitWithDeps(f, c, n.a.CTypeId, marker) + + if info.decl.len > 0: + # only emit types that have a declaration + emitCType(f, c, info) + + var marker: PackedSet[CTypeId] + for it in used.items: + emitWithDeps(f, ctx, it, marker) + + + # generate all procedure forward declarations + for sym in mCtx.syms.values: + case sym.kind + of routineKinds: + #echo "decl: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) + let hdr = genProcHeader(ctx, sym.typ) + + writeDecl(f, ctx, hdr, sym) + of skLet, skVar: + emitType(f, ctx, ctx.ctypeMap[sym.typ.TypeKey]) + f.write " " + f.write mangledName(sym) + f.writeLine ";" + of skConst: + f.writeLine "EMIT_ERROR(\"missing logic: const\")" + else: + unreachable(sym.kind) + + var i = 0 + for it in asts.items: + if it.len == 0: + inc i + continue + + let (sym, _) = procs[i] + let hdr = genProcHeader(ctx, sym.typ) + writeDef(f, ctx, hdr, sym) + try: + emitCAst(f, ctx, it) + except: + echo "emit: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) + raise + f.writeLine "}" + inc i \ No newline at end of file diff --git a/compiler/vm/irdbg.nim b/compiler/vm/irdbg.nim new file mode 100644 index 00000000000..1dac223587c --- /dev/null +++ b/compiler/vm/irdbg.nim @@ -0,0 +1,95 @@ +import compiler/vm/vmir + +import std/strformat + +# TODO: not related to debugging, move proc somewhere else +func calcStmt*(irs: IrStore3): seq[bool] = + # XXX: very poor way of checking for statements + result.newSeq(irs.len) + var i = 0 + for n in irs.nodes: + case n.kind + of ntkSym, ntkLocal, ntkJoin, ntkLit, ntkGoto: + discard + of ntkCall: + for it in n.args: + result[it] = true + + if not n.isBuiltIn: + result[n.callee] = true + of ntkAddr, ntkDeref: + result[n.addrLoc] = true + of ntkAsgn: + result[n.wrLoc] = true + result[n.srcLoc] = true + of ntkPathObj: + result[n.srcLoc] = true + of ntkPathArr: + result[n.srcLoc] = true + result[n.arrIdx] = true + of ntkUse: + result[n.srcLoc] = true + of ntkBranch: + result[n.cond] = true + else: + debugEcho "Skipping: ", n.kind + inc i + + +proc printIr*(irs: IrStore3, exprs: seq[bool]) = + var i = 0 + for n in irs.nodes: + var line = "" + case n.kind + of ntkSym: + line = fmt"sym {irs.sym(n).name.s}" + of ntkAsgn: + case n.asgnKind + of askCopy, askDiscr: + line = fmt"{n.wrLoc} = {n.srcLoc}" + of askMove: + line = fmt"{n.wrLoc} = move {n.srcLoc}" + of askShallow: + line = fmt"{n.wrLoc} = shallow {n.srcLoc}" + of askInit: + line = fmt"{n.wrLoc} := {n.srcLoc}" + of ntkDeref: + line = fmt"deref {n.addrLoc}" + of ntkLit: + line = fmt"lit {irs.getLit(n).kind}" + of ntkUse: + line = fmt"use {n.srcLoc}" + of ntkGoto: + line = fmt"goto label:{n.target}" + of ntkLocal: + let (k, t, _) = irs.getLocal(i) + line = fmt"local kind:{k} idx:{irs.getLocalIdx(i)} typ:{t.kind}" + of ntkPathObj: + line = fmt"path obj:{n.srcLoc} field:{n.fieldIdx}" + of ntkPathArr: + line = fmt"path arr:{n.srcLoc} idx:@{n.arrIdx}" + of ntkCall: + if n.isBuiltIn: + line = fmt"call bi: {n.builtin}" + else: + line = fmt"call {n.callee} args: [" + for arg in n.args: + line.add fmt"{arg} " + line.add "]" + of ntkBranch: + line = fmt"branch label:{n.target} cond:{n.cond}" + of ntkJoin: + if irs.isLoop(n.joinPoint): + echo "loop ", n.joinPoint, ":" + else: + echo "label ", n.joinPoint, ":" + inc i + continue + else: + line = fmt"" + + if exprs[i]: + echo i, ": ", line + else: + echo " ", line + inc i diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim new file mode 100644 index 00000000000..3b071889ebb --- /dev/null +++ b/compiler/vm/irgen.nim @@ -0,0 +1,1498 @@ +import + std/[ + tables, + strutils + ], + compiler/ast/[ + renderer, + types, + trees, + ast, + astalgo, + reports, + lineinfos + ], + compiler/modules/[ + magicsys, + modulegraphs + ], + compiler/front/[ + msgs, + options + ], + compiler/vm/[ + vmir + ], + experimental/[ + results + ] + +from compiler/vm/vmaux import findRecCase, findMatchingBranch +from compiler/vm/vmdef import unreachable + +type TBlock = object + label: PSym + start: JoinPoint + +type LocalId = int +type ScopeId = uint32 + +type PProc* = object + sym*: PSym + + excHandlers: seq[JoinPoint] + blocks: seq[TBlock] + variables: seq[LocalId] ## each non-temporary local in the order of their definition + + # each local has an owning scope (the smallest enclosing one). When the + # control-flow leaves a scope, all locals it owns need be destroyed + scopes: seq[(bool, ScopeId, IRIndex)] + + finalizers: seq[(Slice[IRIndex], JoinPoint)] # + + numLocals: seq[uint32] # the number of locals for each scope + + scopeStack: seq[ScopeId] + nextScopeId: ScopeId + + locals: Table[int, int] + +type TCtx* = object + + irs*: IrStore3 + + prc*: PProc + handlers: seq[PNode] # the defered exception handlers + + graph*: ModuleGraph # only needed for testing if a proc has a body + idgen*: IdGenerator # needed for creating magics on-demand + + module*: PSym + + config*: ConfigRef + + options*: set[TOption] + + +type IrGenResult* = Result[IrStore3, SemReport] + +when defined(nimCompilerStacktraceHints): + import std/stackframes + +type + VmGenError = object of CatchableError + report: SemReport + +const + NormalExit = 0 + ExceptionalExit = 1 + +func raiseVmGenError( + report: sink SemReport, + loc: TLineInfo, + inst: InstantiationInfo + ) {.noinline, noreturn.} = + report.location = some(loc) + report.reportInst = toReportLineInfo(inst) + raise (ref VmGenError)(report: report) + +func fail( + info: TLineInfo, + kind: ReportKind, + ast: PNode = nil, + sym: PSym = nil, + str: string = "", + loc: InstantiationInfo = instLoc() + ) {.noinline, noreturn.} = + raiseVmGenError( + SemReport(kind: kind, ast: ast, sym: sym, str: str), + info, + loc) + +func irParam(ir: var IrStore3, sym: PSym): IRIndex = + ir.irSym(sym) + +func irGlobal(ir: var IrStore3, sym: PSym): IRIndex = + ir.irSym(sym) + +func irConst(ir: var IrStore3, sym: PSym): IRIndex = + ir.irSym(sym) + +proc irImm(c: var TCtx, val: SomeInteger): IRIndex = + # XXX: getSysType has side-effects + c.irs.irLit newIntTypeNode(BiggestInt(val), c.graph.getSysType(unknownLineInfo, tyInt)) + +template tryOrReturn(code): untyped = + try: + code + except VmGenError as e: + return IrGenResult.err(move e.report) + + +proc getTemp(cc: var TCtx; tt: PType): IRIndex + +func openScope(c: var TCtx) = + let id = c.prc.nextScopeId + inc c.prc.nextScopeId + + c.prc.numLocals.add(0) + c.prc.scopes.add((false, id, c.irs.len)) + c.prc.scopeStack.add(id) + +func closeScope(c: var TCtx) = + let id = c.prc.scopeStack.pop() + c.prc.scopes.add((true, id, c.irs.len)) + +proc genProcSym(c: var TCtx, n: PNode): IRIndex = + assert n.kind == nkSym + c.irs.irSym(n.sym) + +proc irCall(c: var TCtx, name: string, args: varargs[IRIndex]): IRIndex = + # TODO: compiler procs should be cached here in `TCtx` + let sym = c.graph.getCompilerProc(name) + c.irs.irCall(c.irs.irSym(sym), args) + +func irCall(c: var TCtx, name: string, m: TMagic, args: varargs[IRIndex]): IRIndex {.inline.} = + # TODO: instead of creating a new duplicate magic each time, all used magics + # should be only created once and then reused + let sym = createMagic(c.graph, c.idgen, name, m) + c.irs.irCall(c.irs.irSym(sym), args) + +proc getTemp(cc: var TCtx; tt: PType): IRIndex = + let id = cc.irs.genLocal(lkTemp, tt) + cc.irs.irLocal(id) + +func irNull(c: var IrStore3, t: PType): IRIndex = + # XXX: maybe `irNull` should be a dedicated IR node? + let id = c.genLocal(lkTemp, t) + c.irLocal(id) + +proc popBlock(c: var TCtx; oldLen: int) = + #for f in c.prc.blocks[oldLen].fixups: + # c.patch(f) + c.prc.blocks.setLen(oldLen) + +template withBlock(labl: PSym; next: JoinPoint; body: untyped) {.dirty.} = + var oldLen {.gensym.} = c.prc.blocks.len + c.prc.blocks.add TBlock(label: labl, start: next) + body + popBlock(c, oldLen) + +proc gen(c: var TCtx; n: PNode; dest: var IRIndex) + +proc gen(c: var TCtx; n: PNode) = + doAssert n.typ.isEmptyType + var tmp: IRIndex + gen(c, n, tmp) + +proc genx(c: var TCtx; n: PNode): IRIndex = + #var tmp: TDest = -1 + #gen(c, n, tmp) + #internalAssert c.config, tmp >= 0 # 'nim check' does not like this internalAssert. + c.gen(n, result) + c.config.internalAssert(result != InvalidIndex, n.info, $n.kind) + +proc gen2(c: var TCtx, n: PNode): tuple[r: IRIndex, exits: bool] = + c.gen(n, result.r) + + # if the statement ends with a goto, it's not a normal exit + # XXX: it's probably a better idea to look at the `n` instead + result.exits = not c.irs.isLastAGoto() + +proc isNotOpr(n: PNode): bool = + n.kind in nkCallKinds and n[0].kind == nkSym and + n[0].sym.magic == mNot + +proc isTrue(n: PNode): bool = + n.kind == nkSym and n.sym.kind == skEnumField and n.sym.position != 0 or + n.kind == nkIntLit and n.intVal != 0 + +proc genStmt2(c: var TCtx, n: PNode): bool = + gen(c, n) + result = true # XXX: calculate the correct value + +proc genWhile(c: var TCtx; n: PNode, next: JoinPoint) = + # lab1: + # cond, tmp + # fjmp tmp, lab2 + # body + # jmp lab1 + # lab2: + var entrances: seq[IRIndex] + withBlock(nil, next): + + # the scope needs to be opened _before_ emitting the join, or else a loop + # iteration would be treated as leaving the scope + c.openScope() + let loop = c.irs.irLoopJoin() + if isTrue(n[0]): + # don't emit a branch if the condition is always true + discard + else: + # TODO: omit the while loop if cond == false? + var tmp = c.genx(n[0]) + let lab2 = c.irs.irBranch(tmp, next) + #c.prc.blocks[^1].endings.add(lab2) + + let exits = c.genStmt2(n[1]) + if exits: + discard c.irs.irGoto(loop) + #entrances.add(c.irs.irGetCf()) + + c.closeScope() + + #c.irs.irPatchStart(start, entrances) + + #result = c.irs.irJoin(c.prc.blocks[^1].endings) + +proc genBlock(c: var TCtx; n: PNode, next: JoinPoint): IRIndex = + withBlock(n[0].sym, next): + c.gen(n[1], result) + +func irGotoRaise(c: var IrStore, i: IRIndex): IRIndex = + missingImpl() + +func irGotoLink(c: var IrStore, i: IRIndex): IRIndex = + missingImpl() + +iterator rmitems[T](x: var openArray[T]): var T = + var i = x.high + while i >= 0: + yield x[i] + dec i + +proc genBreak(c: var TCtx; n: PNode): IRIndex = + var i = c.prc.blocks.high + if n[0].kind == nkSym: + #echo cast[int](n[0].sym) + while i >= 0 and c.prc.blocks[i].label != n[0].sym: + dec i + + if i < 0: + # XXX: this isn't a user error + fail(n.info, rsemVmCannotFindBreakTarget) + + c.irs.irGoto(c.prc.blocks[i].start) + +func irFwd(c: var TCtx): IRIndex = + missingImpl() + +func irEnd(c: var TCtx, target, value: IRIndex): IRIndex = + missingImpl() + + +proc genIf(c: var TCtx, n: PNode, next: JoinPoint): IRIndex = + # if (!expr1) goto lab1; + # thenPart + # goto LEnd + # lab1: + # if (!expr2) goto lab2; + # thenPart2 + # goto LEnd + # lab2: + # elsePart + # Lend: + + let hasValue = not isEmptyType(n.typ) + + var value: IRIndex + if hasValue: + value = c.getTemp(n.typ) + + var prev = next + for i in 0.. next + # asgn dest, b + + let tmp = c.getTemp(n.typ) + let a = c.genx(n[1]) + c.irs.irAsgn(askInit, tmp, a) + + let cond = + if isAnd: a + else: c.irCall("not", mNot, a) + + let p = c.irs.irBranch(cond, next) + + let b = c.genx(n[2]) + c.irs.irAsgn(askInit, tmp, b) + + result = tmp + +proc genCase(c: var TCtx; n: PNode, next: JoinPoint): IRIndex = + # if (!expr1) goto lab1; + # thenPart + # goto LEnd + # lab1: + # if (!expr2) goto lab2; + # thenPart2 + # goto LEnd + # lab2: + # elsePart + # Lend: + let hasValue = not isEmptyType(n.typ) + + let dest = + if hasValue: c.getTemp(n.typ) + else: InvalidIndex + + let selType = n[0].typ.skipTypes(abstractVarRange) + var b = next + block: + let tmp = c.genx(n[0]) + # branch tmp, codeIdx + # fjmp elseLabel + + # iterate of/else branches + for i in 1.. 1: + c.irs.irJoin(b) + + b = + if i < n.len - 1: c.irs.irJoinFwd() + else: next + + if branch.len == 1: + # else stmt: + if branch[0].kind != nkNilLit or branch[0].typ != nil: + # TODO: re-document the intention behind the if + r = c.gen2(branch[0]) + + else: + # elif branches were eliminated during transformation + doAssert branch.kind == nkOfBranch + + let cond = c.irs.irCall(bcOf, nil, c.irs.irLit(branch)) + + c.irs.irBranch(cond, b) + r = c.gen2(branch.lastSon) + + if r[1]: + if hasValue and r[0] != InvalidIndex: + # `r[0]` is unset if `branch` ends in a void `noreturn` call, so we + # have to guard against that case + c.irs.irAsgn(askInit, dest, r[0]) + + c.irs.irGoto(next) + + result = dest + +func genExceptCond(c: var TCtx, val: IRIndex, n: PNode, next: JoinPoint) = + ## Lowers exception matching into an if + # XXX: maybe too early for this kind of lowering + for i in 0.. 0: + c.excHandlers[^1] + else: + ExceptionalExit + + +proc genTry(c: var TCtx; n: PNode, next: JoinPoint): IRIndex = + + let + hasFinally = n.lastSon.kind == nkFinally + hasExcept = n[1].kind == nkExceptBranch + + if hasFinally: + # the finally also applies for the ``except`` blocks + discard #c.prc.finalizers.add(c.irs.irJoinFwd()) + + if hasExcept: + c.prc.excHandlers.add(c.irs.irJoinFwd()) + + let dest = + if not isEmptyType(n.typ): c.getTemp(n.typ) + else: InvalidIndex + + let r = c.gen2(n[0]) + + if r.exits: + if dest != InvalidIndex: + # TODO: assert that gen2 doesn't return a value + c.irs.irAsgn(askInit, dest, r.r) + #[let t = + if hasFinally: c.prc.finalizers[^1] + else: next]# + + c.irs.irGoto(next) + + let len = + if hasFinally: n.len-1 + else: n.len + + if hasExcept: + let eVal = c.irCall("getCurrentException") + let handler = c.prc.excHandlers.pop() # pop the handler we registered at + # the start + var currNext = handler + for i in 1..= 0: r + else: + unreachable(s.name.s) + +proc genField(c: TCtx; n: PNode): int = + if n.kind != nkSym or n.sym.kind != skField: + fail(n.info, rsemNotAFieldSymbol, ast = n) + + let s = n.sym + if s.position > high(typeof(result)): + fail(n.info, rsemVmTooLargetOffset, sym = s) + + result = s.position + +func irLit(ir: var IrStore3, i: SomeInteger): IRIndex = + ir.irLit newIntNode(nkIntLit, BiggestInt(i)) + +proc genIndex(c: var TCtx; n: PNode; arr: PType): IRIndex = + if arr.skipTypes(abstractInst).kind == tyArray and (let x = firstOrd(c.config, arr); + x != Zero): + let tmp = c.genx(n) + + result = c.irCall("-", mSubI, tmp, c.irs.irLit(toInt(x))) + else: + result = c.genx(n) + +proc genCheckedObjAccessAux(c: var TCtx; n: PNode; dest: var IRIndex) + +template sizeOfLikeMsg(name): string = + "'$1' requires '.importc' types to be '.completeStruct'" % [name] + +func genTypeLit(c: var TCtx, t: PType): IRIndex + +proc isInt8Lit(n: PNode): bool = + if n.kind in {nkCharLit..nkUInt64Lit}: + result = n.intVal >= low(int8) and n.intVal <= high(int8) + +proc isInt16Lit(n: PNode): bool = + if n.kind in {nkCharLit..nkUInt64Lit}: + result = n.intVal >= low(int16) and n.intVal <= high(int16) + +func wrapIf(c: var TCtx, wrapper: BuiltinCall, typ: PType, expr: IRIndex, cond: bool): IRIndex {.inline.} = + if cond: c.irs.irCall(wrapper, typ, expr) + else: expr + +proc genMagic(c: var TCtx; n: PNode; m: TMagic): IRIndex = + result = InvalidIndex + case m + of mAnd, mOr: + let fwd = c.irs.irJoinFwd() + result = c.genAndOr(n, isAnd = (m == mAnd), fwd) + c.irs.irJoin(fwd) + of mAddI: + # idea: also insert builtin calls to the various check functions here. + # Makes it easier to get uniformity across the back-ends. + result = c.genCall(n) + result = c.wrapIf(bcOverflowCheck, n.typ, result, optOverflowCheck notin c.options) + if optOverflowCheck in c.options: + # idea: defects (or error in general) could be encoded as part of the values. I.e. a + # `bcOverflowCheck` call would return a result-like value (only on + # the IR level, not in the resulting generate code) + # TODO: unfinished + c.raiseExit() + of mSwap: + let + tmp = c.getTemp(n[1].typ) + a = c.genx(n[1]) + b = c.genx(n[2]) + # TODO: maybe don't lower this early? + # XXX: a swap could be treated as a rename... + c.irs.irAsgn(askShallow, tmp, a) + c.irs.irAsgn(askShallow, a, b) + c.irs.irAsgn(askShallow, b, tmp) + of mReset: + var d = c.genx(n[1]) + unreachable("missing") + of mGetTypeInfo: + # transform the `getTypeInfo(x)` into a `getTypeInfo(typeof(x))` + result = c.irCall("getTypeInfo", mGetTypeInfo, genTypeLit(c, n[1].typ)) + + of mDefault: + result = c.irs.irNull(n.typ) + of mRunnableExamples: + discard "just ignore any call to runnableExamples" + of mDestroy, mTrace: + # these should not exist yet + unreachable(n.kind) + of mMove: + unreachable("not handled here") + of mConStrStr: + # the `mConStrStr` magic is very special. Nested calls to it are flattened + # into a single call in ``transf`` + var args = newSeq[IRIndex](n.len - 1) + for i in 1.. nkConv(x) + result = copyNode(n[0]) + result.add m[0] + of nkHiddenStdConv, nkHiddenSubConv, nkConv: + var m = n[0][1] + if m.kind in {nkDerefExpr, nkHiddenDeref}: + # addr ( nkConv ( deref ( x ) ) ) --> nkConv(x) + result = copyNode(n[0]) + result.add m[0] + of nkError: result = nil + else: + if n[0].kind in {nkDerefExpr, nkHiddenDeref}: + # addr ( deref ( x )) --> x + result = n[0][0] + +proc genAddr(c: var TCtx, n: PNode): IRIndex = + if (let m = canElimAddr(n); m != nil): + return genx(c, m) + + let tmp = c.genx(n[0]) + result = c.irs.irAddr(tmp) + +proc genDeref(c: var TCtx, n: PNode): IRIndex = + let dest = genx(c, n[0]) + c.irs.irDeref(dest) + +proc genAsgn(c: var TCtx; dest: IRIndex; ri: PNode; requiresCopy: bool) = + if isMove(ri): + # a moving assign + # TODO: a `move(move(a))` would wreak havoc + let tmp = c.genx(ri[1]) + c.irs.irAsgn(askMove, dest, tmp) + else: + let tmp = c.genx(ri) + c.irs.irAsgn(askCopy, dest, tmp) + +func cannotEval(c: TCtx; n: PNode) {.noinline, noreturn.} = + raiseVmGenError( + reportAst(rsemVmCannotEvaluateAtComptime, n), + n.info, + instLoc()) + +func isOwnedBy(a, b: PSym): bool = + var a = a.owner + while a != nil and a.kind != skModule: + if a == b: return true + a = a.owner + +func getOwner(c: TCtx): PSym = + result = c.prc.sym + if result.isNil: result = c.module + + +proc genDiscrVal(c: var TCtx, discr: PSym, n: PNode, oty: PType): (IRIndex, IRIndex) = + ## Generate the code for preparing and loading the discriminator value + ## as expected by the execution engine + + let oty = oty.skipTypes(abstractPtrs) + assert oty.kind == tyObject + + let discrTyp = lookupInRecord(oty.n, discr.name).typ + + let recCase = findRecCase(oty, discr) + assert recCase != nil + + if n.kind in nkCharLit..nkUInt64Lit: + # Discriminator value is known at compile-time + + let b = findMatchingBranch(recCase, n) + assert b != -1 # no matching branch; should have been caught already + + result[0] = c.genLit(n) # discr value + result[1] = c.irs.irLit(b) # branch index + else: + let tmp = c.genx(n) + result[0] = tmp + result[1] = c.irs.irCall(bcGetBranchIndex, nil, tmp, c.genTypeLit(oty), c.irs.irSym(discr)) + +func isCursor(n: PNode): bool + +proc genFieldAsgn(c: var TCtx, obj: IRIndex; le, ri: PNode) = + c.config.internalAssert(le.kind == nkDotExpr) + + let idx = c.genField(le[1]) + let s = le[1].sym + + var tmp: IRIndex + + let p = c.irs.irPathObj(obj, idx) + + if sfDiscriminant notin s.flags: + genAsgn(c, p, ri, requiresCopy = not isCursor(le)) + else: + # Can't use `s.owner.typ` since it may be a `tyGenericBody` + #tmp = c.genDiscrVal(le[1], ri, le[0].typ) + #c.irs.irAsgn(askDiscr, p, tmp) + let (dVal, bVal) = c.genDiscrVal(s, ri, le[0].typ) + discard c.irs.irCall(bcSwitch, nil, p, dVal, bVal) + +func isCursor(n: PNode): bool = + case n.kind + of nkSym: + sfCursor in n.sym.flags + of nkDotExpr: + isCursor(n[0]) or isCursor(n[1]) + of nkCheckedFieldExpr, nkBracketExpr: + isCursor(n[0]) + else: + false + + +proc genRdVar(c: var TCtx; n: PNode;): IRIndex = + let s = n.sym + if sfGlobal in s.flags: + c.irs.irGlobal(s) + elif s.kind == skParam: + c.irs.irParam(s) + elif s.kind == skResult: c.irs.irLocal(0) # TODO: don't hardcode + else: c.irs.irLocal(c.prc.local(s)) + +proc genAsgn(c: var TCtx; le, ri: PNode; requiresCopy: bool) = + # TODO: move and cursor handling is missing + case le.kind + of nkError: + # XXX: do a better job with error generation + fail(le.info, rsemVmCannotGenerateCode, le) + + of nkBracketExpr: + let typ = le[0].typ.skipTypes(abstractVarRange-{tyTypeDesc}).kind + let dest = c.genx(le[0]) + + let x = + if typ == tyTuple: + c.irs.irPathObj(dest, le[1].intVal.int) + else: + c.irs.irPathArr(dest, c.genIndex(le[1], le[0].typ)) + + genAsgn(c, x, ri, requiresCopy = not isCursor(le)) + + of nkCheckedFieldExpr: + var objR: IRIndex + genCheckedObjAccessAux(c, le, objR) + c.genFieldAsgn(objR, le[0], ri) + of nkDotExpr: + let dest = c.genx(le[0]) + c.genFieldAsgn(dest, le, ri) + of nkSym: + let dest = genRdVar(c, le) + genAsgn(c, dest, ri, requiresCopy = not isCursor(le)) + of nkDerefExpr, nkHiddenDeref: + let dest = c.genx(le[0]) + genAsgn(c, c.irs.irDeref(dest), ri, requiresCopy = true) # XXX: is `requiresCopy = true` correct? + else: + unreachable(le.kind) + +proc genArrAccessOpcode(c: var TCtx; n: PNode): tuple[arr, idx: IRIndex] = + result.arr = c.genx(n[0]) + result.idx = c.genIndex(n[1], n[0].typ) + +proc genObjAccess(c: var TCtx; n: PNode): IRIndex = + let a = c.genx(n[0]) + let b = genField(c, n[1]) + c.irs.irPathObj(a, b) + +proc genCheckedObjAccessAux(c: var TCtx; n: PNode; dest: var IRIndex) = + internalAssert( + c.config, + n.kind == nkCheckedFieldExpr, + "genCheckedObjAccessAux requires checked field node") + + # nkDotExpr to access the requested field + let accessExpr = n[0] + # nkCall to check if the discriminant is valid + var checkExpr = n[1] + + let negCheck = checkExpr[0].sym.magic == mNot + if negCheck: + checkExpr = checkExpr[^1] + + # Discriminant symbol + let disc = checkExpr[2] + internalAssert( + c.config, disc.sym.kind == skField, "Discriminant symbol must be a field") + + # Load the object in `dest` + dest = c.genx(accessExpr[0]) + + if optFieldCheck in c.options: + let discVal = c.irs.irUse(c.irs.irPathObj(dest, genField(c, disc))) + var cond = c.irCall("contains", mInSet, c.irs.irLit(checkExpr[1]), discVal) + if negCheck: + cond = c.irCall("not", mNot, cond) + + let lab1 = c.irs.irJoinFwd() + c.irs.irBranch(cond, lab1) + discard c.irs.irCall(bcRaiseFieldErr, nil, discVal) + c.raiseExit() + c.irs.irJoin(lab1) + +proc genCheckedObjAccess(c: var TCtx; n: PNode): IRIndex = + var objR: IRIndex + genCheckedObjAccessAux(c, n, objR) + + let accessExpr = n[0] + # Field symbol + var field = accessExpr[1] + internalAssert( + c.config, + field.sym.kind == skField, + "Access expression must be a field, but found " & $field.sym.kind) + + # Load the content now + let fieldPos = genField(c, field) + c.irs.irPathObj(objR, fieldPos) + +func genTypeLit(c: var TCtx, t: PType): IRIndex = + c.irs.irLit(PNode(kind: nkType, typ: t)) + +proc genArrAccess(c: var TCtx; n: PNode): IRIndex = + let arrayType = n[0].typ.skipTypes(abstractVarRange-{tyTypeDesc}).kind + case arrayType + of tyTypeDesc: + c.genTypeLit(n.typ) + of tyTuple: + let a = c.genx(n[0]) + let b = n[1].intVal + c.irs.irUse(c.irs.irPathObj(a, b.int)) + of tyArray, tySequence, tyOpenArray, tyVarargs, tyUncheckedArray, tyString, tyCstring: + let acc = genArrAccessOpcode(c, n) + c.irs.irUse(c.irs.irPathArr(acc.arr, acc.idx)) + else: unreachable(arrayType) + +func addVariable(c: var TCtx, kind: LocalKind, s: PSym): IRIndex = + assert kind != lkTemp + let id = c.irs.genLocal(kind, s) + c.prc.locals[s.id] = id + c.prc.variables.add(id) + inc c.prc.numLocals[^1] + + c.irs.irLocal(id) + +proc genVarTuple(c: var TCtx, kind: LocalKind, n: PNode) = + ## Generates the code for a ``let/var (a, b) = c`` statement + assert n.kind == nkVarTuple + var lhs = newSeq[IRIndex](n.len - 2) + + # first, generate the IR for the left side (left-to-right evaluation) + for i in 0.. 0: + for i, x in n.pairs: + let + a = c.genx(x) + idx = c.irImm(i) + + # XXX: the loss of information due to the lowering might be a problem + # for the code-generators + c.irs.irAsgn(askInit, c.irs.irPathArr(result, idx), a) + + result = c.irs.irUse(result) + +proc genSetElem(c: var TCtx, n: PNode, first: int): IRIndex = + result = c.getTemp(n.typ) + + if first != 0: + if n.kind in nkIntKinds: + # a literal value + result = c.irImm(int(n.intVal - first)) + else: + result = genx(c, n) + if first > 0: + result = c.irCall("-", mSubI, result, first) + else: + result = c.irCall("+", mAddI, result, -first) + + else: + result = genx(c, n) + +proc genSetConstr(c: var TCtx, n: PNode): IRIndex = + result = c.getTemp(n.typ) + # XXX: since `first` stays the same across the loop, we could invert + # the loop around `genSetElem`'s logic... + let first = firstOrd(c.config, n.typ.skipTypes(abstractInst)).toInt() + for x in n: + if x.kind == nkRange: + let a = c.genSetElem(x[0], first) + let b = c.genSetElem(x[1], first) + discard c.irs.irCall(bcInclRange, nil, result, a, b) + else: + let a = c.genSetElem(x, first) + discard c.irCall("incl", mIncl, result, a) + +func irConv(s: var IrStore3, typ: PType, val: IRIndex): IRIndex = + result = s.irCall(bcConv, typ, val) + +func irCast(s: var IrStore3, typ: PType, val: IRIndex): IRIndex = + result = s.irCall(bcCast, typ, val) + + +proc genObjConstr(c: var TCtx, n: PNode): IRIndex = + result = c.getTemp(n.typ) + let t = n.typ.skipTypes(abstractRange+{tyOwned}-{tyTypeDesc}) + var obj: IRIndex + if t.kind == tyRef: + let nSym = c.irs.irSym getSysSym(c.graph, n.info, "internalNew") + discard c.irs.irCall(nSym, result) + obj = c.irs.irDeref(result) + else: + obj = result + + for i in 1.. localIndex: + let f = cr.newJoinPoint() + cr.insertJoin(f) + for v in localIndex.. index into `states` + var nextThreads: seq[IRIndex] # XXX: a circular buffer would make sense here + + var ls: L + var stateIndex: int + + var cursor: IrCursor + cursor.setup(s) + + func startThread(): int = + for i, x in states.mpairs: + if not x[1]: + x[1] = true + return i + + func queue(target: JoinPoint) = + if visited[target] == 2: + # each join point must only be visited a maximum amount of two times + # TODO: wrong! This breaks apart for nested loops. Reset the + # visited counter of all nested join points back to zero on iteration + return + + # TODO: use a binary search here + var p = -1 + for j, t in nextThreads.pairs: + if target >= t: + p = j + break + + if p == -1: + nextThreads.add(target) + elif target != nextThreads[p]: + nextThreads.insert(target, p) + + states.newSeq(1) + states[0] = (default(L), true) + + var i = 0 + while i < s.len: + let n = s.at(i) + cursor.setPos i + case n.kind + of ntkBranch: + let target = n.target + # execute the branch not taken case (i.e. fallthrough) first + queue(target) # queue the branch-taken thread + + if target notin joinPoints: + # TODO: reuse an unused entry in `states` + states.add (states[stateIndex][0], true) + joinPoints[target] = states.high + else: + let idx = joinPoints[target] + pass.merge(states[idx][0], states[stateIndex][0]) + + of ntkGoto: + # TODO: check how loop ends fare + let target = n.target + # end of thread + if target notin joinPoints: + # re-use the previous thread for the new thread + discard + joinPoints[target] = stateIndex + else: + let idx = joinPoints[target] + pass.merge(states[idx][0], states[stateIndex][0]) + + # TODO: improve the next == `target` case + + queue(target) + + # goto join point + let next = nextThreads[0] + nextThreads.delete(0) + stateIndex = joinPoints[next] + + i = s.position(next) + assert s.at(i).kind == ntkJoin # TODO: move this to a separate validation pass + + inc visited[next] + + dec i + + else: + #exec(gs, states[stateIndex], n) + pass.visit gs, states[stateIndex][0], cursor, s, n + + inc i + + if pass.onEnd != nil: + # TODO: what about the exceptional exit path? + pass.onEnd(gs, states[joinPoints[0]][0]) + + s.update(cursor) + + +# location and value lifetimes are different things. A location can live +# longer than it's value, but accessing a location past it's values' lifetime +# is illegal. A `ntkLocEnd` signifies the end-of-life of a location + +# OUTDATED IDEA, mostly obsolete now (some things still apply) +# phase 1: +# alias analysis +# side-effect analysis +# borrow-checking, cursor inference | (`ntkWeakAsgn` is resolved here) +# phase 2 (optional, only when arc/orc are used): +# with the alias data +# inject destructors: +# replace `ntkAsgn:copy` and `ntkAsgn:sink` with calls to `=copy` and +# `=sink` where applicable (i.e. for types that need it). insert `=destroy` +# calls before `ntkLocEnd` where needed +# when orc is enabled: insert the +# +# phase 3: proc inlining (if optimizations are enabled) +# after all procs are inlined, rerun alias analysis, since we have more +# context now +# +# phase 4: lower ntkUse, ntkConsume, and (some) ntkAsgn to ntkLoad and ntkWrite +# +# phase 5: VM code gen + +type + DestrCtx = object + output: IrStore + DestrLocal = object + + AliasCtx = object + AliasesLocal = object + +func mergeFrom(a: var AliasesLocal, b: AliasesLocal) = + discard + +# There are three similar but different concepts used here: ownership, reachability, and derived-ness +# If there exists a valid path expression from location `a` to `b`, `b` is reachable from `a`. Reachability is strongly linked with observability (for example, it is said that a function can observe all locations reachable from it's parameters and globals) +# If destroying location `a`'s value also always destroys location `b`'s value, `a` owns `b` (ownership) +# Derived currently means "part of the location's memory", which would mean that a location owned by a `seq` is not derived from the owner of the seq (and technically also not from the seq itself since it's basically a pointer/ref) + +type AAEvalResult = enum + asfNone + asfRef # location is a `ref` + asfHasRef # location contains a `ref` + +func aliasAnalysisEval(c: AliasCtx, loc: LocIndex): AAEvalResult = + discard + +iterator fields(c: AliasCtx, loc: LocIndex): FieldId = + ## iterates overs all fields relevant for the alias analysis + discard + +func setLoc(ls: AliasesLocal, id: NewNodeId, locId: ValueId) = + discard + +func setHandle(ls: AliasesLocal, id: NewNodeId, locId: ValueId) = + discard + +func setTarget(ls: AliasesLocal, a, locId: ValueId) = + discard + +func uid(c: AliasesLocal, l: LocIndex): ValueId = + discard + +func fieldId(c: AliasesLocal, l: LocIndex, f: FieldId): int = + discard + +func ownedBy(c: AliasesLocal, l: LocIndex): ValueId = + discard + + +func computeAliases(c: var AliasCtx, ls: var AliasesLocal, n: NewNode) = + case n.kind + of ntkAsgn: + # tricky case: up- and downconverting of both ref/ptr and object values + case aliasAnalysisEval(c, n.srcLoc) + of asfRef: + ls.setTarget(ls.uid(n.loc), ls.uid(n.srcLoc)) + # the location is a ref/ptr + of asfHasRef: + # the location contains a ref/ptr + for it in c.fields(n.loc): + # XXX: only the fields which are directly accessed in the current + # work item (i.e. function or block) need to be included here + ls.setTarget(ls.fieldId(n.loc, it), ls.fieldId(n.srcLoc, it)) + of asfNone: + discard + of ntkAddr: + ls.setHandle(n.id, ls.uid(n.srcLoc)) + of ntkDeref: + ls.setLoc(n.id, ls.uid(n.loc)) + of ntkCall: + if n.isViewCall: + # SPEC: the spec says the returned view is _derived_ from the first + # argument, not only _reachable_. It should probably say that the returned handle needs to borrow from a location _owned_ by the first parameter + # SPEC: the spec also says two different things in two different places: + # - "the [result] location must borrow from a location that is derived from the first parameter" + # - "[result] has to be a location derived from the first formal parameter or from a constant location." + + # XXX: we use the "result is _owned_ by first parameter" interpretation here for now + + # since we're not doing inter-procedure analysis, we don't know which location + ls.setLoc(n.id, ls.ownedBy(ls.uid n.arg(0))) + of ntkSym, ntkUse, ntkConsume, ntkLocEnd: + discard "not relevant" + of ntkWrite, ntkLoad: + # too early, these shouldn't exist yet + unreachable(n.kind) + of ntkGoto, ntkBranch: + unreachable(n.kind) + else: + {.warning: "handle this".} + +type Ternary = enum + No, Maybe, Yes + +func aliveState(ls: DestrLocal, loc: LocIndex): Ternary = + discard + +func kill(ls: var DestrLocal, loc: LocIndex) = + ## Mark the location and all locations owned by it's value as dead + +func needsDestroy(c: DestrCtx, loc: LocIndex): bool = + discard + +func insertG*(c: var IrStore, kind: NewNodeKind, id: varargs[NewNodeId]): NewNodeId = + discard + + +func insert*(c: var IrStore, kind: NewNodeKind, id: varargs[NewNodeId]) = + discard + +func keep(c: var IrStore) = + discard + +func makeDestroyHook(c: var DestrCtx, loc: LocIndex): int = + discard + +func mergeFrom(a: var DestrLocal, b: DestrLocal) = + discard + + +iterator aliveStates(c: DestrLocal): (LocIndex, Ternary) = + discard + +func isParam(c: DestrCtx, loc: LocIndex): Ternary = + discard + +func loc(n: IrNode3): LocIndex = + try: + n.srcLoc + except: + n.wrLoc + +# note: mostly outdated now +func computeDestructors(c: var DestrCtx, ls: var DestrLocal, cr: var IrCursor, ir: IrStore3, n: IrNode3) = + ## Destructor computation. Also computes alive states. Basically the ``injectdestructors`` pass. Injects call to the destroy, copy and sink hooks for types that have them. + + proc testAlive() = + case ls.aliveState(n.loc) + of Yes: discard "all good" + of Maybe: + # automatic sinks or moves are only injected where it's safe (otherwise + # a copy is used). If a location is maybe alive, it means that there's a + # code-path where the value is dead (it was moved) + # TODO: collect a warning? + discard + of No: + # definitely a use-after-move + # TODO: collect a warning + discard + + case n.kind + #of ntkAsgn: + of ntkUse: + testAlive() + + of ntkConsume: + testAlive() + # TODO: insert `wasMoved` call? Or use a special node kind? + ls.kill(n.loc) + + of ntkAsgn: + # SPEC: can an assign start a new value lifetime? + discard + + of ntkLocEnd: + if c.needsDestroy(n.loc): + case ls.aliveState(n.loc) + of Yes, Maybe: + # TODO: use `ntkUse` instead? consume is currently used since event + # though a `=destroy` hook takes a `var` param, the modifications + # don't need to be visible + let i = c.output.insertG(ntkConsume, n.loc) + let i2 = c.output.insertG(ntkSym, c.makeDestroyHook(n.loc)) + c.output.insert(ntkCall, n.loc) + c.output.keep() # keep the `ntkLocEnd` node + of No: + # XXX: if a location end is reached and the value is not alive anymore + # AND the dead value is not observable by anyone else (e.g. in + # the case of a local variable), the `wasMoved` after every + # consume on the same CF path as this end can be elided + discard "nothing to do" + + of ntkCall, ntkAddr, ntkDeref, ntkSym: + discard + + of ntkLoad, ntkWrite: + unreachable(n.kind) + of ntkGoto, ntkBranch: + unreachable(n.kind) + else: + {.warning: "handle this".} + +func computeDestructorsEnd(c: var DestrCtx, ls: DestrLocal) = + # Check if a parameter was killed. + for loc, x in ls.aliveStates: + if x == Yes: continue + + case c.isParam(loc) + of Yes: + case x + of Maybe: discard# a parameter's value is maybe dead + of No: discard # a parameter's value is definitely dead + of Yes: discard + of Maybe: + # the value of a location that's potentially reachable from a parameter is: + case x + of Maybe: discard # maybe dead + of No: discard # dead + of Yes: discard # still alive + of No: + discard "don't care" + + +func nthField(n: PNode, pos: int): PSym = + case n.kind + of nkSym: + if n.sym.position == pos: + result = n.sym + of nkRecList: + for it in n.sons: + result = nthField(it, pos) + if result != nil: + return + of nkRecCase: + if n[0].sym.position == pos: + return n[0].sym + + for i in 1.. 0 and t[0] != nil: + result = nthField(t[0].skipTypes(skipPtrs), pos) + +func computeTypes*(ir: IrStore3): seq[PType] = + result.newSeq(ir.len) + var i = 0 + for n in ir.nodes: + case n.kind + of ntkAsgn, ntkJoin, ntkGoto, ntkBranch, ntkContinue: + discard + of ntkCall: + result[i] = + if n.isBuiltIn: + # XXX: built-in calls feel wrong. Using magics instead might be better + n.typ + else: + let callee = ir.at(n.callee) + if callee.kind != ntkSym: + result[n.callee][0] # the callee's return type + elif (let s = ir.sym(callee); s.typ != nil): + s.typ[0] + else: + # the symbol for magics created with ``createMagic`` don't have + # type information + nil + + of ntkLit: + result[i] = ir.getLit(n).typ + of ntkSym: + let s = ir.sym(n) + customAssert s != nil, i + if s.kind notin routineKinds: + # don't compute the type for routine symbols. This makes it easier to + # figure out the type dependencies later on. + result[i] = s.typ + of ntkUse, ntkConsume: + result[i] = result[n.srcLoc] + of ntkLocal: + result[i] = ir.getLocal(i)[1] + of ntkAddr: + # XXX: completely wrong, but we're missing a way to get + # the correct type without creating a new one + result[i] = result[n.addrLoc] + of ntkDeref: + let t = result[n.addrLoc].skipTypes(abstractInst) + customAssert t.kind in {tyPtr, tyRef, tyVar, tyLent}, i + result[i] = t.elemType + of ntkPathObj: + customAssert result[n.srcLoc] != nil, n.srcLoc + let typ = result[n.srcLoc].skipTypes(abstractInst) + let idx = n.fieldIdx + case typ.kind + of tyObject: + let f = typ.nthField(n.fieldIdx) + result[i] = f.typ + of tyTuple: + result[i] = typ[idx] + else: + customAssert false, n.srcLoc + + of ntkPathArr: + result[i] = result[n.srcLoc].elemType() + + else: + debugEcho "computeTypes missing: ", n.kind + inc i + +func getMagic(ir: IrStore3, n: IrNode3): TMagic = + assert n.kind == ntkCall + if n.isBuiltIn: + mNone + else: + let callee = ir.at(n.callee) + if callee.kind == ntkSym: + ir.sym(callee).magic + else: + mNone + +func insertLit(cr: var IrCursor, lit: string): IRIndex = + cr.insertLit newStrNode(nkStrLit, lit) + +func insertLit(cr: var IrCursor, i: int): IRIndex = + cr.insertLit newIntNode(nkIntLit, i) + +proc insertMagicCall(cr: var IrCursor, g: ModuleGraph, name: string, m: TMagic, args: varargs[IRIndex]): IRIndex {.discardable.} = + cr.insertCallExpr(createMagic(g, g.idgen, name, m), args) + +proc insertCompProcCall(cr: var IrCursor, g: ModuleGraph, name: string, args: varargs[IRIndex]): IRIndex {.discardable.} = + cr.insertCallExpr(g.getCompilerProc(name), args) + + +type RefcPassCtx* = object + graph: ModuleGraph + idgen: IdGenerator + types: seq[PType] + + # XXX: only used for the ``lowerSeqs`` passes, but `RefcPassCtx` is + # currently (ab)-used as the context for most passes + localMap: Table[int, int] # old local-name -> new local-name + +func setupRefcPass*(c: var RefcPassCtx, g: ModuleGraph, idgen: IdGenerator, ir: IrStore3) = + c.types = computeTypes(ir) # XXX: very bad + c.graph = g + c.idgen = idgen + +func typeof(c: RefcPassCtx, val: IRIndex): PType = + customAssert c.types[val] != nil, val + c.types[val] + +type StorageLoc = enum + slUnknown + slStack + slHeap + # TODO: also add `slStatic`, used for constants? + +func storageLoc(c: RefcPassCtx, val: IRIndex): StorageLoc = + # TODO: missing + slUnknown + +proc requestRtti(c: var RefcPassCtx, cr: var IrCursor, t: PType): IRIndex = + # refc uses the v1 type-info + cr.insertCallExpr(createMagic(c.graph, c.idgen, "getTypeInfo", mGetTypeInfo), cr.insertLit(newNodeIT(nkType, unknownLineInfo, t))) # TODO: bad; don't create a new mGetTypeInfo sym every time + # TODO: collect for which types rtti was requested + +proc processMagicCall(c: var RefcPassCtx, cr: var IrCursor, ir: IrStore3, m: TMagic, n: IrNode3) = + ## Lowers calls to various magics into calls to `compilerproc`s + case getMagic(ir, n) + of mDestroy: + # An untransformed `mDestroy` indicates a ref or string. `seq` + # destructors were lifted into specialized procs already + let val = n.args(0) + case c.typeof(val).kind + of tyString: + cr.replace() + cr.insertCompProcCall(c.graph, "genericSeqAssign") + of tyRef: + # XXX: only non-injected destroys for refs should be turned + cr.replace() + let nilLit = cr.insertLit(newNode(nkNilLit)) + let r = c.storageLoc(val) + case r + of slStack: + # if it's on the stack, we can simply assign 'nil' + cr.insertAsgn(askShallow, val, nilLit) + of slHeap: + cr.insertCompProcCall(c.graph, "asgnRef", val, nilLit) + of slUnknown: + cr.insertCompProcCall(c.graph, "unsureAsgnRef", val, nilLit) + else: + discard + + of mNew: + cr.replace() + # TODO: alignment value missing + let v = cr.insertCompProcCall(c.graph, "newObjRC1", c.requestRtti(cr, c.typeof(n.args(0))), cr.insertLit(0)) + # XXX: not sure about `askMove` here... + cr.insertAsgn(askMove, n.args(0), v) + + else: + discard "ignore" + +proc genRefcRefAssign(cr: var IrCursor, g: ModuleGraph, dst, src: IRIndex, sl: StorageLoc) = + # TODO: document + case sl + of slStack: + cr.insertAsgn(askShallow, dst, src) + of slHeap: + cr.insertCompProcCall(g, "asgnRef", dst, src) + of slUnknown: + cr.insertCompProcCall(g, "unsureAsgnRef", dst, src) + + +proc applyRefcPass(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = + case n.kind + of ntkAsgn: + case n.asgnKind + of askMove: + if c.typeof(n.wrLoc).kind in {tyString, tyRef, tySequence}: + genRefcRefAssign(cr, c.graph, n.wrLoc, n.srcLoc, c.storageLoc(n.wrLoc)) + # XXX: source needs to be zeroed? + of askCopy: + case c.typeof(n.wrLoc).kind + of tyString: + cr.replace() + cr.insertCompProcCall(c.graph, "copyString", n.wrLoc, n.srcLoc) + of tySequence: + cr.replace() + cr.insertCompProcCall(c.graph, "genericSeqAssign", n.wrLoc, n.srcLoc) + else: + discard + of askInit, askShallow, askDiscr: + # XXX: init might need special handling + discard + + of ntkCall: + processMagicCall(c, cr, ir, getMagic(ir, n), n) + else: + discard + +type HookCtx* = object + graph: ModuleGraph + types: seq[PType] + +func initHookCtx*(g: ModuleGraph, ir: IrStore3): HookCtx = + HookCtx(graph: g, types: computeTypes(ir)) + +func hasAttachedOp*(c: HookCtx, op: TTypeAttachedOp, typ: PType): bool = + assert typ != nil + c.graph.getAttachedOp(typ, op) != nil + + +func typeof(c: HookCtx, n: IRIndex): PType = + customAssert c.types[n] != nil, n + c.types[n] + +func injectHooks(c: HookCtx, n: IrNode3, cr: var IrCursor) = + ## Replaces assignments and destroys with calls to the copy, sink, and destroy hooks. + # TODO: rename. We're not injecting anything, just replacing + case n.kind + of ntkAsgn: + let typ = c.typeof(n.wrLoc) + case n.asgnKind + of askInit: + # TODO: missing + discard + of askMove: + if hasAttachedOp(c, attachedSink, typ): + cr.replace() + cr.insertCallStmt(c.graph.getAttachedOp(typ, attachedSink), n.wrLoc, n.srcLoc) + of askCopy: + if hasAttachedOp(c, attachedAsgn, typ): + cr.replace() + cr.insertCallStmt(c.graph.getAttachedOp(typ, attachedAsgn), n.wrLoc, n.srcLoc) + + of askShallow, askDiscr: + discard "nothing to do" + + of ntkCall: + # XXX: the full IR (needed for magic lookup) is missing here + #[ + if getMagic(irs, n) == mDestroy: + if hasAttachedOp(c, attachedDestructor, c.typeof(n.args(0))): + cr.replace() + cr.insertCall(c.graph.getAttachedOp(typ, attachedDestructor), n.args(0)) + ]# + discard + else: + discard + +func insertError(cr: var IrCursor, err: string): IRIndex {.discardable.} = + cr.insertCallExpr(bcError, nil, cr.insertLit err) + + +type GenericTransCtx = object + graph: ModuleGraph + types: seq[PType] + +func setupTransCtx*(g: ModuleGraph, ir: IrStore3): GenericTransCtx = + result.graph = g + result.types = computeTypes(ir) + +# XXX: the field position is not necessarily 2; the value should be detected +# during compilation instead +const SeqDataFieldPos = 2 + +proc requestSeqType(c: var RefcPassCtx, t: PType): PType = + ## `t` is the original ``tySequence`` type. The resulting type has the following definition: + ## + ## .. code:: nim + ## type NimSeq = ptr object of TGenericSeq + ## data: UncheckedArray[t.elemType] + + let cache = c.graph.cache + + # TODO: use a cache for the instantiations + # XXX: yeah, this is bad; same as for symbols, it'd probably be a good idea to + # introduce a custom type representation for the whole compiler backend + let + typSym = newSym(skType, cache.getIdent("NimSeq"), c.idgen.nextSymId(), t.owner, t.owner.info) + objTyp = newType(tyObject, c.idgen.nextTypeId(), t.owner) + f = newSym(skField, cache.getIdent("data"), c.idgen.nextSymId(), typSym, t.owner.info) + + f.position = SeqDataFieldPos + f.typ = newType(tyUncheckedArray, nextTypeId c.idgen, t.owner) + f.typ.add t.elemType + + objTyp.add(c.graph.getCompilerProc("TGenericSeq").typ) # base type + objTyp.n = newTree(nkRecList, [newSymNode(f)]) + + result = newType(tyPtr, c.idgen.nextTypeId(), t.owner) + result.add(objTyp) + result.linkTo(typSym) + +proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = + ## Lowers the `seq`-related magic operations into calls to the v1 `seq` + ## implementation + case n.kind + of ntkCall: + case getMagic(ir, n) + of mSetLengthStr: + cr.replace() + # TODO: is shallow correct here? + cr.insertAsgn(askShallow, n.args(0), cr.insertCompProcCall(c.graph, "setLengthStr", n.args(0), n.args(1))) + of mSetLengthSeq: + cr.replace() + # TODO: evaluation order might be violated here + cr.insertAsgn(askShallow, n.args(0), cr.insertCompProcCall(c.graph, "setLengthSeqV2", n.args(0), c.requestRtti(cr, c.typeof(n.args(0))), n.args(1))) + + of mNewSeq: + cr.replace() + + let val = n.args(0) + let nilLit = cr.insertLit(newNode(nkNilLit)) + + let sl = c.storageLoc(val) + case sl + of slHeap, slUnknown: + # write barrier + # TODO: document + let target = cr.newJoinPoint() + cr.insertBranch(cr.insertMagicCall(c.graph, "isNil", mIsNil), target) + # TODO: use nimGCunrefNoCylce when applicable + cr.insertCompProcCall(c.graph, "nimGCunrefRC1", val) + cr.insertAsgn(askShallow, val, nilLit) + cr.insertGoto(target) + cr.insertJoin(target) + + var ns = cr.insertCompProcCall(c.graph, "newSeq", c.requestRtti(cr, c.typeof(val)), n.args(1)) + ns = cr.insertCast(c.typeof(val), ns) + cr.insertAsgn(askShallow, val, ns) + of slStack: + + var ns = cr.insertCompProcCall(c.graph, "newSeq", c.requestRtti(cr, c.typeof(val)), n.args(1)) + ns = cr.insertCast(c.typeof(val), ns) + cr.insertAsgn(askShallow, val, ns) + + of mNewSeqOfCap: + cr.replace() + + let val = cr.position + discard cr.insertCast(c.typeof(val), cr.insertCompProcCall(c.graph, "nimNewSeqOfCap", c.requestRtti(cr, c.typeof(val)), n.args(0))) + + of mAppendSeqElem: + # ``seq &= x`` is transformed into: + # ``seq = cast[typeof(seq)](incrSeqV3(seq, getTypeInfo(2)))`` + # ``seq = `` + cr.replace() + let seqVal = n.args(0) + let typ = c.typeof(seqVal).skipTypes({tyVar}) + + # XXX: if the refc pass would be run after the `lowerSeqV1` pass, a + # `askMove` assignment could be used here instead + cr.genRefcRefAssign(c.graph, seqVal, cr.insertCast(typ, cr.insertCompProcCall(c.graph, "incrSeqV3", seqVal, c.requestRtti(cr, typ)) ), c.storageLoc(seqVal)) + + # TODO: filling the element and adjusting the seq length is missing + discard cr.insertCallExpr(bcError, nil, cr.insertLit "Not implemented: lowerSeqsV1.mAppendSeqElem") + + of mAppendStrStr: + cr.replace() + var lens: array[2, IRIndex] + #lens[0] = genIfThanElse() # we `len` call needs to be lowered directly + discard cr.insertCallExpr(bcError, nil, cr.insertLit "Not implemented: lowerSeqsV1.mAppendStrStr") + + of mLengthStr: + cr.replace() + # XXX: might be a good idea to cache the `string` type + let strTyp = c.graph.getCompilerProc("NimStringDesc") + #genIfThanElse(cr.insertMagicCall("isNil", mIsNil, a.val)) + + discard cr.insertCallExpr(bcError, nil, cr.insertLit "Not implemented: lowerSeqsV1.mLengthStr") + + else: + discard + + of ntkLocal: + # replace locals of `seq` and `string` type with locals of the lowered type + # XXX: there's currently no way to replace an existing local (would be + # simpler and more efficient), so the logic here resorts to + # introducing a new local and replacing all reference to the old one + let (lk, origTyp, sym) = ir.getLocal(cr.position) + + let typ = origTyp.skipTypes(abstractInst) + + # TODO: handle ``var`` and ``lent`` wrapped types here + case typ.kind + of tySequence: + # replace seqs with `NimSeq`. The latter is bascially a generic type + # that were instantiating manually here. The old C backend did this step + # in the code-generator + cr.replace() + + let idx = ir.getLocalIdx(cr.position) + var newName = c.localMap.getOrDefault(idx, -1) + if newName == -1: + # XXX: ugly; the whole backend would probably benefit from it's own + # symbol representation + let nt = c.requestSeqType(typ) + if sym != nil: + let ns = copySym(sym, c.idgen.nextSymId()) + ns.typ = nt + + newName = cr.newLocal(lk, ns) + else: + newName = cr.newLocal(lk, nt) + + c.localMap[idx] = newName + + discard cr.insertLocalRef(newName) + + of tyString: + # replace `string` with `NimString` + + cr.replace() + + let idx = ir.getLocalIdx(cr.position) + var newName = c.localMap.getOrDefault(idx, -1) + if newName == -1: + # XXX: ugly; the whole backend would probably benefit from it's own + # symbol representation + + let nt = c.graph.getCompilerProc("NimString").typ + if sym != nil: + let ns = copySym(sym, c.idgen.nextSymId()) + ns.typ = nt + newName = cr.newLocal(lk, ns) + else: + newName = cr.newLocal(lk, nt) + + c.localMap[idx] = newName + + discard cr.insertLocalRef(newName) + + else: + discard + + of ntkSym: + # replace `string` and `seq` types of globals and parameters by directly + # modifying the `PSym`s + + let sym = ir.sym(n) + + # XXX: the symbol patching here won't work out... + if sym.kind notin {skVar, skLet, skParam}: + # XXX: ignore constants for now + return + + var newTyp: PType = nil + + let typ = sym.typ.skipTypes(abstractInst) + let newType = + case skipTypes(typ, {tyVar, tyLent}).kind + of tySequence: c.requestSeqType(typ) + of tyString: c.graph.getCompilerProc("NimString").typ + else: nil + + # this overwrites possibly present ``tyGenericInst``, ``tyDistinct``, + # etc. but at this point in the backend, we no longer need those + if newType != nil: + if typ.kind == tyVar: + # only ``var seq`` is treated as a pointer-to-pointer, not ``lent`` + sym.typ = newType(tyVar, nextTypeId c.idgen, typ.owner) + sym.typ.add newType + else: + sym.typ = newType + + of ntkPathArr: + let arrTyp = c.typeof(n.srcLoc).skipTypes(abstractInst) + + # TODO: needs tests + case skipTypes(arrTyp, {tyVar, tyLent}).kind + of tyString, tySequence: + # --> x[].data[idx] + + cr.replace() + var r = cr.insertDeref(n.srcLoc) + # a `lent seq` is not a treated as a `ptr NimSeq` but just as `NimSeq` + # (`NimSeq` itself is a pointer type) + if arrTyp.kind == tyVar: + r = cr.insertDeref(r) + + r = cr.insertPathObj(r, SeqDataFieldPos) + discard cr.insertPathArr(r, n.arrIdx) + + else: + discard + + else: + discard "ignore" + +func lowerSeqsV2(c: GenericTransCtx, n: IrNode3, cr: var IrCursor) = + ## Lowers the `seq`-related magic operations into calls to the v2 `seq` + ## implementation. Enabled by the `optSeqDestructors` toggle + doAssert false, "missing" + +type LiftPassCtx* = object + graph*: ModuleGraph + idgen*: IdGenerator + cache*: IdentCache + + typeInfoMarker*: Table[SigHash, PSym] # sig hash -> type info sym + + syms*: seq[(PSym, PType)] ## all lifted globals + +proc liftTypeInfoV1(c: var LiftPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = + ## Turns all ``mGetTypeInfo`` calls into globals and collects the newly + ## created symbols + # XXX: can this really be considered lifting? + case n.kind + of ntkCall: + if getMagic(ir, n) == mGetTypeInfo: + cr.replace() + + let + typ = ir.getLit(ir.at(n.args(0))).typ + sig = hashType(typ) + + assert typ != nil + + var s = c.typeInfoMarker.getOrDefault(sig) + if s == nil: + # TODO: either use a `Rope` here or use a string buffer stored in + # `LiftPassCtx` that is reserved for temporary usage like this + let name = "NTI" & $sig & "_" # XXX: too many short-lived and unnecessary allocations + + # the symbol is owned by the module the type is owned by + s = newSym(skVar, c.cache.getIdent(name), c.idgen.nextSymId(), typ.owner.getModule(), unknownLineInfo) + # TODO: cache the `TNimType` type + s.typ = c.graph.getCompilerProc("TNimType").typ + s.flags.incl sfGlobal + + c.typeInfoMarker[sig] = s + + # TODO: cache the `pointer` type + discard cr.insertCast(c.graph.getSysType(unknownLineInfo, tyPointer), cr.insertSym s) + + else: + discard + +const ErrFlagName = "nimError" + +proc lowerTestError*(ir: var IrStore3, g: ModuleGraph, cache: IdentCache, idgen: IdGenerator, owner: PSym) = + ## Lowers ``bcTestError`` builtin calls for the C-like targets. Turns + ## ``bcTestError`` into ``unlikelyProc(ErrFlagName[])`` and inserts a + ## + ## .. code:: nim + ## let ErrFlagName = nimErrorFlag() + ## + ## at the top, but only if the error flag is actually accessed! + # XXX: a `LinearPass` can't be used here, since we need to insert + # XXX: "lower" is the wrong terminology here + # XXX: maybe this should happen as part of ``irgen`` instead? + + var + cr: IrCursor + addedErr: bool + errFlag: IRIndex + + cr.setup(ir) + + for i in 0..>3] &(1U<<((NU)($2)&7U)))!=0)") + + else: + discard + else: + discard + + +proc lowerRangeChecks*(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = + ## Lowers ``bcRangeCheck`` (nkChckRange, nkChckRangeF, etc.) into simple comparisons + # XXX: the lowering could be simplified by just replacing the range check + # with a call to a ``chkRange`` inline function that'd be defined in + # ``system.nim`` for the C-like targets + + case n.kind + of ntkCall: + if n.isBuiltIn and n.builtin == bcRangeCheck: + let srcTyp = c.typeof(n.args(0)) + + cr.replace() + var cond: IRIndex + var raiser: string + + case srcTyp.kind + of tyUInt, tyUInt64: + # .. code:: nim + # cast[dstTyp](high) < val + cond = cr.insertMagicCall(c.graph, "<", mLtU, cr.insertCast(srcTyp, n.args(0)), n.args(2)) + raiser = "raiseRangeErrorNoArgs" + else: + let dstTyp = skipTypes(c.typeof(cr.position), abstractVarRange) + case dstTyp.kind + of tyUInt8..tyUInt32, tyChar: + raiser = "raiseRangeErrorU" + of tyFloat..tyFloat128: + raiser = "raiseRangeErrorF" + let conv = cr.insertConv(dstTyp, n.args(0)) + # no need to lower the `or` into an `ntkBranch` + `ntkJoin` here; it has no impact on further analysis + cond = cr.insertMagicCall(c.graph, "or", mOr, cr.insertMagicCall(c.graph, "<", mLtF64, conv, n.args(1)), cr.insertMagicCall(c.graph, "<", mLtF64, n.args(2), conv)) + + else: + cr.insertError("missing chkRange impl") + + raiser = + case skipTypes(c.typeof(cr.position), abstractVarRange).kind + of tyFloat..tyFloat128: "raiseRangeErrorF" + else: "raiseRangeErrorI" + + #[ + let boundaryCast = + if n0t.skipTypes(abstractVarRange).kind in {tyUInt, tyUInt32, tyUInt64} or + (n0t.sym != nil and sfSystemModule in n0t.sym.owner.flags and n0t.sym.name.s == "csize"): + "(NI64)" + else: + "" + ]# + + let target = cr.newJoinPoint() + cr.insertBranch(cr.insertMagicCall(c.graph, "not", mNot, cond), target) + cr.insertCompProcCall(c.graph, raiser, n.args(0), n.args(1), n.args(2)) + # XXX: it would be nice if we could also move the following + # ``if bcTestError(): goto error`` into the branch here + + cr.insertJoin(target) + discard cr.insertConv(dstTyp, n.args(0)) + + else: + discard + +const hookPass* = LinearPass[HookCtx](visit: injectHooks) +const refcPass* = LinearPass2[RefcPassCtx](visit: applyRefcPass) +const seqV1Pass* = LinearPass2[RefcPassCtx](visit: lowerSeqsV1) +const seqV2Pass* = LinearPass[GenericTransCtx](visit: lowerSeqsV2) +const typeV1Pass* = LinearPass2[LiftPassCtx](visit: liftTypeInfoV1) +const lowerRangeCheckPass* = LinearPass2[RefcPassCtx](visit: lowerRangeChecks) +const lowerSetsPass* = LinearPass2[RefcPassCtx](visit: lowerSets) \ No newline at end of file diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim new file mode 100644 index 00000000000..fa54cc5b4c4 --- /dev/null +++ b/compiler/vm/irtypes.nim @@ -0,0 +1,143 @@ +## The definitions for the type representation used by the compiler back-end (mid-end?) IR. + +type RecordNodeKind* = enum + rnkEmpty # meant to be used by the garbage collector to fill cleaned slots + rnkList + rnkCase + rnkBranch + +type RecordNode* = object + kind: RecordNodeKind + len: uint32 ## the number of items + a: uint32 ## + b: uint32 + +type RecordId* = distinct uint32 + +type TypeId* = distinct uint32 + +type SymId* = distinct uint32 + +type TypeNodeKind = enum + tnkEmpty + + tnkInt + tnkFloat + + tnkRef + tnkPtr + tnkVar + tnkLent + + tnkSeq + tnkOpenArray + tnkString + #tnkSink # XXX: ? + + tnkRecord # tuples and objects + tnkArray + + tnkProc + + tnkDistinct # XXX: not sure + + tnkImported # an imported type. Has one child node, used to derive the access semantics from + tnkName # a reference to named type + + #tnkAlias + +type FieldDesc* = object + sym: SymId # may be empty + typ: TypeId + # XXX: bitsize should likely be stored as part of FieldDesc + +type TypeNode* = object + kind: TypeNodeKind + a: uint32 + b: uint32 + +type TypeEnv* = object + ## Holds the data for all types + # XXX: in general, a `seq[seq]` could be used for `records`, `fields`, and + # `types`. This would make access a bit simpler; require less copying + # on resize; and make garbge collection easier. It would also increase + # memory fragmentation and reduce cache locality + records: seq[RecordNode] ## the bodies for all record-like types (objects and tuples) in one contiguous seq + fields: seq[FieldDesc] ## all fields + types: seq[TypeNode] ## all types in one contiguous seq + # XXX: maybe a redirection table for `tnkName` makes sense? Alternatively, + # indirections to another tnkName could be allowed + +type TypeLookup* = object + ## Data needed for mapping ``PType`` to the ``TypeId`` + + +# XXX: copied from `ccgtypes`, might need some adjustments +const + irrelevantForBackend = {tyGenericBody, tyGenericInst, tyGenericInvocation, + tyDistinct, tyRange, tyStatic, tyAlias, tySink, + tyInferred, tyOwned} + +func skipTypesConsiderImported(t: PType, kinds: TTypeKinds): tuple[imported: bool, t: PType] = + result.t = t + while result.t.kind in kinds: + result.imported = t.sym != nil and sfImportc in t.sym.flags + if result.imported: + return + result.t = lastSon(result.t) + +func addField(dest: var TypeEnv, s: PSym) = + discard + +func translate(dest: var TypeEnv, n: PNode): tuple[fields, entries: int] = + func `+=`(a: var (int, int), b: (int, int)) {.inline.} = + a[0] += b[0] + a[1] += b[1] + + case n.kind + of nkSym: + addField(dest, n) + result = (1, 0) + of nkRecList: + dest.records.add RecordNode(kind: rnkList) + result.entries += 1 + + for it in n.sons: + result += translate(dest, it) + + of nkRecCase: + dest.records.add RecordNode(kind: rnkCase) + result.entries = n.len # the case entry and all branch entries + + addField(dest, n[0].sym) # discriminator + + result.entries = n.len + + else: + unreachable(n.kind) + + +func translate(dest: var TypeEnv, t: PType) = + let r = t.skipTypesConsiderImported(irrelevantForBackend) + + let t = + if r.imported: + dest.add TypeNode(kind: tnkImported) + r.t.skipTypes(irrelevantForBackend) + else: + r.t.skipTypes() + + case t.kind + of tyObject: + dest.types.add TypeNode(kind: tnkRecord, a: RecordId(dest.records.len)) + translate(t.n) + + of tyObject + + of tyRef, tyPtr, tyVar, tyLent, tySequence, tyOpenArray: + const Map = {tyRef: tnkRef, tyPtr: tnkPtr, tyVar: tnkVar, tyLent: tnkLent, tySequence: tnkSeq, tyOpenArray: tnkOpenArray}.toTable + dest.types.add TypeNode(kind: Map[t.kind]) + translate(dest, t.lastSon) + + else: + unreachable(t.kind) diff --git a/compiler/vm/vm_enums.nim b/compiler/vm/vm_enums.nim index 7772593b1db..5b7dbc43b31 100644 --- a/compiler/vm/vm_enums.nim +++ b/compiler/vm/vm_enums.nim @@ -178,6 +178,8 @@ type avrNoLocation ## Address points to valid VM memory. but not to the start ## of a location +const opcCpReg* = TOpcode(1) + const firstABxInstr* = opcTJmp largeInstrs* = { # instructions which use 2 int32s instead of 1: diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim new file mode 100644 index 00000000000..6212d8e9f0c --- /dev/null +++ b/compiler/vm/vmir.nim @@ -0,0 +1,2056 @@ +import compiler/vm/vmdef +import compiler/vm/vmtypegen +import compiler/ast/ast_types +import compiler/front/msgs +import compiler/ast/reports +import compiler/ast/lineinfos +import compiler/front/options +import std/algorithm +import std/intsets +import std/tables +import compiler/ast/ast_query +import compiler/ic/bitabs + +type IRIndex* = int +const InvalidIndex* = -1 # XXX: it would be better for `InvalidIndex` to be '0' + +type VmTypeId = uint32 + +type + HandleInfo = object + root: int32 + sub: uint32 + + Path = object + src: IRIndex + sub: seq[uint32] + typ: VmTypeId + + + PathIndex = #[distinct]# IRIndex + ## The ID of a path expression. Currently an `IRIndex`, but paths might be + ## stored in a separate list later on + + JoinPoint* = #[distinct]# IRIndex + + IrNodeKindTwo = enum + inktGlobal + inktConstLoc + inktProc + inktConst + inktImm + inktCursor + inktPathArr + inktPathObj + inktPathCall + inktLd + inktWr + inktLdDeref + inktWrDeref + inktCommit ## links a call expression with barriers + inktBarrier + inktOp + inktStmt + inktCallExpr + inktCallStmt + + inktAddr + + inktTemp + inktLocal + inktParam + inktJoinFwd + inktJoin + + inktBranch + inktGoto + + # XXX: structure is temporary + IrNode2 = object + case kind: IrNodeKindTwo + of inktLd, inktLdDeref: + ldSrc: IRIndex + srcVmTyp: VmTypeId + srcTyp: TTypeKind + of inktWr, inktWrDeref: + wrDst, wrSrc: IRIndex + dstVmTyp: VmTypeId + dstTyp: TTypeKind + + of inktPathObj: + field: uint16 + objSrc: PathIndex + of inktPathArr: + arrSrc: PathIndex + idx: IRIndex + of inktImm: + immediate: uint32 + of inktConst: + litId: uint32 + of inktGlobal, inktProc, inktConstLoc: + linkIndex: uint32 + + of inktOp, inktStmt: + opc: TOpcode + opArgs: seq[IRIndex] + + of inktCallExpr, inktCallStmt, inktPathCall: + hasSideEffect: bool + name: IRIndex + args: seq[IRIndex] + + of inktCommit: + call: IRIndex + barriers: seq[IRIndex] + + of inktTemp, inktLocal: + tmpName: uint32 + typ: VmTypeId + + of inktParam: + param: uint32 + + of inktJoinFwd: + fwd: JoinPoint + + of inktBranch: + branchOp: TOpcode + cond: IRIndex + target: JoinPoint + + of inktGoto: + gotoOp: TOpcode + gotoTarget: JoinPoint + of inktAddr: + addrLoc: IRIndex + + else: + discard + + IrStore* = object + handles: seq[HandleInfo] + paths: seq[Path] + ops: seq[IrNode2] + + nextTemp: uint32 + + IrNodeKind3* = enum + ntkAsgn + ntkUse + ntkConsume + ntkCall + ntkAddr + ntkDeref + ntkLocEnd + + ntkSym + ntkRoot # a handle + ntkLocal # references a local + ntkLit + ntkImm + + ntkPathArr + ntkPathObj + + ntkBranch + ntkGoto + ntkJoin + ntkGotoLink + ntkGotoCont # goto with continuation + ntkContinue # goto the active continuation + + # phase 4 + ntkLoad + ntkWrite + + AssignKind* = enum + askShallow + askInit # XXX: an assign can be both an init assign and a shallow assign + askMove + askCopy + + askDiscr # XXX: would be simpler if this would be a magic call instead (e.g. `switch`) + + BuiltinCall* = enum + bcError # encodes an error in the IR # XXX: make this a dedicated node? + bcNewClosure # setup closure + bcSwitch # switch variant branch + bcOf # 'of' branch + bcGetBranchIndex # compute the branch index + bcRaise + bcTestError + bcRaiseFieldErr + bcInclRange + bcRangeCheck + bcConv + bcCast # XXX: cast and conv should become dedicated ir nodes + bcOverflowCheck + + IrNode3* = object + case kind: IrNodeKind3 + of ntkAsgn: + asgnKind: AssignKind + wrDst, wrSrc: IRIndex + discard + of ntkImm: + immediate: uint32 + of ntkSym: + symIdx: int + of ntkPathObj: + field: uint16 + objSrc: PathIndex + of ntkPathArr: + arrSrc: PathIndex + idx: IRIndex + of ntkLit: + lit: PNode + of ntkLocal: + local: int + of ntkAddr, ntkDeref: + addrLoc: PathIndex + of ntkGoto, ntkGotoLink: + gotoTarget: JoinPoint + of ntkBranch: + cond: IRIndex + target: JoinPoint + of ntkGotoCont: + contTarget: JoinPoint + contThen: JoinPoint + of ntkCall: + case isBuiltin: bool + of true: + builtin: BuiltinCall + typ: PType # the return type + of false: callee: IRIndex + + args: seq[IRIndex] + of ntkUse, ntkConsume: + theLoc: IRIndex + of ntkJoin: + joinPoint: JoinPoint + else: + discard + + LocalKind* = enum + lkTemp + lkVar + lkLet + + IrStore3* = object + nodes: seq[IrNode3] + joins: seq[(IRIndex, bool)] # joint point id -> ir position + syms: seq[PSym] + locals: seq[(LocalKind, PType, PSym)] + + localSrc: seq[seq[StackTraceEntry]] + sources: seq[seq[StackTraceEntry]] # the stack trace of where each node was added + CodeFragment* = object + code*: seq[TInstr] + debug*: seq[TLineInfo] + + IrGenError = object of ValueError + report: SemReport + + InstrInfo = object + ## Description of an instrunction representing a magic + isInOut: bool ## whether the output is also an input + hasResult: bool + isBx: bool + hasImm: bool + + GenState* = object + regs: seq[bool] + nodeRegs: seq[TRegister] + + types {.cursor.}: seq[PVmType] + +const InstrInfos: array[TOpcode, InstrInfo] = default(array[TOpcode, InstrInfo]) + +func fail(info: TLineInfo, kind: ReportKind; + loc = instLoc()) {.noreturn, noinline.} = + var report = SemReport(kind: kind, location: some(info), + reportInst: toReportLineInfo(loc)) + raise (ref IrGenError)(report: report) + + +template missingImpl*() = assert false + + +func traceFor*(s: IrStore3, i: IRIndex): seq[StackTraceEntry] = + s.sources[i] + +func traceForLocal*(s: IrStore3, i: int): seq[StackTraceEntry] = + s.localSrc[i] + +# version 1 + + +#func irConst(c: var TCtx, litIdx: int): IRIndex = +# discard + +#func irAddCall(c: var TCtx, i: IRIndex, args: seq[IRIndex]) = +# discard + +#func irAddCallInd(c: var TCtx, i: IRIndex) = +# discard + +func irMoved*(c: var IrStore, i: IRIndex): IRIndex = + missingImpl() +#[ +func irObjAcc*(c: var TCtx, s: IRIndex, f: int): IRIndex = discard +func irArrAcc*(c: var TCtx, s: IRIndex, i: IRIndex): IRIndex = discard +func irWrObj*(c: var TCtx, d: IRIndex, f: int, v: IRIndex): IRIndex = discard +func irWrArr*(c: var TCtx, d, i, v: IRIndex): IRIndex = discard +func irWrLoc*(c: var TCtx, d: IRIndex, s: IRIndex): IRIndex = discard +]# +#func irGlobal(c: var TCtx, linkIndex: uint32): IRIndex = discard + + +#[ +func irDep*(c: var TCtx, val: IRIndex, stmt: IRIndex): IRIndex = + ## encodes `val` having a control-flow dependency on `stmt` + discard +]# + +#func irOp(c: var TCtx, op: TOpcode, x: varargs[IRIndex]): IRIndex = discard +#func irOpMut*(c: var TCtx, op: TOpcode, x: varargs[IRIndex]): IRIndex = discard + + +#func irPhony*(c: var TCtx, x: varargs[IRIndex]): IRIndex = +# discard + +#func irFork*(c: var TCtx): IRIndex = +# discard + +#func irPred*(c: var TCtx): IRIndex = +# discard + +#func irJoin*(c: var TCtx, paths: varargs[IRIndex]): IRIndex = +# discard + +#func irSetCf*(c: var TCtx, p: IRIndex) = discard + +#func irOfBranch*(c: var TCtx, op: TOpcode, lit: int): IRIndex = discard +#func irBranch*(c: var TCtx, op: TOpcode, i: IRIndex): IRIndex = discard +#func irBranch*(c: var TCtx, op: TOpcode, a, b: IRIndex): IRIndex = missingImpl() + + +func add(x: var IrStore, n: sink IrNode2): IRIndex = + result = x.ops.len.IRIndex + x.ops.add n + + +func add(x: var IrStore3, n: sink IrNode3): IRIndex = + result = x.nodes.len.IRIndex + x.nodes.add n + {.noSideEffect.}: + x.sources.add getStackTraceEntries() + +## version 2/3 + + +func genLocal*(c: var IrStore3, kind: LocalKind, typ: PType): int = + assert typ != nil + c.locals.add((kind, typ, nil)) + result = c.locals.high + {.noSideEffect.}: + c.localSrc.add(getStackTraceEntries()) + +func genLocal*(c: var IrStore3, kind: LocalKind, sym: PSym): int = + ## A local that has a symbol + assert sym.typ != nil + c.locals.add((kind, sym.typ, sym)) + result = c.locals.high + {.noSideEffect.}: + c.localSrc.add(getStackTraceEntries()) + +func irContinue*(c: var IrStore3) = + discard c.add(IrNode3(kind: ntkContinue)) + +func irUse*(c: var IrStore3, loc: IRIndex): IRIndex = + c.add(IrNode3(kind: ntkUse, theLoc: loc)) + +proc irSym*(c: var IrStore3, sym: PSym): IRIndex = + # TODO: don't add duplicate items? + assert sym != nil + c.syms.add(sym) + c.add(IrNode3(kind: ntkSym, symIdx: c.syms.high)) + +func irDeref*(c: var IrStore3, val: IRIndex): IRIndex = + c.add(IrNode3(kind: ntkDeref, addrLoc: val)) + +proc irGlobal*(c: var IrStore, linkIndex: uint32): IRIndex = + ## A global + c.add(IrNode2(kind: inktGlobal, linkIndex: linkIndex)) + +proc irConstLoc*(c: var IrStore, linkIndex: uint32): IRIndex = + ## A complex constant + c.add(IrNode2(kind: inktConstLoc, linkIndex: linkIndex)) + +func irProc*(c: var IrStore, linkIndex: uint32): IRIndex = + ## A procedure + c.add(IrNode2(kind: inktProc, linkIndex: linkIndex)) + +func irTemp*(c: var IrStore, typ: VmTypeId): IRIndex = + ## Introduces a new temporary with a unique name + result = c.add(IrNode2(kind: inktTemp, tmpName: c.nextTemp, typ: typ)) + inc c.nextTemp + +func irParam*(c: var IrStore, pos: uint32): IRIndex = + ## A path component. Refers to a parameter + c.add(IrNode2(kind: inktParam, param: pos)) + +proc irConst*(c: var IrStore, i: uint32): IRIndex = + ## Load a simple constant with the given literal index `i` + c.add(IrNode2(kind: inktConst, litId: i)) + +proc irImm*(c: var IrStore3, val: uint32): IRIndex = + ## Load an immediate int value. + ## TODO: maybe store the `PNode` in the IR for a `irkConst` instead? And + ## move const handling to stage 2? + c.add(IrNode3(kind: ntkImm, immediate: val)) + +func irCursor*(c: var IrStore, i: IRIndex): IRIndex = + ## Introduce a cursor (i.e. a shallow copy) of `i` + missingImpl() + +proc irPathArr*(c: var IrStore3, src: IRIndex, idx: IRIndex): IRIndex = + ## Path constructor. `src` must be a location or path representing an + ## array; `idx` must be a value (both literal and run-time value) + c.add(IrNode3(kind: ntkPathArr, arrSrc: src, idx: idx)) + +proc irPathObj*(c: var IrStore3, src: IRIndex, idx: int): IRIndex = + ## Path constructor. `src` must be a location or path representing a + ## record + # TODO: `idx` should be a ``uint16`` + c.add(IrNode3(kind: ntkPathObj, objSrc: src, field: idx.uint16)) + +proc irLd*(c: var IrStore, path: PathIndex#[, tk: TTypeKind, typ: VmTypeId]#): IRIndex = + ## Represents the beginning of lifetime of a loaded location + c.add(IrNode2(kind: inktLd, ldSrc: path#[, srcTyp: tk, srcVmTyp: typ]#)) + +proc irLdDeref*(c: var IrStore, handle: IRIndex): IRIndex = + ## Dereference a pointer, reference or ``var``/``lent`` + missingImpl() + +proc irAsgn*(c: var IrStore3, kind: AssignKind, path: PathIndex, src: IRIndex): IRIndex {.discardable.} = + ## Write to a location + #doAssert c.ops[path].kind in {inktPathObj, inktPathArr, inktPathCall} + # TODO: declare proper sets and use them here + #doAssert c.nodes[path].kind in {ntkLoc, ntkPathObj, ntkPathArr, ntkCall}, $c.ops[path].kind + #doAssert c.nodes[src].kind notin {inktGlobal, inktLocal, inktTemp} + doAssert path != InvalidIndex + doAssert src != InvalidIndex + c.add(IrNode3(kind: ntkAsgn, asgnKind: kind, wrDst: path, wrSrc: src)) + +proc irWrDeref*(c: var IrStore, handle: IRIndex): IRIndex = + ## Write through a dereference of a pointer, ref, or ``var``/``lent`` + missingImpl() + +proc irBarrier*(c: var IrStore, path: PathIndex): IRIndex = + ## Read (maybe also write?) barrier. A load must not be moved across a + ## barrier + missingImpl() + +proc irOp*(c: var IrStore, opc: TOpcode, args: varargs[IRIndex]): IRIndex = + ## An operation that takes input and produces an output without side-effects + c.add(IrNode2(kind: inktOp, opc: opc, opArgs: @args)) + +proc irStmt*(c: var IrStore, opc: TOpcode, args: varargs[IRIndex]): IRIndex {.discardable.} = + ## Can't be reordered with other statements. The returned IRIndex must not + ## be used in a value or location context + c.add(IrNode2(kind: inktStmt, opc: opc, opArgs: @args)) + +func irCall*(c: var IrStore3, callee: IRIndex, args: varargs[IRIndex]): IRIndex = + c.add(IrNode3(kind: ntkCall, isBuiltin: false, callee: callee, args: @args)) + +func irCall*(c: var IrStore3, callee: BuiltinCall, typ: PType, args: varargs[IRIndex]): IRIndex = + c.add(IrNode3(kind: ntkCall, isBuiltin: true, builtin: callee, typ: typ, args: @args)) + +proc irCallExpr*(c: var IrStore, name: IRIndex, noSideEffect: bool, args: varargs[IRIndex]): IRIndex = + ## A call producing a value. Can only be reordered with other calls if both have no side-effects + c.add(IrNode2(kind: inktCallExpr, name: name, hasSideEffect: not noSideEffect, args: @args)) + +proc irCallPathExpr*(c: var IrStore, name: IRIndex, noSideEffect: bool, args: varargs[IRIndex]): IRIndex = + ## A call producing a handle. Acts as a path constructor. Can only be + ## reordered with other calls if both have no side-effects + c.add(IrNode2(kind: inktPathCall, name: name, hasSideEffect: not noSideEffect, args: @args)) + +proc irCallStmt*(c: var IrStore, name: IRIndex, noSideEffect: bool, args: varargs[IRIndex]): IRIndex = + ## A call producing nothing. Acts as a path constructor. Call has no side-effects + c.add(IrNode2(kind: inktCallStmt, name: name, hasSideEffect: not noSideEffect, args: @args)) + +func irJoinFwd*(c: var IrStore3): JoinPoint = + ## TODO: document + ## Helper to make modifications of the IR easier. During IR-gen when the + ## target is not yet known (e.g. when generating an if-branch) + c.joins.add (0, false) + result = c.joins.high + +func irLoopJoin*(c: var IrStore3): JoinPoint = + result = c.joins.len + let pos = c.add IrNode3(kind: ntkJoin, joinPoint: result) + c.joins.add (pos, true) + +func irJoin*(c: var IrStore3, jp: JoinPoint) = + ## TODO: document + ## A join point + let pos = c.add(IrNode3(kind: ntkJoin, joinPoint: jp)) + c.joins[jp][0] = pos + +func keys*(x: seq): Slice[int] = + 0..x.high + +func irBranch*(c: var IrStore3, cond: IRIndex, target: JoinPoint): IRIndex {.discardable.} = + ## TODO: document + ## A branch + assert target in c.joins.keys + c.add(IrNode3(kind: ntkBranch, cond: cond, target: target)) + +func irGoto*(c: var IrStore3, target: JoinPoint): IRIndex {.discardable.} = + ## TODO: document + ## Unstructured control-flow. + c.add(IrNode3(kind: ntkGoto, gotoTarget: target)) + +func irCont*(c: var IrStore3, target: JoinPoint, then: JoinPoint) = + discard c.add(IrNode3(kind: ntkGotoCont, contTarget: target, contThen: then)) + +func irGotoLink*(c: var IrStore3, target: JoinPoint) = + discard + +func irNull*(c: var IrStore, typ: PType): IRIndex = + ## The zero representation for `typ` + missingImpl() + +func isGoto*(c: IrStore, p: IRIndex): bool {.inline.} = + missingImpl() + +func irLocal*(c: var IrStore3, name: int): IRIndex = + ## references a local + c.add(IrNode3(kind: ntkLocal, local: name)) + +func irAddr*(c: var IrStore3, loc: IRIndex): IRIndex = + ## Take the address of a location + c.add(IrNode3(kind: ntkAddr, addrLoc: loc)) + +# version 1 (old) transition helpers + +func irLit*(c: var IrStore3, n: PNode): IRIndex = + #assert n.typ != nil + c.add(IrNode3(kind: ntkLit, lit: n)) + +# TODO: not related to the IR. Might need a better home +proc append*(dst: var CodeFragment, f: CodeFragment) = + ## Appends the code from `f` to `dst`. The code doesn't need to be adjusted, + ## since all jumps are relative + dst.code.add(f.code) + dst.debug.add(f.debug) + +# ------ query procs + +func len*(c: IrStore3): int = + c.nodes.len + +func numLocals*(s: IrStore3): int = + s.locals.len + +func isLastAGoto*(ir: IrStore3): bool = + ir.nodes.len > 0 and ir.nodes[^1].kind in {ntkGoto, ntkGotoCont} + +iterator nodes*(s: IrStore3): lent IrNode3 = + for it in s.nodes: + yield it + +iterator locals*(s: IrStore3): (PType, PSym) = + for it in s.locals: + yield (it[1], it[2]) + +func at*(irs: IrStore3, i: IRIndex): lent IrNode3 = + irs.nodes[i] + +func sym*(c: IrStore3, n: IrNode3): PSym = + c.syms[n.symIdx] + +func getLocal*(irs: IrStore3, n: IRIndex): (LocalKind, PType, PSym) = + irs.locals[irs.nodes[n].local] + +func getLocalIdx*(irs: IrStore3, n: IRIndex): int = + irs.nodes[n].local + +func getLit*(irs: IrStore3, n: IrNode3): PNode = + n.lit + +func isLoop*(ir: IrStore3, j: JoinPoint): bool = + ir.joins[j][1] + +func position*(ir: IrStore3, j: JoinPoint): IRIndex = + ir.joins[j][0] + +func kind*(n: IrNode3): IrNodeKind3 {.inline.} = + n.kind + +func fieldIdx*(n: IrNode3): int = + n.field.int + +func arrIdx*(n: IrNode3): IRIndex = + n.idx + +func isBuiltIn*(n: IrNode3): bool = + n.isBuiltin + +func callee*(n: IrNode3): IRIndex = + n.callee + +func builtin*(n: IrNode3): BuiltinCall = + n.builtin + +func argCount*(n: IrNode3): int = + n.args.len + +func args*(n: IrNode3, i: int): IRIndex = + n.args[i] + +iterator args*(n: IrNode3): IRIndex = + for it in n.args: + yield it + + +func typ*(n: IrNode3): PType = + ## The return type of a builtin call + n.typ + +func asgnKind*(n: IrNode3): AssignKind = + n.asgnKind + + +func cond*(n: IrNode3): IRIndex = + n.cond + +func joinPoint*(n: IrNode3): JoinPoint = + n.joinPoint + +func target*(n: IrNode3): JoinPoint = + case n.kind + of ntkBranch: + n.target + of ntkGoto: + n.gotoTarget + else: + unreachable(n.kind) + +func addrLoc*(n: IrNode3): IRIndex = + n.addrLoc + +func wrLoc*(n: IrNode3): IRIndex = + n.wrDst + +func srcLoc*(n: IrNode3): IRIndex = + let node = n + case node.kind + of ntkAsgn: + node.wrSrc + of ntkPathObj: + node.objSrc + of ntkPathArr: + node.arrSrc + of ntkUse, ntkConsume: + node.theLoc + else: + unreachable(node.kind) + +# ------ optimizer + +#[ +func run*(s: var IrStore) = + ## Calculate lifetimes and aliases + + type CfgNode = object + isLoop: bool + prev: seq[ref CfgNode] + next: seq[ref CfgNode] + + var start = (ref CfgNode)() + var curr = start + + var i = 0 + var joins: Table[IRIndex, ref CfgNode] + var loads: seq[(IRIndex, ref CfgNode, int)] + var stores: seq[(IRIndex, ref CfgNode, int)] + var roots: seq[int] + var locs: seq[tuple[isAlias: bool, others: seq[int], root: int]] + var uses: seq[(IRIndex, ref CfgNode, int)] + var locMap: Table[IRIndex, int] + + roots.add(0) # root 0 = params + roots.add(1) # root 1 = globals + roots.add(2) # root 2 = constants + + proc addLoc(p: IRIndex, root: int) = + let loc = locs.len + locs.add((false, @[], root)) + locMap[p] = loc + + while i < s.ops.len: + let n = s.ops[i] + case n.kind + of inktParam: + addLoc(i, 0) + of inktGlobal: + addLoc(i, 1) + of inktConstLoc: + addLoc(i, 2) + of inktTemp, inktLocal: + let root = roots.len + roots.add(0) + addLoc(i, root) + of inktLd: + loads.add((i, curr, locMap[n.ldSrc])) + uses.add((i, curr, locMap[n.ldSrc])) + + of inktWr: + stores.add((i, curr, locMap[n.wrDst])) + uses.add((i, curr, locMap[n.wrDst])) + + of inktJoinFwd: + joins[n.fwd] = (ref CfgNode)() + + of inktJoin: + if i notin joins: + curr = (ref CfgNode)() + joins[i] = curr + else: + curr = joins[i] + + of inktBranch: + # XXX: is there always a i + 1? + if s.ops[i+1].kind != inktJoin: + let newNode = new(CfgNode) + curr.next.add(newNode) + newNode.prev.add(curr) + curr = newNode + + of inktGoto: + let target = if s.ops[n.gotoTarget].kind == inktJoinFwd: s.ops[n.gotoTarget].fwd else: n.gotoTarget + if target < i: + # XXX: meh + joins[target].isLoop = true + + joins[target].prev.add(curr) + curr.next.add(joins[target]) + else: + discard "ignore" + + inc i +]# + +func run*(s: var IrStore) = + # TODO: improve documentation + + var writes: seq[tuple[loc: int]] + var targets: seq[int] # add targets for handles + var trustDiff: seq[(int, bool)] + var loadDiff: seq[(int, bool)] + var aliasDiff: seq[(IRIndex)] + var frames: seq[tuple[td, ld: Slice[int]]] # the diff frames + + var tsTD: int # thread-start load diff + var tsLD: int # thread-start trust diff + + var roots: seq[int] + var locs: seq[tuple[isAlias: bool, others: seq[int], root: int]] + + var locMap: Table[IRIndex, int] # index -> index into locs + var handleMap: Table[IRIndex, int] # index -> index into locs + + var trusted: seq[bool] ## root index -> trust state + # TODO: should be seq: + var loaded: IntSet ## for each path index -> loaded state + + var joinPoints: Table[IRIndex, seq[int]] ## joint point -> list of indices into `frames` + var nextThreads: seq[IRIndex] # XXX: a circular buffer would make sense here + + const UniqueRoot = 0 ## the unique root is used for things that can never + ## alias. Temporaries use the unique root for example + + var paramRoots: seq[int] + + # TODO: try to make `localRoots` a seq + var localRoots: Table[IRIndex, int] # local index -> root index + + proc root(s: IrStore, p: IRIndex): int = + # TODO: better doc + ## Gets the currently active root for the given location + let n = s.ops[p] + case n.kind + of inktTemp: + UniqueRoot + of inktCallExpr, inktOp: + # the return values of call expressions are temporaries + UniqueRoot + of inktPathObj: + s.root(n.objSrc) + of inktPathArr: + s.root(n.arrSrc) + of inktPathCall: + # for path calls (functions returning views) the returned handle is always derived from the first argument + s.root(n.args[0]) + of inktLocal: + localRoots[p] + of inktConstLoc: + # constants can never be written too + UniqueRoot + else: + unreachable(n.kind) + + proc addRoot(trust: bool): int = + result = roots.len + trusted.add(trust) + + proc isTrusted(root: int): bool = + trusted[root] + + var addList: seq[(IRIndex, IrNode2)] + var delList: seq[IRIndex] + + proc validate(root: int) = + if not trusted[root]: + # TODO: insert `irVerify` + trusted[root] = true + trustDiff.add((root, false)) # add the inversion to the diff + + var i = 0 + while i < s.ops.len: + let n = s.ops[i] + case n.kind + of inktLocal: + # setup the root. Locals start as trusted + localRoots[i] = addRoot(trust=true) + of inktLd: + let r = s.root(n.ldSrc) + if not isTrusted(r): + # the root is not trusted -> validate the root + validate(r) + + if n.ldSrc in loaded: + # the load can be removed + delList.add(i) + else: + # source is not loaded yet + loadDiff.add((n.ldSrc, false)) + loaded.incl n.ldSrc + + of inktWr: + let r = s.root(n.wrDst) + if not isTrusted(r): + # the root is not trusted + validate(r) + + + of inktAddr: + # treat addr as an imperative statement + discard + + + of inktBranch: + let target = if s.ops[n.target].kind == inktJoinFwd: s.ops[n.target].fwd else: n.target + # execute the branch not taken case (i.e. fallthrough) first + nextThreads.add(target) + + # start a new thread + tsTD = trustDiff.len + tsLD = loadDiff.len + + of inktGoto: + # FIXME: loops will cause an infinite loop here + # TODO: check how loop end fare + let target = if s.ops[n.gotoTarget].kind == inktJoinFwd: s.ops[n.gotoTarget].fwd else: n.gotoTarget + # end of thread + # TODO: clean up thread + frames.add((td: tsTD..trustDiff.high, ld: tsLD..loadDiff.high)) + joinPoints.mgetOrPut(target, @[]).add(frames.high) + + # TODO: improve the next == `target` case + + # TODO: use a binary search here + var p = -1 + for j, t in nextThreads.pairs: + if target <= t: + p = j + break + + if p == -1: + nextThreads.add(target) + elif target != nextThreads[p]: + nextThreads.insert(target, p) + + debugEcho "next threads: ", target + # goto join point + i = nextThreads[0] + debugEcho "what: ", i + nextThreads.delete(0) + + # start a new thread + tsTD = trustDiff.len + tsLD = loadDiff.len + + debugEcho "goto: ", i + debugEcho "next threads: ", nextThreads + + dec i + + else: + discard "not relevant" + + + inc i + + +func optimize(s: var IrStore) = + discard + + +# ------- end + +#func runAlias*(s: IrStore) = + + + +#[ +func populateHandles(s: var IrStore) = + s.handles.newSeq(s.ops.len) + for i, x in s.ops.pairs: + case x.kind + of irkPath: + if x.isDyn: + s.handles[i] = HandleInfo(root: x.parent.int32, sub: high(uint32)) + else: + s.handles[i] = HandleInfo(root: x.parent.int32, sub: x.sub.uint32) + of irkDeref: + s.handles[i] = s.handles[x.ds] + of irkAddr: + s.handles[i] = s.handles[x.aso] + else: + discard +]# + +template getT(c: TCtx, id: VmTypeId): PVmType = + discard + + +func freeReg(gs: var GenState): TRegister = + for i, r in gs.regs.mpairs: + if not r: + r = true + return TRegister(i) + + result = TRegister(gs.regs.len) + gs.regs.add(true) + +func regRange(gs: var GenState, num: int): Slice[TRegister] = + # TODO: refactor + for i, r in gs.regs.pairs: + if not r: + block search: + # test if `num` registers starting at `i` are free: + for j in i+1 ..< i+num: + if gs.regs[i]: + # they aren't + break search + + # claim registers + for j in i.. high(uint16).int: + fail(unknownLineInfo, rsemTooManyRegistersRequired) + + let start = gs.regs.len + gs.regs.setLen(start + num) + result = TRegister(start)..TRegister(start+num-1) + for i in result: + gs.regs[i] = true + +func gABC(ctx: var TCtx; n: PNode; opc: TOpcode; a, b, c: TRegister = 0) = + ## Takes the registers `b` and `c`, applies the operation `opc` to them, and + ## stores the result into register `a` + ## The node is needed for debug information + assert opc.ord < 255 + let ins = (opc.TInstrType or (a.TInstrType shl regAShift) or + (b.TInstrType shl regBShift) or + (c.TInstrType shl regCShift)).TInstr + #[ + when false: + if ctx.code.len == 43: + writeStackTrace() + echo "generating ", opc + ]# + ctx.code.add(ins) + ctx.debug.add(n.info) + +proc gABI(c: var TCtx; n: PNode; opc: TOpcode; a, b: TRegister; imm: BiggestInt) = + # Takes the `b` register and the immediate `imm`, applies the operation `opc`, + # and stores the output value into `a`. + # `imm` is signed and must be within [-128, 127] + c.config.internalAssert(imm in -128..127 , n.info, + "VM: immediate value does not fit into an int8") + + let ins = (opc.TInstrType or (a.TInstrType shl regAShift) or + (b.TInstrType shl regBShift) or + (imm+byteExcess).TInstrType shl regCShift).TInstr + c.code.add(ins) + c.debug.add(n.info) + +proc gABx(c: var TCtx; n: PNode; opc: TOpcode; a: TRegister = 0; bx: int) = + # Applies `opc` to `bx` and stores it into register `a` + # `bx` must be signed and in the range [regBxMin, regBxMax] + + #[ + when false: + if c.code.len == 43: + writeStackTrace() + echo "generating ", opc + ]# + + c.config.internalAssert(bx in regBxMin-1..regBxMax, n.info, + "VM: immediate value does not fit into regBx") + + let ins = (opc.TInstrType or a.TInstrType shl regAShift or + (bx+wordExcess).TInstrType shl regBxShift).TInstr + c.code.add(ins) + c.debug.add(n.info) + + +func gABx2(ctx: var CodeFragment, opc: TOpcode, a: TRegister, bx: int; info: TLineInfo = unknownLineInfo) = + # Applies `opc` to `bx` and stores it into register `a` + # `bx` must be signed and in the range [regBxMin, regBxMax] + + #c.config.internalAssert(bx in regBxMin-1..regBxMax, n.info, + # "VM: immediate value does not fit into regBx") + + let ins = (opc.TInstrType or a.TInstrType shl regAShift or + (bx+wordExcess).TInstrType shl regBxShift).TInstr + debugEcho opc + ctx.code.add(ins) + ctx.debug.add(info) + +func gABC2(ctx: var CodeFragment, opc: TOpcode, a: TRegister; b, c: TRegister = 0; info: TLineInfo = unknownLineInfo) = + ## Takes the registers `b` and `c`, applies the operation `opc` to them, and + ## stores the result into register `a` + ## The node is needed for debug information + assert opc.ord < 255 + let ins = (opc.TInstrType or (a.TInstrType shl regAShift) or + (b.TInstrType shl regBShift) or + (c.TInstrType shl regCShift)).TInstr + + ctx.code.add(ins) + ctx.debug.add(info) + +func gABI2(ctx: var CodeFragment, opc: TOpcode, a: TRegister; b: TRegister; imm: int; info: TLineInfo = unknownLineInfo) = + # Takes the `b` register and the immediate `imm`, applies the operation `opc`, + # and stores the output value into `a`. + # `imm` is signed and must be within [-128, 127] + #c.config.internalAssert(imm in -128..127 , n.info, + # "VM: immediate value does not fit into an int8") + + let ins = (opc.TInstrType or (a.TInstrType shl regAShift) or + (b.TInstrType shl regBShift) or + (imm+byteExcess).TInstrType shl regCShift).TInstr + debugEcho opc + ctx.code.add(ins) + ctx.debug.add(info) + +proc xjmp(c: var TCtx; n: PNode; opc: TOpcode; a: TRegister = 0): TPosition = + #assert opc in {opcJmp, opcFJmp, opcTJmp} + result = TPosition(c.code.len) + gABx(c, n, opc, a, 0) + +func genLabel(c: TCtx): TPosition = + result = TPosition(c.code.len) + #c.jumpTargets.incl(c.code.len) + +proc jmpBack(c: var TCtx, n: PNode, p = TPosition(0)) = + let dist = p.int - c.code.len + internalAssert(c.config, regBxMin < dist and dist < regBxMax) + gABx(c, n, opcJmpBack, 0, dist) + +proc patch(c: var TCtx, p: TPosition) = + # patch with current index + let p = p.int + let diff = c.code.len - p + #c.jumpTargets.incl(c.code.len) + internalAssert(c.config, regBxMin < diff and diff < regBxMax) + let oldInstr = c.code[p] + # opcode and regA stay the same: + c.code[p] = ((oldInstr.TInstrType and regBxMask).TInstrType or + TInstrType(diff+wordExcess) shl regBxShift).TInstr + +const EmptySlot = high(TRegister) + + +func genPath(c: var CodeFragment, gs: var GenState, s: IrStore, p: IRIndex; isAddr: bool = false): TRegister + +func genOp(c: var CodeFragment, gs: GenState, s: IrStore, p: IRIndex, dst: TRegister) = + let n = s.ops[p] + assert n.kind == inktOp + +func getImm(s: IrStore, p: IRIndex): int = + ## Returns the immediate value stored at pos `p` + missingImpl() + +func asgnReg(c: var CodeFragment, a, b: TRegister) = + c.gABC2(opcCpReg, a, b) + +func genBuiltinCall(c: var CodeFragment, gs: var GenState, s: IrStore, n: IrNode2): TRegister = + # TODO: refactor; improve ``InstrInfo`` + let info = InstrInfos[n.opc] + var inputs: array[5, int] + var numArgs = n.opArgs.len + assert numArgs <= 5 + for i, x in n.opArgs.pairs: + inputs[i] = x + + if info.hasResult: + result = gs.freeReg() + + if info.isInOut or not info.hasResult: + result = inputs[0] + inputs[0] = inputs[1] + inputs[1] = inputs[2] + inputs[2] = inputs[3] + inputs[3] = inputs[4] + inputs[4] = 0 + dec numArgs + + if info.isBx: + # TODO: also needs to be asserted when emitting the IR: + assert numArgs == 1 + c.gABx2(n.opc, result, inputs[0]) + elif info.hasImm: + assert numArgs == 2 + # TODO: assert that inputs[1] is taken from a literal + c.gABI2(n.opc, result, inputs[0], inputs[1]) + elif numArgs <= 2: + c.gABC2(n.opc, result, inputs[0], inputs[2]) + else: + c.gABC2(n.opc, result, inputs[0], inputs[1]) + c.gABC2(n.opc, inputs[2], inputs[3], inputs[4]) + + +func getType(s: IrStore, types: seq[PVmType], p: IRIndex): PVmType = + let n = s.ops[p] + #case n.kind + #of inktPathObj: + # getTyp() + + +func genValue(c: var CodeFragment, gs: var GenState, s: IrStore, p: IRIndex): TRegister = + if gs.nodeRegs[p] != EmptySlot: + return gs.nodeRegs[p] + + result = freeReg(gs) # FIXME: wrong, sometimes not used + let n = s.ops[p] + case n.kind + of inktImm: + c.gABx2(opcLdImmInt, result, n.immediate.int) + of inktConst: + # TODO: maybe rename to opcLdLit? + c.gABx2(opcLdConst, result, n.litId.int) + of inktLd: + # TODO: assert that the register entry is populated + # TODO: use a proper `genLoad`. `genPath` is wrong here!! + result = genPath(c, gs, s, n.ldSrc) + # TODO: emit validate instruction + of inktOp: + result = genBuiltinCall(c, gs, s, n) + of inktCallExpr: + let regs = regRange(gs, n.args.len+1) + c.asgnReg(regs.a, genValue(c, gs, s, n.name)) + for i, arg in n.args.pairs: + c.asgnReg(regs.a + i + 1, genValue(c, gs, s, arg)) + + c.gABC2(opcIndCallAsgn, result, regs.a, regs.len) + + of inktProc: + # XXX: procedural values aren't stored in registers yet and thus require + # special handling depending on where they're used. As an alternative + # temporary workaround, we could perform a + # ``opdLdNull``+``opcWrProc`` here, but that would require a type + # lookup (for LdNull) that's hard to pull off here. + unreachable("inktProc requires special handling") + else: + unreachable(n.kind) + + gs.nodeRegs[p] = result + +func genLoc(c: var CodeFragment, gs: var GenState, s: IrStore, p: IRIndex): TRegister = + ## Emits the instructions for the given IR yielding a handle to a location + result = gs.nodeRegs[p] + if result != EmptySlot: + return + + let n = s.ops[p] + case n.kind + of inktGlobal: + result = gs.freeReg() + c.gABx2(opcLdGlobal, result, n.linkIndex.int) + else: + # FIXME: wrong + result = genPath(c, gs, s, p) + + gs.nodeRegs[p] = result + +func genPath(c: var CodeFragment, gs: var GenState, s: IrStore, p: IRIndex; isAddr: bool = false): TRegister = + # `genPath` implementation and architecture is unacceptable, but works for now + + result = gs.nodeRegs[p] + if result != EmptySlot: + return + + let n = s.ops[p] + result = freeReg(gs) # XXX: due to recursion, order is wrong + case n.kind + of inktPathObj: + let src = genPath(c, gs, s, n.objSrc) + c.gABC2(opcLdObj, result, src, TRegister(n.field)) + of inktPathArr: + let src = genPath(c, gs, s, n.objSrc) + let idx = genValue(c, gs, s, n.idx) + + c.gABC2(opcLdArr, result, src, idx) + + of inktPathCall: + result = genValue(c, gs, s, p) + + else: + unreachable(n.kind) + + gs.nodeRegs[p] = result + + +proc optimizeJumps(c: var CodeFragment; start: int) + +proc getReg*(gs: GenState, x: IRIndex): TRegister = + gs.nodeRegs[x] + +func isVoid*(s: IrStore, idx: IRIndex): bool = + s.ops[idx].kind in {inktStmt, inktCallStmt} + +proc genCode*(s: var IrStore, gs: var GenState): (CodeFragment, int) = + var handles: seq[TRegister] + handles.newSeq(s.paths.len) + + var code: CodeFragment + var cl: GenClosure + + gs.nodeRegs.newSeq(s.ops.len) + for r in gs.nodeRegs.mitems: + r = EmptySlot # FIXME: sentinel value is a valid register index + + for i, x in s.ops.pairs: + case x.kind + of inktLd, inktLdDeref: + # a load is only generated when it's connected to a statement + # TODO: maybe a bad idea (requires one to be more vigilant regarding + # evaluation order) + discard + of inktWr: + # meh, procs require special handling for now: + if s.ops[x.wrSrc].kind == inktProc: + let loc = genLoc(code, gs, s, x.wrDst) + code.gABx2(opcWrProc, loc, s.ops[x.wrSrc].linkIndex.int) + continue + + let src = genValue(code, gs, s, x.wrSrc) # TODO: left-to-right violation? + + let dstN = s.ops[x.wrDst] + case dstN.kind + of inktPathArr: + let a = genPath(code, gs, s, dstN.arrSrc) + let idx = genValue(code, gs, s, dstN.idx) + code.gABC2(opcWrArr, a, idx, src) + of inktPathObj: + let a = genPath(code, gs, s, dstN.objSrc) + let f = TRegister(dstN.field) + code.gABC2(opcWrObj, a, f, src) + of inktPathCall: + let v = genValue(code, gs, s, i) + # TODO: merge ``opcAsgnComplex`` and the other 'opcAsgnX' instructions into just ``opcAsgn`` + # a ``lent`` or ``var`` value is currently implemented as a pointer + code.gABC2(opcWrDeref, v, 0, src) + of inktGlobal, inktTemp, inktLocal: + let v = genLoc(code, gs, s, x.wrDst) + code.gABC2(opcAsgnComplex, v, src) + else: + # dst must be a path component + unreachable(dstN.kind) + + of inktWrDeref: + let + # do _NOT_ reorder these two declarations + dst = genValue(code, gs, s, x.wrDst) + src = genValue(code, gs, s, x.wrSrc) + + code.gABC2(opcWrDeref, dst, src) + + of inktStmt: + discard genBuiltinCall(code, gs, s, x) + of inktCallExpr, inktCallStmt: + if x.hasSideEffect: + discard genValue(code, gs, s, i) + + of inktCommit: + discard genValue(code, gs, s, x.call) + + of inktTemp, inktLocal: + # XXX: register allocation is a mess, a second, more low-level IR might + # be needed + let reg = gs.freeReg() + gs.nodeRegs[i] = reg + + code.gABx2(opcLdNull, reg, x.typ.int) + + of inktGoto: + case x.gotoOp + of TOpcode(0): + # TODO: emit simple jump + echo "Missing jump" + of opcRet: + code.gABC2(opcRet, gs.nodeRegs[1]) # FIXME: hardcoded result handling + else: + unreachable(x.gotoOp) + + else: + #echo "ignore: ", x.kind + # the rest is only generated on use + discard + + optimizeJumps(code, 0) + + #echo "dump" + #echo s.ops.len + #echo code.code.len + #echo gs.regs.len + + result = (move code, gs.regs.len) + +proc finalJumpTarget(c: var CodeFragment; pc, diff: int) = + #[internalAssert( + c.config, + regBxMin < diff and diff < regBxMax, + "Jump target is not in range of min/max registers - $1 < $2 < $3 failed" % [ + $regBxMin, $diff, $regBxMax])]# + + let oldInstr = c.code[pc] + # opcode and regA stay the same: + c.code[pc] = (( + oldInstr.TInstrType and + ((regOMask shl regOShift) or (regAMask shl regAShift))).TInstrType or + TInstrType(diff+wordExcess) shl regBxShift).TInstr + + +proc optimizeJumps(c: var CodeFragment; start: int) = + const maxIterations = 10 + for i in start.. 0: + d += c.code[d].jmpDiff + dec iters + if c.code[d].opcode == opcRet: + # optimize 'jmp to ret' to 'ret' here + c.code[i] = c.code[d] + elif d != i + c.code[i].jmpDiff: + c.finalJumpTarget(i, d - i) + else: discard + + +proc dumpIr*(s: IrStore): string = + var names: seq[string] + names.newSeq(s.ops.len) + + var ldName: int + + proc genExpr(p: IRIndex): string = + proc genProc(p: IRIndex): string = + if s.ops[p].kind == inktProc: + result = "proc_" & $s.ops[p].linkIndex + else: + result = genExpr(p) + + if names[p].len > 0: + return names[p] + + let n = s.ops[p] + case n.kind + of inktImm: + result &= $n.immediate + of inktOp: + result = $n.opc & "(" + for arg in n.opArgs.items: + result &= genExpr(arg) + result &= ", " + result &= ")" + of inktCallExpr: + let name = genProc(n.name) + result = name & "(" + for arg in n.args.items: + result &= genExpr(arg) + result &= ", " + result &= ")" + of inktGlobal: + result &= "global_" & $n.linkIndex + of inktConst: + # TODO: print the literal value + result &= "lit_" & $n.litId + of inktPathObj: + result &= genExpr(n.objSrc) & ".field_" & $n.field + of inktParam: + result &= "local_" & $n.param + of inktProc: + result &= genProc(p) + of inktAddr: + result &= "addr " & genExpr(n.addrLoc) # XXX: should use `genLoc` (doesn't matter much) + of inktPathArr: + result &= genExpr(n.arrSrc) & "[" + result &= genExpr(n.idx) + result &= "]" + else: + unreachable(n.kind) + + proc genLoc(p: IRIndex): string = + if names[p].len > 0: + return names[p] + + result = genExpr(p) + doAssert result.len > 0 + + for i, n in s.ops.pairs: + case n.kind + of inktLd: + names[i] = "loaded_" & $ldName + inc ldName + result &= "let " & names[i] & " = " & genExpr(n.ldSrc) & "\n" + of inktWr: + result &= genLoc(n.wrDst) + result &= " = " + result &= genExpr(n.wrSrc) + result &= "\n" + of inktTemp: + names[i] = "tmp_" & $n.tmpName + + result &= "var " & names[i] & "\n" + of inktLocal: + names[i] = "local_" & $i + + result &= "var " & names[i] & "\n" + of inktCallStmt: + let name = genExpr(n.name) + result &= name & "(" + for arg in n.args.items: + result &= genExpr(arg) + result &= ", " + result &= ")\n" + of inktStmt: + result &= $n.opc & "(" + for arg in n.opArgs.items: + result &= genExpr(arg) + result &= ", " + result &= ")\n" + + of inktImm, inktConst, inktParam, inktPathObj, inktPathCall, inktPathArr, inktOp, inktProc, inktAddr: + discard + of inktJoinFwd: + names[n.fwd] = "label_" & $i + of inktJoin: + if names[i].len == 0: + names[i] = "label_" & $i + + result &= names[i] & ":\n" + of inktBranch: + case n.branchOp + of opcTJmp: + result &= "if " & genExpr(n.cond) + of opcFJmp: + result &= "if not " & genExpr(n.cond) + else: unreachable(n.branchOp) + + result &= ":\n" + else: + result &= "missing: " & $n.kind & "\n" + +type NewNodeKind* = IrNodeKind3 + + +type + NewNode* = object + kind*: NewNodeKind + ExecProc*[A, B] = proc (c: var A, ls: var B, n: NewNode) + NewIr* = seq[NewNode] + +type + LocIndex* = int + FieldId* = int + NewNodeId* = int + ValueId* = int + +func target(n: NewNode): NewNodeId = + discard + +func gotoTarget(n: NewNode): NewNodeId = + discard + + +func id*(n: NewNode): int = + discard + +func loc*(n: NewNode): LocIndex = + discard + +func srcLoc*(n: NewNode): LocIndex = + discard + +func isViewCall*(n: NewNode): bool = + discard + +func arg*(n: NewNode, i: int): LocIndex = + discard + + +type ProcInfo = object + # TODO: merge bools into a bitset + isInline: bool + addrTaken: bool # whether the procedure is called indirectly + deps: Slice[uint32] + order: int + +func merge(a: var ProcInfo, b: ProcInfo) = + a.addrTaken = a.addrTaken or b.addrTaken + +func callDeps(body: PNode, deps: var Table[int, ProcInfo]) = + # TODO: figuring out the dependencies could and should be done differently, + # preferably without the need for recursion + case body.kind + of nkSym: + # XXX: detecting a prodecure symbol use in a non-call-symbol context like + # this is brittle (and also wrong when there are meta expressions in `body`) + deps.mgetOrPut(body.sym.id, ProcInfo()).addrTaken = true + of nkCallKinds: + let symNode = body[0] + if symNode.kind == nkSym: + # add a dependency if it doesn't exist already + discard deps.hasKeyOrPut(symNode.sym.id, ProcInfo()) + + for i in 1.. graph index + var tmp: Table[int, ProcInfo] + + type NodeIndex = int + + var gc: seq[ProcInfo] # graph-control; the graph nodes + var deps: seq[NodeIndex] # a seq of seqs inlined as a single seq + + gc.newSeq(aliveSet.len) + + for i, it in aliveSet.pairs: + gc[i].isInline = it.typ.callConv == ccInline + gc[i].deps = 1'u32..0'u32 # empty slice + tmp.clear() + callDeps(it.ast, tmp) + let start = deps.len + gc[i].deps = uint32(start)..uint32(start+tmp.len-1) + + #deps.setLen(deps.len + tmp.len) + # merge the collected proc infos + for id, info in tmp.pairs: + let nodeIdx = tbl[id] + merge(gc[nodeIdx], info) + # TODO: don't `add`; use `setLen` + `[]=` + deps.add(nodeIdx) + + # compute the ordering by iteratively propagating the order value + # TODO: this needs to be done differently + var modified = true + while modified: + modified = false + for it in gc.mitems: + let newOrder = it.order + 1 + for d in it.deps: + if gc[d].order < newOrder: + gc[d].order = newOrder + modified = true + + # compute the upper bounds + discard + +func firstPass*(s: var NewIr) = + ## Runs phase 1 and 2 + # TODO: `s` shouldn't be mutable + #runV2[AliasCtx, AliasesLocal, int](s, computeAliases) + #runV2[DestrCtx, DestrLocal, int](s, computeDestructors) + + +type + InputState = object + ModState = object + Loc = distinct int + + CGLocal = object + CGGlobal = object + code: CodeFragment + +#[ +func requiresAssign(s: ModState, x: Loc): bool = + s.hasRef(x) and not s.isCursor(x) +]# + +func isRegister(c: CGGlobal, n: LocIndex): bool = + discard + +func getFreeReg(ls: var CGLocal): int = + discard + +func setLoaded(ls: var CGLocal, loc: LocIndex, regIdx: int) = + discard + +func reg(s: CGLocal, loc: LocIndex): int = + discard + +func hasSideEffects(c: CGGlobal, n: NewNode): bool = + discard + +iterator loaded(ls: CGLocal): tuple[loc: LocIndex, reg: int] = + discard + +iterator args(n: NewNode): NewNode = + discard + +func hasResult(n: NewNode): bool = + discard + +func resetTrusted(ls: var CGLocal) = + discard + +func handle(ls: CGLocal, loc: LocIndex): int = + discard + +func typ(ls: CGGlobal, loc: LocIndex): PType = + discard + +func trust(ls: var CGLocal, l: LocIndex) = + discard + +func isTrusted(ls: CGLocal, l: LocIndex): bool = + discard + +const opcRegCopy = opcAsgnComplex # XXX: temporary +const opcValidate = opcAsgnComplex # XXX: temporary + +func genCodeV3Exec(c: var CGGlobal, ls: var CGLocal, n: NewNode) = + template verify(l: LocIndex) = + # TODO: must not be a template + ls.trust(l) + c.code.gABC2(opcValidate, ls.handle(l)) + + case n.kind + of ntkWrite: + # write to a location + if c.isRegister(n.loc): + # we can simply do a register copy. Also mark the register as loaded + assert c.isRegister(n.loc) + let r = ls.getFreeReg() + ls.setLoaded(n.loc, r) + + c.code.gABC2(opcRegCopy, ls.reg(n.loc), ls.reg(n.srcLoc)) + + of ntkLoad: + if c.isRegister(n.loc): + discard # XXX: hm, noop? + else: + # TODO: tyProc is wrong; only closures are not loaded + if c.typ(n.loc).kind in {tySequence, tyProc}: + # these don't go into registers + ls.setLoaded(n.loc, ls.handle(n.loc)) # XXX: wrong? + else: + if not ls.isTrusted(n.loc): + verify(n.loc) + + let r = ls.getFreeReg() + ls.setLoaded(n.loc, r) + + c.code.gABC2(opcNodeToReg, r, ls.handle(n.loc)) + + of ntkCall: + # TODO: maybe the flush analysis should be done in stage 4? + + # Calls are completely opaque, so we have no idea what's happening there. For the location validity analysis, we don't trust the + # `noSideEffect` attribute of a function since side-effects can be cast away via `{.cast.}`. The `noSideEffect` attribute is only + # used to decide if writes need to be made visible and some other optimizations + + if c.hasSideEffects(n): + # across a function call boundary, writes need to be observable and we need to flush all locations temporarily stored in registers back to memory + for x, r in ls.loaded: + discard + #c.gABC2(opcWrLoc, x) + #freeReg() + else: + # SPEC: does noSideEffect also mean that only locations reachable from the parameters are read? the current wording is somewhat ambiguous. Here it's assumed it does + + for arg in n.args: + discard + #x.isArg + # + + # reset all handles to non-locals back to untrusted + ls.resetTrusted() + + if n.hasResult: + discard #c.code.gABC2(opcIndCallAsgn, ) + + of ntkSym, ntkAddr, ntkDeref: + discard + + of ntkUse, ntkConsume, ntkAsgn: + unreachable(n.kind) + of ntkGoto, ntkBranch: + unreachable(n.kind) + else: + {.warning: "handle this".} + +#[ +func genCodeV2(ins: InputState) = + var s: ModState + + + + for n in ins.nodes.items: + case n.kind + of ntkWrite: + # write to a location + if s.isRegister(n.loc): + # we can simply do a register copy + assert s.isRegister(n.srcLoc) + c.gABC2(opcRegCopy, s.reg(n.loc), s.reg(n.srcLoc)) + else: + # target is a location + if s.isRegister(n.srcLoc): + assert not s.isComplex(n.loc) + c.gABC2(opcWrLoc, s.handle(n.loc), s.reg(n.srcLoc)) + else: + if s.requiresAssign(n.loc): + # ref-like types are not mutated in registers, so we don't need + # to flush any aliases + c.gABC2(opcAsgnComplex, s.handle(n.loc), s.handle(n.srcLoc)) + else: + # no need for a complex assign, just do a mem copy + c.gABC2(opcFastAsgn, s.handle(n.loc), s.handle(n.srcLoc)) + + of ntkLoad: + var needsLoad: bool + if not s.isTrusted(n): + assert not s.isLoaded(n.loc) # if + c.gABC2(opcChkHandle, s.handle(n.loc)) +]# + +# IrCursor interface + +type IrCursor* = object + pos: int + actions: seq[(bool, Slice[IRIndex])] # true = replace, false = insert + newSyms: seq[PSym] + newLocals: seq[(LocalKind, PType, PSym)] + newNodes: seq[IrNode3] + + traces: seq[seq[StackTraceEntry]] + + symStart: int + nextIdx: IRIndex + nextLocal: int + +func setup*(cr: var IrCursor, ir: IrStore3) = + cr.nextIdx = ir.len + cr.symStart = ir.syms.len + cr.nextLocal = ir.locals.len + +func getNext(cr: var IrCursor): IRIndex {.inline.} = + result = cr.nextIdx + inc cr.nextIdx + + +func setPos*(cr: var IrCursor, pos: IRIndex) {.inline.} = + cr.pos = pos + +func position*(cr: IrCursor): int {.inline.} = + cr.pos + +func replace*(cr: var IrCursor) = + ## Switches to replace mode. The next insert will overwrite the node at the cursor position + assert cr.actions.len == 0 or cr.actions[^1][0] == false or cr.actions[^1][1].a != cr.pos, "replace already called" + cr.actions.add (true, cr.pos .. cr.pos-1) + +func insert(cr: var IrCursor, n: sink IrNode3): IRIndex = + cr.newNodes.add n + {.cast(noSideEffect).}: + cr.traces.add getStackTraceEntries() + + if cr.actions.len > 0 and cr.actions[^1][1].a == cr.pos: + # append to the insertion or replacement + inc cr.actions[^1][1].b + else: + cr.actions.add (false, cr.pos..cr.pos) + result = cr.getNext() + +func insertSym*(cr: var IrCursor, sym: PSym): IRIndex = + assert sym != nil + result = cr.insert IrNode3(kind: ntkSym, symIdx: cr.symStart + cr.newSyms.len) + cr.newSyms.add sym + +func insertCallExpr*(cr: var IrCursor, sym: PSym, args: varargs[IRIndex]): IRIndex = + let c = cr.insertSym(sym) + result = cr.insert IrNode3(kind: ntkCall, isBuiltin: false, callee: c, args: @args) + +func insertCallStmt*(cr: var IrCursor, sym: PSym, args: varargs[IRIndex]) = + discard insertCallExpr(cr, sym, args) + +func insertCallExpr*(cr: var IrCursor, bc: BuiltinCall, typ: PType, args: varargs[IRIndex]): IRIndex = + result = cr.insert IrNode3(kind: ntkCall, isBuiltin: true, builtin: bc, typ: typ, args: @args) + +func insertLit*(cr: var IrCursor, lit: PNode): IRIndex = + cr.insert IrNode3(kind: ntkLit, lit: lit) + +func insertAsgn*(cr: var IrCursor, kind: AssignKind, a, b: IRIndex) = + discard cr.insert IrNode3(kind: ntkAsgn, asgnKind: kind, wrDst: a, wrSrc: b) + +func insertCast*(cr: var IrCursor, t: PType, val: IRIndex): IRIndex = + cr.insertCallExpr(bcCast, t, val) + +func insertConv*(cr: var IrCursor, t: PType, val: IRIndex): IRIndex = + cr.insertCallExpr(bcConv, t, val) + +func insertDeref*(cr: var IrCursor, val: IRIndex): IRIndex = + cr.insert IrNode3(kind: ntkDeref, addrLoc: val) + +func insertPathObj*(cr: var IrCursor, obj: IRIndex, field: uint16): IRIndex = + cr.insert IrNode3(kind: ntkPathObj, objSrc: obj, field: field) + +func insertPathArr*(cr: var IrCursor, arr, idx: IRIndex): IRIndex = + cr.insert IrNode3(kind: ntkPathArr, arrSrc: arr, idx: idx) + +func newJoinPoint*(cr: var IrCursor): JoinPoint = + discard + +func insertBranch*(cr: var IrCursor, cond: IRIndex, target: JoinPoint) = + discard + +func insertGoto*(cr: var IrCursor, t: JoinPoint) = + discard + +func insertJoin*(cr: var IrCursor, t: JoinPoint) = + discard + +func newLocal*(cr: var IrCursor, kind: LocalKind, s: PSym): int = + cr.newLocals.add((kind, s.typ, s)) + result = cr.nextLocal + inc cr.nextLocal + +func newLocal*(cr: var IrCursor, kind: LocalKind, t: PType): int = + assert kind == lkTemp + cr.newLocals.add((kind, t, nil)) + result = cr.nextLocal + inc cr.nextLocal + + +func insertLocalRef*(cr: var IrCursor, name: int): IRIndex = + cr.insert IrNode3(kind: ntkLocal, local: name) + + +func patch(n: var IrNode3, patchTable: seq[IRIndex]) = + func patchIdx(n: var IRIndex) = + assert patchTable[n] != -1, "node was removed" + n = patchTable[n] + + case n.kind + of ntkCall: + if not n.isBuiltin: + patchIdx(n.callee) + + for arg in n.args.mitems: + patchIdx(arg) + + of ntkAsgn: + patchIdx(n.wrDst) + patchIdx(n.wrSrc) + + of ntkUse, ntkConsume: + patchIdx(n.theLoc) + of ntkAddr, ntkDeref: + patchIdx(n.addrLoc) + of ntkBranch: + patchIdx(n.cond) + + of ntkPathObj: + patchIdx(n.objSrc) + of ntkPathArr: + patchIdx(n.arrSrc) + patchIdx(n.idx) + + of ntkJoin, ntkGoto, ntkSym, ntkLocal, ntkLocEnd, ntkImm, ntkGotoCont, + ntkContinue, ntkGotoLink, ntkLoad, ntkWrite, ntkRoot, ntkLit: + discard "nothing to patch" + +func inline*(cr: var IrCursor, other: IrStore3, args: varargs[IRIndex]): IRIndex = + ## Does NOT create temporaries for each arg + # XXX: unfinished + + # register the insertion + if cr.actions.len > 0 and cr.actions[^1][1].a == cr.pos: + # append to the insertion or replacement + cr.actions[^1][1].b += other.len + else: + cr.actions.add (false, cr.pos..(cr.pos+other.len-1)) + + let oldLen = cr.newNodes.len + + cr.newNodes.add(other.nodes) + cr.newSyms.add(other.syms) + cr.newLocals.add(other.locals) + cr.traces.add(other.sources) # use the traces of the original + + var patchTable = newSeq[IRIndex](other.len) + + # search for references to parameters and replace them with the + # corresponding arg from `args`. Also patch symbol and local references + for i in oldLen.. 0 + patchTable[slice.a] = slice.b + currOff # the replaced node + for i in slice.a.. Date: Wed, 17 Aug 2022 16:29:47 +0100 Subject: [PATCH 002/395] vmir: generalize storage of changes for `IrCursor` --- compiler/vm/vmir.nim | 57 ++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 6212d8e9f0c..cadf6f99d04 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -1799,23 +1799,46 @@ func genCodeV2(ins: InputState) = # IrCursor interface +type + SeqAdditions[T] = object + # TODO: needs a better name + data: seq[T] + start: int + type IrCursor* = object pos: int actions: seq[(bool, Slice[IRIndex])] # true = replace, false = insert - newSyms: seq[PSym] - newLocals: seq[(LocalKind, PType, PSym)] + newSyms: SeqAdditions[PSym] + newLocals: SeqAdditions[(LocalKind, PType, PSym)] newNodes: seq[IrNode3] traces: seq[seq[StackTraceEntry]] - symStart: int nextIdx: IRIndex - nextLocal: int + +func add[T](x: var SeqAdditions[T], item: sink T): int {.inline.} = + result = x.start + x.data.len + x.data.add(item) + +func add[T](x: var SeqAdditions[T], other: openArray[T]) {.inline.} = + x.data.add(other) + +func setFrom[T](x: var SeqAdditions[T], s: seq[T]) = + # TODO: document + x.start = s.len + +func start[T](x: SeqAdditions[T]): int {.inline.} = + x.start + +func apply[T](dest: var seq[T], src: sink SeqAdditions[T]) = + # TODO: rename function or swap parameters? + dest.add(src.data) func setup*(cr: var IrCursor, ir: IrStore3) = cr.nextIdx = ir.len - cr.symStart = ir.syms.len - cr.nextLocal = ir.locals.len + cr.newSyms.setFrom(ir.syms) + cr.newLocals.setFrom(ir.locals) + cr.newLiterals.setFrom(ir.literals) func getNext(cr: var IrCursor): IRIndex {.inline.} = result = cr.nextIdx @@ -1847,8 +1870,7 @@ func insert(cr: var IrCursor, n: sink IrNode3): IRIndex = func insertSym*(cr: var IrCursor, sym: PSym): IRIndex = assert sym != nil - result = cr.insert IrNode3(kind: ntkSym, symIdx: cr.symStart + cr.newSyms.len) - cr.newSyms.add sym + cr.insert IrNode3(kind: ntkSym, symIdx: cr.newSyms.add(sym)) func insertCallExpr*(cr: var IrCursor, sym: PSym, args: varargs[IRIndex]): IRIndex = let c = cr.insertSym(sym) @@ -1895,15 +1917,10 @@ func insertJoin*(cr: var IrCursor, t: JoinPoint) = func newLocal*(cr: var IrCursor, kind: LocalKind, s: PSym): int = cr.newLocals.add((kind, s.typ, s)) - result = cr.nextLocal - inc cr.nextLocal func newLocal*(cr: var IrCursor, kind: LocalKind, t: PType): int = assert kind == lkTemp cr.newLocals.add((kind, t, nil)) - result = cr.nextLocal - inc cr.nextLocal - func insertLocalRef*(cr: var IrCursor, name: int): IRIndex = cr.insert IrNode3(kind: ntkLocal, local: name) @@ -1975,15 +1992,13 @@ func inline*(cr: var IrCursor, other: IrStore3, args: varargs[IRIndex]): IRIndex # for simplicity, the original parameter reference node is left as is patchTable[i - oldLen] = args[s.position] else: - cr.newNodes[i].symIdx += cr.symStart + cr.newSyms.len + cr.newNodes[i].symIdx += cr.newSyms.start of ntkLocal: - cr.newNodes[i].local += cr.nextLocal + cr.newNodes[i].local += cr.newLocals.start else: patch(cr.newNodes[i], patchTable) - cr.nextIdx += other.len - cr.nextLocal += other.locals.len func update*(ir: var IrStore3, cr: sink IrCursor) = ## Integrates the changes collected by the cursor `cr` into `ir` @@ -1991,10 +2006,10 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = var patchTable: seq[IRIndex] let oldLen = ir.len patchTable.newSeq(cr.nextIdx) # old ir len + insert node count - ir.syms.add(cr.newSyms) - cr.newSyms.reset() - ir.locals.add(cr.newLocals) - cr.newLocals.reset() + + ir.syms.apply(cr.newSyms) + ir.locals.apply(cr.newLocals) + var currOff = 0 var p = 0 var p1 = 0 From d5485cfe1b78b29f6f22e82f5c1c2d9fc79928a4 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:30:12 +0100 Subject: [PATCH 003/395] vmir: use a separate `seq` to store literals This simplifies processing/scanning of literal data and also removes a GC'ed data type from `IrNode3` --- compiler/vm/vmir.nim | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index cadf6f99d04..53690c6e10f 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -203,7 +203,7 @@ type arrSrc: PathIndex idx: IRIndex of ntkLit: - lit: PNode + litIdx: int of ntkLocal: local: int of ntkAddr, ntkDeref: @@ -240,6 +240,7 @@ type nodes: seq[IrNode3] joins: seq[(IRIndex, bool)] # joint point id -> ir position syms: seq[PSym] + literals: seq[PNode] locals: seq[(LocalKind, PType, PSym)] localSrc: seq[seq[StackTraceEntry]] @@ -537,7 +538,8 @@ func irAddr*(c: var IrStore3, loc: IRIndex): IRIndex = func irLit*(c: var IrStore3, n: PNode): IRIndex = #assert n.typ != nil - c.add(IrNode3(kind: ntkLit, lit: n)) + result = c.add(IrNode3(kind: ntkLit, litIdx: c.literals.len)) + c.literals.add(n) # TODO: not related to the IR. Might need a better home proc append*(dst: var CodeFragment, f: CodeFragment) = @@ -578,7 +580,7 @@ func getLocalIdx*(irs: IrStore3, n: IRIndex): int = irs.nodes[n].local func getLit*(irs: IrStore3, n: IrNode3): PNode = - n.lit + irs.literals[n.litIdx] func isLoop*(ir: IrStore3, j: JoinPoint): bool = ir.joins[j][1] @@ -1810,6 +1812,7 @@ type IrCursor* = object actions: seq[(bool, Slice[IRIndex])] # true = replace, false = insert newSyms: SeqAdditions[PSym] newLocals: SeqAdditions[(LocalKind, PType, PSym)] + newLiterals: SeqAdditions[PNode] newNodes: seq[IrNode3] traces: seq[seq[StackTraceEntry]] @@ -1883,7 +1886,8 @@ func insertCallExpr*(cr: var IrCursor, bc: BuiltinCall, typ: PType, args: vararg result = cr.insert IrNode3(kind: ntkCall, isBuiltin: true, builtin: bc, typ: typ, args: @args) func insertLit*(cr: var IrCursor, lit: PNode): IRIndex = - cr.insert IrNode3(kind: ntkLit, lit: lit) + assert lit != nil + cr.insert IrNode3(kind: ntkLit, litIdx: cr.newLiterals.add(lit)) func insertAsgn*(cr: var IrCursor, kind: AssignKind, a, b: IRIndex) = discard cr.insert IrNode3(kind: ntkAsgn, asgnKind: kind, wrDst: a, wrSrc: b) @@ -2009,6 +2013,7 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = ir.syms.apply(cr.newSyms) ir.locals.apply(cr.newLocals) + ir.literals.apply(cr.newLiterals) var currOff = 0 var p = 0 From e0bc8d97e8da2fd851e3b7bb7abfa77962d6d0ba Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:31:05 +0100 Subject: [PATCH 004/395] cgen2: `genBuiltin` no longer returns a type The type information is computed via `computeTypes` already --- compiler/vm/cgen2.nim | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 86dd078486b..6b878df0d41 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -577,20 +577,20 @@ func genBraced(elems: varargs[CAst]): CAst = func ident(c: var GlobalGenCtx, name: string): CAst = result.add cnkIdent, c.idents.getOrIncl(name).uint32 -func genBuiltin(c: var GenCtx, irs: IrStore3, bc: BuiltinCall, n: IrNode3): (CAst, PType) = +func genBuiltin(c: var GenCtx, irs: IrStore3, bc: BuiltinCall, n: IrNode3): CAst = case bc of bcNewClosure: - (genBraced(c.gl.ident("NIM_NIL"), c.gl.ident("NIM_NIL")), nil) # XXX: hmm, closure type is known during irgen time... + genBraced(c.gl.ident("NIM_NIL"), c.gl.ident("NIM_NIL")) of bcOverflowCheck: - (genArithm(c, n.args(0), true), nil) # TODO: use the type of ``n.args(0)`` + genArithm(c, n.args(0), true) of bcTestError: var ast = start() - (ast.add(cnkCall, 1).sub().ident(c.gl.idents, "NIM_UNLIKELY").emitDeref(c.gl.idents).sub().ident(c.gl.idents, "err").fin(), nil) # TODO: use tyBool + ast.add(cnkCall, 1).sub().ident(c.gl.idents, "NIM_UNLIKELY").emitDeref(c.gl.idents).sub().ident(c.gl.idents, "err").fin() of bcCast: let dstTyp = n.typ var ast = start() discard ast.add(cnkCast).add(cnkType, mapTypeV2(c, dstTyp).uint32).add(gen(c, irs, n.args(0))) - (ast.fin(), dstTyp) + ast.fin() of bcRaise: var ast = start() if argCount(n) == 0: @@ -602,10 +602,10 @@ func genBuiltin(c: var GenCtx, irs: IrStore3, bc: BuiltinCall, n: IrNode3): (CAs discard ast.add(c.gen(irs, n.args(0))).add(c.gen(irs, n.args(1))) discard ast.ident(c.gl.idents, "NIM_NIL").ident(c.gl.idents, "NIM_NIL").intLit(0) - (ast.fin(), nil) + ast.fin() else: - (genError(c, fmt"missing: {bc}"), nil) + genError(c, fmt"missing: {bc}") #unreachable(bc) type MagicKind = enum From 101a301247be96338b79acb69e013e635f552626 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:32:30 +0100 Subject: [PATCH 005/395] wip: introduce a custom representation for types and symbols --- compiler/vm/cbackend2.nim | 115 +++++-- compiler/vm/cgen2.nim | 460 ++++++++++++++-------------- compiler/vm/irdbg.nim | 13 +- compiler/vm/irgen.nim | 153 ++++++---- compiler/vm/irpasses.nim | 304 ++++++++++--------- compiler/vm/irtypes.nim | 618 +++++++++++++++++++++++++++++++++++--- compiler/vm/vmir.nim | 117 +++++--- lib/system.nim | 2 +- 8 files changed, 1231 insertions(+), 551 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index d11f131ee07..b8b9a78e220 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -7,7 +7,7 @@ import ast, ast_types, astalgo, # for `getModule` - idents, + lineinfos, reports ], compiler/backend/[ @@ -30,6 +30,7 @@ import ], compiler/vm/[ irgen, + irtypes, vmir, cgen2, irpasses, @@ -56,9 +57,9 @@ type initGlobalsCode: CodeFragment ## the bytecode of `initGlobalsProc`. Each ## encountered `{.global.}`'s init statement gets code-gen'ed into the ## `initGlobalCode` of the module that owns it - initGlobalsProc: (PSym, IrStore3) ## the proc that initializes `{.global.}` + initGlobalsProc: (SymId, IrStore3) ## the proc that initializes `{.global.}` ## variables - initProc: (PSym, IrStore3) ## the module init proc (top-level statements) + initProc: (SymId, IrStore3) ## the module init proc (top-level statements) ModuleListRef = ref ModuleList ModuleList = object of RootObj @@ -122,7 +123,7 @@ proc generateTopLevelStmts*(module: var Module, c: var TCtx, c.endProc() # the `initProc` symbol is missing a valid `ast` field - module.initProc[0] = newSym(skProc, getIdent(c.graph.cache, "init"), nextSymId c.idgen, module.sym, module.sym.info) + module.initProc[0] = c.symEnv.addSym(skProc, NoneType, "init") # TODO: non-obvious mutation, move this somewhere else module.initProc[1] = c.irs proc generateCodeForProc(c: var TCtx, s: PSym): IrGenResult = @@ -166,19 +167,19 @@ proc generateGlobalInit(c: var TCtx, f: var CodeFragment, defs: openArray[PNode] proc genInitProcCall(c: var IrStore3, m: Module) = discard c.irCall(c.irSym(m.initProc[0])) -proc generateEntryProc(c: var TCtx, info: TLineInfo, mlist: ModuleList): IrStore3 = +proc generateEntryProc(c: var TCtx, g: PassEnv, mlist: ModuleList): IrStore3 = ## Generates the entry function and returns it's function table index. ## The entry function simply calls all given `initProcs` (ordered from low ## to high by their function table index) and then returns the value of the ## ``programResult`` global - let - n = newNodeI(nkEmpty, info) # setup code-gen state. One register for the return value and one as a # temporary to hold the init procs c.prc = PProc() c.irs.reset() + let resultVar = c.irs.genLocal(lkVar, g.getSysType(tyInt)) + var systemIdx, mainIdx: int # XXX: can't use `pairs` since it copies for i in 0.. 0: for it in nextProcs.items: let mIdx = it.itemId.module let realIdx = mlist.moduleMap[it.getModule().id] + let sId = c.symEnv.requestSym(it) if g.getBody(it).kind == nkEmpty: # a quick fix to not run `irgen` for 'importc'ed procs - moduleProcs[realIdx].add((it, IrStore3())) + moduleProcs[realIdx].add((sId, IrStore3())) continue let ir = generateCodeForProc(c, it) - collectRoutineSyms(c.unwrap ir, nextProcs2, seenProcs) + collectRoutineSyms(c.unwrap ir, c.symEnv, nextProcs2, seenProcs) #doAssert mIdx == realIdx - moduleProcs[realIdx].add((it, c.unwrap ir)) + moduleProcs[realIdx].add((sId, c.unwrap ir)) + + # flush deferred types already to reduce memory usage a bit + c.types.flush(c.symEnv, g.config) nextProcs.setLen(0) swap(nextProcs, nextProcs2) + # setup a ``PassEnv`` + let passEnv = PassEnv() + block: + for sym in g.compilerprocs.items: + passEnv.compilerprocs[sym.name.s] = c.symEnv.requestSym(sym) + + # XXX: a magic is not necessarily a procedure - it can also be a type + # create a symbol for each magic to be used by the IR transformations + for m, id in passEnv.magics.mpairs: + # fetch the name from a "real" symbol + let sym = g.getSysMagic(unknownLineInfo, "", m) + + let name = + if sym.isError(): + # not every magic has symbol defined in ``system.nim`` (e.g. procs and + # types only used in the backend) + $m + else: + sym.name.s + + id = c.symEnv.addMagic(skProc, NoneType, name, m) + + for op, tbl in passEnv.attachedOps.mpairs: + for k, v in g.attachedOps[op].pairs: + let t = c.types.lookupType(k) + if t != NoneType: + tbl[t] = c.symEnv.requestSym(v) + else: + # XXX: is this case even possible + discard#echo "missing type for type-bound operation" + + for t in { tyVoid, tyInt..tyFloat64, tyBool, tyChar, tyString, tyCstring, tyPointer }.items: + passEnv.sysTypes[t] = c.types.requestType(g.getSysType(unknownLineInfo, t)) + + let entryPoint = - generateMain(c, g.getModule(conf.projectMainIdx), mlist[]) + generateMain(c, passEnv, mlist[]) + + swap(env.syms, c.symEnv) - var lpCtx = LiftPassCtx(graph: g, idgen: g.idgen, cache: g.cache) + var lpCtx = LiftPassCtx(graph: passEnv, idgen: g.idgen, cache: g.cache) + lpCtx.env = addr env for i in 0.. symbol. All used symbols that need to be declared in the C code. # TODO: should be a SymSet + syms: PackedSet[SymId] ## all used symbols that need to be declared in the C code. # TODO: should be a SymSet # TODO: header paths can be very regular. Maybe a CritBitTree[void} would make sense here? # TODO: the header includes are currently emitted in an arbitrary order, is that okay? (check the old cgen) @@ -178,7 +170,7 @@ type ctypeMap: Table[TypeKey, CTypeId] # ctypes: seq[CTypeInfo] # - defered: seq[(PType, CTypeId)] + defered: seq[(TypeId, CTypeId)] CAstBuilder = object ast: CAst @@ -188,11 +180,12 @@ const StringCType = CTypeId(1) const InvalidCIdent = CIdent(0) # warning: this depends on a implementation detail of `BiTable` +#[ func hash(a: TypeKey): Hash = - hash(PType(a).itemId) - + hash(TypeId(a).itemId) func `==`(a, b: TypeKey): bool = a.PType.itemId == b.PType.itemId +]# func `==`(a, b: CTypeId): bool {.borrow.} @@ -201,6 +194,10 @@ func mangledName(sym: PSym): string = # TODO: implement sym.name.s +func mangledName(d: Declaration): string = + # XXX: temporary + d.name + const BaseName = "Sub" ## the name of the field for the base type func add(decl: var CDecl, k: CDeclAstNodeKind; a, b: uint32 = 0) = @@ -225,15 +222,17 @@ type TypeGenCtx = object ctypes: seq[CTypeInfo] # mutated cache: IdentCache # mutated + env: ptr IrEnv + # non-inherited state - weakTypes: set[TTypeKind] # the set of types that can be turned into forward declarations when declared as a pointer + weakTypes: set[TypeNodeKind] # the set of types that can be turned into forward declarations when declared as a pointer forwardBegin: int - forwarded: seq[PType] ## types who's creation was defered. THe first entry + forwarded: seq[TypeId] ## types who's creation was defered. THe first entry ## has an ID of `forwardBegin`, the second ## `forwardBegin + 1`, etc. -func requestType(c: var TypeGenCtx, t: PType): CTypeId = +func requestType(c: var TypeGenCtx, t: TypeId): CTypeId = ## Requests the type-id for `t`. If the c-type for `t` doesn't exist yet, a ## slot for it is reserved and it's added to the `c.forwared` list let next = c.ctypes.len.CTypeId @@ -244,148 +243,155 @@ func requestType(c: var TypeGenCtx, t: PType): CTypeId = c.ctypes.setLen(c.ctypes.len + 1) c.forwarded.add(t) -func requestFuncType(c: var TypeGenCtx, t: PType): CTypeId = +func requestFuncType(c: var TypeGenCtx, t: TypeId): CTypeId = # XXX: this is going to be tricky discard -func genRecordNode(c: var TypeGenCtx, decl: var CDecl, n: PNode): int = +func genRecordNode(c: var TypeGenCtx, decl: var CDecl, i: var RecordNodeIndex, fstart: int): int = + let n = c.env.types[i] + inc i + case n.kind - of nkSym: - let s = n.sym - decl.addField(c.cache, c.requestType(s.typ), s.name.s) - result = 1 - of nkRecList: - for it in n.sons: - discard genRecordNode(c, decl, it) + of rnkList: + discard "ignore" + for _ in 0.. 0 -func getTypeName(c: var IdentCache, typ: PType): CIdent = +func getTypeName(c: var IdentCache, typ: Type, decl: Declaration): CIdent = # TODO: not finished - if typ.sym != nil: - c.getOrIncl(mangledName(typ.sym)) + if decl.name.len > 0: + c.getOrIncl(mangledName(decl)) else: - let h = hashType(typ) + let h = 0#hashType(typ) c.getOrIncl(fmt"{typ.kind}_{h}") @@ -398,14 +404,14 @@ func genForwarded(c: var TypeGenCtx) = let fwd = c.forwarded[i] # XXX: forwarded could be cleared when ``i == forwarded.high`` in # order to cut down on allocations - let decl = genCTypeDecl(c, c.forwarded[i].skipTypes(abstractInst)) - c.ctypes[c.forwardBegin + i] = CTypeInfo(decl: decl, name: getTypeName(c.cache, fwd)) + let decl = genCTypeDecl(c, c.forwarded[i]) + c.ctypes[c.forwardBegin + i] = CTypeInfo(decl: decl, name: getTypeName(c.cache, c.env.types[fwd], Declaration())) inc i c.forwarded.setLen(0) c.forwardBegin = c.ctypes.len # prepare for following ``requestType`` calls -func genCType(dest: var CDecl, cache: var IdentCache, t: PType) = +func genCType(dest: var CDecl, cache: var IdentCache, t: Type) = template addIdentNode(n: string) = dest.add cdnkIdent, cache.getOrIncl(n).uint32 @@ -416,34 +422,41 @@ func genCType(dest: var CDecl, cache: var IdentCache, t: PType) = "NU", "NU8", "NU16", "NU32", "NU64"] case t.kind - of tyVoid: addIdentNode("void") - of tyPointer, tyNil: + of tnkVoid: addIdentNode("void") + of tnkInt: + {.warning: "NI is never emitted anymore, as we can't detect an `int` here".} + addIdentNode(fmt"NI{t.size}") + of tnkUInt: + addIdentNode(fmt"NU{t.size}") + of tnkCString: dest.add cdnkPtr - addIdentNode("void") - of tyInt..tyUInt64: - addIdentNode(NumericalTypeToStr[t.kind]) - of tyCstring: addIdentNode("NIM_CHAR") - of tyBool: + of tnkChar: + addIdentNode("NIM_CHAR") + of tnkBool: addIdentNode("NIM_BOOL") else: addIdentNode(fmt"genCType_missing_{t.kind}") -func genCType(cache: var IdentCache, t: PType): CTypeInfo = +func genCType(cache: var IdentCache, t: Type): CTypeInfo = # TODO: name handling is unfinished genCType(result.decl, cache, t) - result.name = getTypeName(cache, t) + result.name = getTypeName(cache, t, Declaration()) -func useFunction(c: var ModuleCtx, s: PSym) = +func useFunction(c: var ModuleCtx, s: SymId) = ## + c.syms.incl s + #[ if lfHeader in s.loc.flags: c.headers.incl getStr(s.annex.path) elif lfNoDecl notin s.loc.flags: discard c.syms.mgetOrPut(s.id, s) + ]# -func useType(c: var ModuleCtx, t: PType) = - c.types[t.id] = t +func useType(c: var ModuleCtx, t: TypeId) = + assert t != NoneType + c.types.incl t #[ func useTypeWeak(c: var ModuleCtx, t: PType): CTypeId= @@ -452,8 +465,10 @@ func useTypeWeak(c: var ModuleCtx, t: PType): CTypeId= func useType(c: var ModuleCtx, t: PType): CTypeId = ]# -func requestFunction(c: var GlobalGenCtx, s: PSym): int = +func requestFunction(c: var GlobalGenCtx, s: SymId): int = ## Requests the ID of the C-function `s` maps to + discard "now a no-op" + #[ assert s.kind in routineKinds let nextId = c.funcs.len result = c.funcMap.mgetOrPut(s.id, nextId) @@ -461,7 +476,7 @@ func requestFunction(c: var GlobalGenCtx, s: PSym): int = assert result < nextId # the header's content is generated later; we just reserve the slot here c.funcs.setLen(c.funcs.len + 1) - + ]# func requestTypeName(c: var GlobalGenCtx, t: PType): CIdent = # TODO: not finished @@ -473,12 +488,14 @@ func requestTypeName(c: var GlobalGenCtx, t: PType): CIdent = type GenCtx = object f: File tmp: int - sym: PSym + sym: SymId names: seq[CAst] # IRIndex -> expr - types: seq[PType] + types: seq[TypeId] config: ConfigRef + env: #[lent]# ptr IrEnv + gl: GlobalGenCtx # XXX: temporary m: ModuleCtx # XXX: temporary @@ -486,27 +503,36 @@ func gen(c: GenCtx, irs: IrStore3, n: IRIndex): CAst = c.names[n] #"gen_MISSING" -func mapTypeV3(c: var GlobalGenCtx, t: PType): CTypeId +func mapTypeV3(c: var GlobalGenCtx, t: TypeId): CTypeId -func mapTypeV2(c: var GenCtx, t: PType): CTypeId = +func mapTypeV2(c: var GenCtx, t: TypeId): CTypeId = # TODO: unfinished c.m.useType(t) # mark the type as used -func mapTypeV3(c: var GlobalGenCtx, t: PType): CTypeId = - let k = t.TypeKey - result = c.ctypeMap[k] +func mapTypeV3(c: var GlobalGenCtx, t: TypeId): CTypeId = + if t != NoneType: + # XXX: maybe just have a ``NoneType`` -> ``VoidCType`` mapping in the table instead? + c.ctypeMap[t] + else: + VoidCType -func genProcHeader(c: var GlobalGenCtx, t: PType): ProcHeader = - assert t.kind == tyProc +func genProcHeader(c: var GlobalGenCtx, senv: SymbolEnv, tenv: TypeEnv, s: SymId): ProcHeader = + let + sym = senv[s] + typ = sym.typ + assert tenv[typ].kind == tnkProc - result.returnType = - if t[0].isEmptyType(): VoidCType - else: mapTypeV3(c, t[0]) + result.returnType = mapTypeV3(c, tenv.getReturnType(typ)) - result.args.newSeq(t.len - 1) - for i in 1.. 0 and t[0] != nil: - result = nthField(t[0].skipTypes(skipPtrs), pos) - -func safeKind(t: PType): TTypeKind {.inline.} = - if t == nil: tyVoid - else: t.kind - -func genLit(c: var GenCtx, lit: PNode): CAst = case lit.kind of nkIntLit: start().intLit(lit.intVal).fin() @@ -701,7 +694,7 @@ template testNode(cond: bool, i: IRIndex) = if not cond: debugEcho astToStr(cond), " failed" debugEcho "node: ", i - printIr(irs, exprs) + printIr(irs, c.env[], exprs) for e in irs.traceFor(i).items: debugEcho e if irs.at(i).kind == ntkLocal: @@ -721,17 +714,20 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = var tmp = 0 for typ, sym in irs.locals: - if sym != nil: + if sym != NoneSymbol: + discard + #[ if lfHeader in sym.loc.flags: let str = getStr(sym.annex.path) continue elif lfNoDecl in sym.loc.flags: continue + ]# result.add cnkDef result.add cnkType, mapTypeV2(c, typ).uint32 - if sym != nil: # TODO: don't test for temps like this - result.add c.gl.ident mangledName(sym) + if sym != NoneSymbol: # TODO: don't test for temps like this + result.add c.gl.ident mangledName(c.env.syms[sym].decl) else: result.add c.gl.ident(fmt"_tmp{tmp}") @@ -745,34 +741,35 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = for n in irs.nodes: case n.kind of ntkSym: - let sym = irs.sym(n) + let sId = irs.sym(n) + let sym = c.env.syms[sId] # TODO: refactor if sym.kind in routineKinds and sym.magic == mNone: - useFunction(c.m, sym) + useFunction(c.m, sId) elif sym.kind in {skVar, skLet} and sfGlobal in sym.flags: - c.m.syms[sym.id] = sym + c.m.syms.incl sId #discard mapTypeV3(c.gl, sym.typ) # XXX: temporary - if sym.kind notin routineKinds and sym.typ != nil: + if sym.kind notin routineKinds and sym.typ != NoneType: useType(c.m, sym.typ) - names[i] = start().ident(c.gl.idents, mangledName(sym)).fin() + names[i] = start().ident(c.gl.idents, mangledName(sym.decl)).fin() of ntkLocal: let (kind, typ, sym) = irs.getLocal(i) - if sym == nil: + if sym == NoneSymbol: names[i] = start().ident(c.gl.idents, "_tmp" & $c.tmp).fin() inc c.tmp else: - names[i] = start().ident(c.gl.idents, mangledName(sym)).fin() + names[i] = start().ident(c.gl.idents, mangledName(c.env.syms[sym].decl)).fin() of ntkCall: if n.isBuiltIn: - let (name, typ) = genBuiltin(c, irs, n.builtin, n) + let name = genBuiltin(c, irs, n.builtin, n) names[i] = name else: let callee = irs.at(n.callee) - if callee.kind == ntkSym and irs.sym(callee).magic != mNone: - names[i] = genMagic(c, irs, irs.sym(callee).magic, n) + if callee.kind == ntkSym and (let s = c.env.syms[irs.sym(callee)]; s.magic != mNone): + names[i] = genMagic(c, irs, s.magic, n) else: var res = start().add(cnkCall, n.argCount.uint32).add(names[n.callee]) for it in n.args: @@ -786,30 +783,24 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = of ntkAddr: names[i] = start().emitAddr(c.gl.idents).add(names[n.addrLoc]).fin() of ntkDeref: - let t = types[n.addrLoc].skipTypes(abstractInst) - testNode t.kind in {tyPtr, tyRef, tyVar, tyLent, tySink}, n.addrLoc names[i] = start().emitDeref(c.gl.idents).add(names[n.addrLoc]).fin() of ntkAsgn: + testNode names[n.srcLoc].len > 0, i result.add start().add(cnkInfix).add(names[n.wrLoc]).ident(c.gl.idents, "=").add(names[n.srcLoc]).fin() inc numStmts of ntkPathObj: - let typ = types[n.srcLoc].skipTypes(abstractInst) + let + typId = types[n.srcLoc] + typ = c.env.types[typId] + field = c.env.types.field(c.env.types.nthField(typId, n.fieldIdx).toIndex) let src = names[n.srcLoc] - let idx = n.fieldIdx var ast = start().add(cnkDotExpr).add(src) - case typ.kind - of tyObject: - let f = typ.nthField(n.fieldIdx) - discard ast.ident(c.gl.idents, mangledName(f)) - of tyTuple: - if typ.n != nil: - discard ast.ident(c.gl.idents, typ.n[idx].sym.mangledName()) - else: - # annonymous tuple - discard ast.ident(c.gl.idents, fmt"Field{idx}") + if field.sym != NoneSymbol: + discard ast.ident(c.gl.idents, mangledName(c.env.syms[field.sym].decl)) else: - testNode false, n.srcLoc + # TODO: this needs some name clash protection + discard ast.ident(c.gl.idents, fmt"Field{n.fieldIdx}") names[i] = ast.fin() @@ -850,7 +841,8 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = inc i # exit - if c.sym.typ.n[0].typ.isEmptyType(): + # TODO: ``NoneType`` should only mean "no type information", not "void" + if c.env.types.getReturnType(c.env.syms[c.sym].typ) != NoneType: result.add cnkReturn else: result.add cnkReturn, 1 @@ -1124,10 +1116,10 @@ proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo) = assert pos == info.decl.len -proc writeDecl(f: File, c: GlobalGenCtx, h: ProcHeader, name: PSym) = +proc writeDecl(f: File, c: GlobalGenCtx, h: ProcHeader, decl: Declaration) = emitType(f, c, h.returnType) f.write(" ") - f.write(mangledName(name)) + f.write(mangledName(decl)) f.write("(") for i, it in h.args.pairs: if i > 0: @@ -1137,10 +1129,10 @@ proc writeDecl(f: File, c: GlobalGenCtx, h: ProcHeader, name: PSym) = f.writeLine(");") -proc writeDef(f: File, c: GlobalGenCtx, h: ProcHeader, name: PSym) = +proc writeDef(f: File, c: GlobalGenCtx, h: ProcHeader, decl: Declaration) = emitType(f, c, h.returnType) f.write(" ") - f.write(mangledName(name)) + f.write(mangledName(decl)) f.write("(") for i, it in h.args.pairs: if i > 0: @@ -1152,7 +1144,7 @@ proc writeDef(f: File, c: GlobalGenCtx, h: ProcHeader, name: PSym) = f.writeLine(") {") -proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray[(PSym, IrStore3)]) = +proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, env: IrEnv, procs: openArray[(SymId, IrStore3)]) = let f = open(filename.string, fmWrite) defer: f.close() @@ -1163,7 +1155,7 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray mCtx: ModuleCtx asts: seq[CAst] - tgc = TypeGenCtx(weakTypes: {tyObject, tyTuple}) + tgc = TypeGenCtx(weakTypes: {tnkRecord}, env: unsafeAddr env) template swapTypeCtx() = swap(tgc.tm, ctx.ctypeMap) @@ -1180,16 +1172,16 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray for sym, irs in procs.items: useFunction(mCtx, sym) - if sfImportc in sym.flags: + if sfImportc in env.syms[sym].flags: asts.add(default(CAst)) continue - echo "genFor: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) - var c = GenCtx(f: f, config: conf, sym: sym) + echo "genFor: ", env.syms[sym].decl.name #, " at ", conf.toFileLineCol(sym.info) + var c = GenCtx(f: f, config: conf, sym: sym, env: unsafeAddr env) # doing a separate pass for the type computation instead of doing it in # `genCode` is probably a bit less efficient, but it's also simpler; # requires less code duplication; and is also good for modularity - c.types = computeTypes(irs) + c.types = computeTypes(irs, env) swapTypeCtx() @@ -1197,7 +1189,7 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray # means that the C-type equivalents are created, not that the declarations # are also emitted in the output file for t in c.types.items: - if t != nil: + if t != NoneType: discard tgc.requestType(t) swapTypeCtx() @@ -1212,11 +1204,14 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray # XXX: this might lead to an ordering problem, since we're not registering # the types on the first occurence # mark the types used in routine signatures as used - for sym in mCtx.syms.values: + for id in mCtx.syms.items: + let sym = env.syms[id] case sym.kind of routineKinds: - for it in sym.typ.sons: - if it != nil: + if sym.typ == NoneType: continue + + for it in env.types.params(sym.typ): + if it != NoneType: discard tgc.requestType(it) mCtx.useType(it) else: @@ -1230,7 +1225,7 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray var used: seq[CTypeId] block: - for typ in mCtx.types.values: + for typ in mCtx.types.items: used.add ctx.ctypeMap[typ.TypeKey] for i, t in ctx.ctypes.pairs: @@ -1266,17 +1261,18 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray # generate all procedure forward declarations - for sym in mCtx.syms.values: + for id in mCtx.syms.items: + let sym = env.syms[id] case sym.kind of routineKinds: #echo "decl: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) - let hdr = genProcHeader(ctx, sym.typ) + let hdr = genProcHeader(ctx, env.syms, env.types, id) - writeDecl(f, ctx, hdr, sym) + writeDecl(f, ctx, hdr, sym.decl) of skLet, skVar: emitType(f, ctx, ctx.ctypeMap[sym.typ.TypeKey]) f.write " " - f.write mangledName(sym) + f.write mangledName(sym.decl) f.writeLine ";" of skConst: f.writeLine "EMIT_ERROR(\"missing logic: const\")" @@ -1289,13 +1285,15 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, procs: openArray inc i continue - let (sym, _) = procs[i] - let hdr = genProcHeader(ctx, sym.typ) - writeDef(f, ctx, hdr, sym) + let + (id, _) = procs[i] + sym = env.syms[id] + let hdr = genProcHeader(ctx, env.syms, env.types, id) + writeDef(f, ctx, hdr, sym.decl) try: emitCAst(f, ctx, it) except: - echo "emit: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) + echo "emit: ", sym.decl.name#, " at ", conf.toFileLineCol(sym.info) raise f.writeLine "}" inc i \ No newline at end of file diff --git a/compiler/vm/irdbg.nim b/compiler/vm/irdbg.nim index 1dac223587c..5ae7f8d34d9 100644 --- a/compiler/vm/irdbg.nim +++ b/compiler/vm/irdbg.nim @@ -36,13 +36,13 @@ func calcStmt*(irs: IrStore3): seq[bool] = inc i -proc printIr*(irs: IrStore3, exprs: seq[bool]) = +proc printIr*(irs: IrStore3, e: IrEnv, exprs: seq[bool]) = var i = 0 for n in irs.nodes: var line = "" case n.kind of ntkSym: - line = fmt"sym {irs.sym(n).name.s}" + line = fmt"sym {irs.sym(n).int}" of ntkAsgn: case n.asgnKind of askCopy, askDiscr: @@ -56,14 +56,19 @@ proc printIr*(irs: IrStore3, exprs: seq[bool]) = of ntkDeref: line = fmt"deref {n.addrLoc}" of ntkLit: - line = fmt"lit {irs.getLit(n).kind}" + let val = irs.getLit(n).val + if val.isNil: + # a type literal + line = fmt"lit 'nil'" + else: + line = fmt"lit {val.kind}" of ntkUse: line = fmt"use {n.srcLoc}" of ntkGoto: line = fmt"goto label:{n.target}" of ntkLocal: let (k, t, _) = irs.getLocal(i) - line = fmt"local kind:{k} idx:{irs.getLocalIdx(i)} typ:{t.kind}" + line = fmt"local kind:{k} idx:{irs.getLocalIdx(i)} typ:{e.types[t].kind}" of ntkPathObj: line = fmt"path obj:{n.srcLoc} field:{n.fieldIdx}" of ntkPathArr: diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 3b071889ebb..0beb2c20e67 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -21,6 +21,7 @@ import options ], compiler/vm/[ + irtypes, vmir ], experimental/[ @@ -73,6 +74,9 @@ type TCtx* = object options*: set[TOption] + symEnv*: SymbolEnv + types*: DeferredTypeGen + type IrGenResult* = Result[IrStore3, SemReport] @@ -109,18 +113,31 @@ func fail( info, loc) -func irParam(ir: var IrStore3, sym: PSym): IRIndex = - ir.irSym(sym) +func irSym(c: var TCtx, sym: PSym): IRIndex = + let id = c.symEnv.requestSym(sym) + c.irs.irSym(id) + +func irParam(c: var TCtx, sym: PSym): IRIndex = + c.irSym(sym) + +func irGlobal(c: var TCtx, sym: PSym): IRIndex = + c.irSym(sym) + +func irConst(c: var TCtx, sym: PSym): IRIndex = + c.irSym(sym) -func irGlobal(ir: var IrStore3, sym: PSym): IRIndex = - ir.irSym(sym) +func irLit(c: var TCtx, n: PNode): IRIndex = + let typ = + if n.typ != nil: + c.types.requestType(n.typ) + else: + NoneType -func irConst(ir: var IrStore3, sym: PSym): IRIndex = - ir.irSym(sym) + c.irs.irLit((n, typ)) proc irImm(c: var TCtx, val: SomeInteger): IRIndex = # XXX: getSysType has side-effects - c.irs.irLit newIntTypeNode(BiggestInt(val), c.graph.getSysType(unknownLineInfo, tyInt)) + c.irLit newIntTypeNode(BiggestInt(val), c.graph.getSysType(unknownLineInfo, tyInt)) template tryOrReturn(code): untyped = try: @@ -145,27 +162,40 @@ func closeScope(c: var TCtx) = proc genProcSym(c: var TCtx, n: PNode): IRIndex = assert n.kind == nkSym - c.irs.irSym(n.sym) + c.irSym(n.sym) proc irCall(c: var TCtx, name: string, args: varargs[IRIndex]): IRIndex = # TODO: compiler procs should be cached here in `TCtx` let sym = c.graph.getCompilerProc(name) - c.irs.irCall(c.irs.irSym(sym), args) + c.irs.irCall(c.irSym(sym), args) func irCall(c: var TCtx, name: string, m: TMagic, args: varargs[IRIndex]): IRIndex {.inline.} = # TODO: instead of creating a new duplicate magic each time, all used magics # should be only created once and then reused let sym = createMagic(c.graph, c.idgen, name, m) - c.irs.irCall(c.irs.irSym(sym), args) + c.irs.irCall(c.irSym(sym), args) + +func genLocal(c: var TCtx, kind: LocalKind, t: PType): IRIndex = + let + tid = c.types.requestType(t) + + c.irs.genLocal(kind, tid) + +func genLocal(c: var TCtx, kind: LocalKind, s: PSym): IRIndex = + let + sid = c.symEnv.requestSym(s) + tid = c.types.requestType(s.typ) + + c.irs.genLocal(kind, tid, sid) proc getTemp(cc: var TCtx; tt: PType): IRIndex = - let id = cc.irs.genLocal(lkTemp, tt) + let id = cc.genLocal(lkTemp, tt) cc.irs.irLocal(id) -func irNull(c: var IrStore3, t: PType): IRIndex = +func irNull(c: var TCtx, t: PType): IRIndex = # XXX: maybe `irNull` should be a dedicated IR node? let id = c.genLocal(lkTemp, t) - c.irLocal(id) + c.irs.irLocal(id) proc popBlock(c: var TCtx; oldLen: int) = #for f in c.prc.blocks[oldLen].fixups: @@ -404,7 +434,7 @@ proc genCase(c: var TCtx; n: PNode, next: JoinPoint): IRIndex = # elif branches were eliminated during transformation doAssert branch.kind == nkOfBranch - let cond = c.irs.irCall(bcOf, nil, c.irs.irLit(branch)) + let cond = c.irs.irCall(bcOf, NoneType, c.irLit(branch)) c.irs.irBranch(cond, b) r = c.gen2(branch.lastSon) @@ -510,10 +540,10 @@ proc genRaise(c: var TCtx; n: PNode) = # get the exception name let name = newStrNode(nkStrLit, typ.sym.name.s)#c.genLit(n[0], c.toStringCnst(typ.sym.name.s)) - discard c.irs.irCall(bcRaise, nil, dest, c.irs.irLit name) + discard c.irs.irCall(bcRaise, NoneType, dest, c.irLit name) else: # reraise - discard c.irs.irCall(bcRaise, nil) + discard c.irs.irCall(bcRaise, NoneType) # XXX: if the exception's type is statically known, we could do the # exception branch matching at compile-time (i.e. here) @@ -529,11 +559,11 @@ proc genReturn(c: var TCtx; n: PNode): IRIndex = c.irs.irGoto(NormalExit) proc genLit(c: var TCtx; n: PNode): IRIndex = - c.irs.irLit(n) + c.irLit(n) proc genProcLit(c: var TCtx, n: PNode, s: PSym): IRIndex = - c.irs.irSym(s) + c.irSym(s) #[ func doesAlias(c: TCtx, a, b: IRIndex): bool = @@ -550,7 +580,7 @@ proc raiseExit(c: var TCtx) = # TODO: document # if isError: goto surrounding handler - let cond = c.irs.irCall(bcTestError, nil) # XXX: should pass tyBool + let cond = c.irs.irCall(bcTestError, NoneType) # XXX: should pass tyBool c.irs.irBranch(cond, c.prc.nextHandler()) func isVarParam(t: PType): bool = @@ -671,15 +701,15 @@ proc genField(c: TCtx; n: PNode): int = result = s.position -func irLit(ir: var IrStore3, i: SomeInteger): IRIndex = - ir.irLit newIntNode(nkIntLit, BiggestInt(i)) +func irLit(c: var TCtx, i: SomeInteger): IRIndex = + c.irLit newIntNode(nkIntLit, BiggestInt(i)) proc genIndex(c: var TCtx; n: PNode; arr: PType): IRIndex = if arr.skipTypes(abstractInst).kind == tyArray and (let x = firstOrd(c.config, arr); x != Zero): let tmp = c.genx(n) - result = c.irCall("-", mSubI, tmp, c.irs.irLit(toInt(x))) + result = c.irCall("-", mSubI, tmp, c.irLit(toInt(x))) else: result = c.genx(n) @@ -698,7 +728,7 @@ proc isInt16Lit(n: PNode): bool = if n.kind in {nkCharLit..nkUInt64Lit}: result = n.intVal >= low(int16) and n.intVal <= high(int16) -func wrapIf(c: var TCtx, wrapper: BuiltinCall, typ: PType, expr: IRIndex, cond: bool): IRIndex {.inline.} = +func wrapIf(c: var TCtx, wrapper: BuiltinCall, typ: TypeId, expr: IRIndex, cond: bool): IRIndex {.inline.} = if cond: c.irs.irCall(wrapper, typ, expr) else: expr @@ -713,7 +743,7 @@ proc genMagic(c: var TCtx; n: PNode; m: TMagic): IRIndex = # idea: also insert builtin calls to the various check functions here. # Makes it easier to get uniformity across the back-ends. result = c.genCall(n) - result = c.wrapIf(bcOverflowCheck, n.typ, result, optOverflowCheck notin c.options) + result = c.wrapIf(bcOverflowCheck, c.types.requestType(n.typ), result, optOverflowCheck notin c.options) if optOverflowCheck in c.options: # idea: defects (or error in general) could be encoded as part of the values. I.e. a # `bcOverflowCheck` call would return a result-like value (only on @@ -738,7 +768,7 @@ proc genMagic(c: var TCtx; n: PNode; m: TMagic): IRIndex = result = c.irCall("getTypeInfo", mGetTypeInfo, genTypeLit(c, n[1].typ)) of mDefault: - result = c.irs.irNull(n.typ) + result = c.irNull(n.typ) of mRunnableExamples: discard "just ignore any call to runnableExamples" of mDestroy, mTrace: @@ -841,11 +871,11 @@ proc genDiscrVal(c: var TCtx, discr: PSym, n: PNode, oty: PType): (IRIndex, IRIn assert b != -1 # no matching branch; should have been caught already result[0] = c.genLit(n) # discr value - result[1] = c.irs.irLit(b) # branch index + result[1] = c.irLit(b) # branch index else: let tmp = c.genx(n) result[0] = tmp - result[1] = c.irs.irCall(bcGetBranchIndex, nil, tmp, c.genTypeLit(oty), c.irs.irSym(discr)) + result[1] = c.irs.irCall(bcGetBranchIndex, NoneType, tmp, c.genTypeLit(oty), c.irSym(discr)) func isCursor(n: PNode): bool @@ -866,7 +896,7 @@ proc genFieldAsgn(c: var TCtx, obj: IRIndex; le, ri: PNode) = #tmp = c.genDiscrVal(le[1], ri, le[0].typ) #c.irs.irAsgn(askDiscr, p, tmp) let (dVal, bVal) = c.genDiscrVal(s, ri, le[0].typ) - discard c.irs.irCall(bcSwitch, nil, p, dVal, bVal) + discard c.irs.irCall(bcSwitch, NoneType, p, dVal, bVal) func isCursor(n: PNode): bool = case n.kind @@ -883,9 +913,9 @@ func isCursor(n: PNode): bool = proc genRdVar(c: var TCtx; n: PNode;): IRIndex = let s = n.sym if sfGlobal in s.flags: - c.irs.irGlobal(s) + c.irGlobal(s) elif s.kind == skParam: - c.irs.irParam(s) + c.irParam(s) elif s.kind == skResult: c.irs.irLocal(0) # TODO: don't hardcode else: c.irs.irLocal(c.prc.local(s)) @@ -958,13 +988,13 @@ proc genCheckedObjAccessAux(c: var TCtx; n: PNode; dest: var IRIndex) = if optFieldCheck in c.options: let discVal = c.irs.irUse(c.irs.irPathObj(dest, genField(c, disc))) - var cond = c.irCall("contains", mInSet, c.irs.irLit(checkExpr[1]), discVal) + var cond = c.irCall("contains", mInSet, c.irLit(checkExpr[1]), discVal) if negCheck: cond = c.irCall("not", mNot, cond) let lab1 = c.irs.irJoinFwd() c.irs.irBranch(cond, lab1) - discard c.irs.irCall(bcRaiseFieldErr, nil, discVal) + discard c.irs.irCall(bcRaiseFieldErr, NoneType, discVal) c.raiseExit() c.irs.irJoin(lab1) @@ -985,7 +1015,7 @@ proc genCheckedObjAccess(c: var TCtx; n: PNode): IRIndex = c.irs.irPathObj(objR, fieldPos) func genTypeLit(c: var TCtx, t: PType): IRIndex = - c.irs.irLit(PNode(kind: nkType, typ: t)) + c.irs.irLit((nil, c.types.requestType(t))) proc genArrAccess(c: var TCtx; n: PNode): IRIndex = let arrayType = n[0].typ.skipTypes(abstractVarRange-{tyTypeDesc}).kind @@ -1003,7 +1033,8 @@ proc genArrAccess(c: var TCtx; n: PNode): IRIndex = func addVariable(c: var TCtx, kind: LocalKind, s: PSym): IRIndex = assert kind != lkTemp - let id = c.irs.genLocal(kind, s) + + let id = c.genLocal(kind, s) c.prc.locals[s.id] = id c.prc.variables.add(id) inc c.prc.numLocals[^1] @@ -1021,7 +1052,7 @@ proc genVarTuple(c: var TCtx, kind: LocalKind, n: PNode) = if n[i].kind == nkSym: let s = n[i].sym - if s.isGlobal: c.irs.irGlobal(s) + if s.isGlobal: c.irGlobal(s) else: c.addVariable(kind, s) else: c.genx(n[i]) @@ -1055,14 +1086,14 @@ proc genLocalInit(c: var TCtx, kind: LocalKind, a: PNode) = # a function-level global if a[2].kind != nkEmpty: - let dest = c.irs.irGlobal(s) + let dest = c.irGlobal(s) # we don't know if the global was initialized already so we # always copy genAsgn(c, dest, a[2], requiresCopy=true) else: let local = c.addVariable(kind, s) let val = - if a[2].kind == nkEmpty: c.irs.irNull(s.typ) + if a[2].kind == nkEmpty: c.irNull(s.typ) else: genx(c, a[2]) # TODO: assign kind handling needs to be rethought, an assign can be both an init _and_ a move (or shallow) @@ -1130,16 +1161,16 @@ proc genSetConstr(c: var TCtx, n: PNode): IRIndex = if x.kind == nkRange: let a = c.genSetElem(x[0], first) let b = c.genSetElem(x[1], first) - discard c.irs.irCall(bcInclRange, nil, result, a, b) + discard c.irs.irCall(bcInclRange, NoneType, result, a, b) else: let a = c.genSetElem(x, first) discard c.irCall("incl", mIncl, result, a) -func irConv(s: var IrStore3, typ: PType, val: IRIndex): IRIndex = - result = s.irCall(bcConv, typ, val) +func irConv(c: var TCtx, typ: PType, val: IRIndex): IRIndex = + result = c.irs.irCall(bcConv, c.types.requestType(typ), val) -func irCast(s: var IrStore3, typ: PType, val: IRIndex): IRIndex = - result = s.irCall(bcCast, typ, val) +func irCast(c: var TCtx, typ: PType, val: IRIndex): IRIndex = + result = c.irs.irCall(bcCast, c.types.requestType(typ), val) proc genObjConstr(c: var TCtx, n: PNode): IRIndex = @@ -1147,7 +1178,7 @@ proc genObjConstr(c: var TCtx, n: PNode): IRIndex = let t = n.typ.skipTypes(abstractRange+{tyOwned}-{tyTypeDesc}) var obj: IRIndex if t.kind == tyRef: - let nSym = c.irs.irSym getSysSym(c.graph, n.info, "internalNew") + let nSym = c.irSym getSysSym(c.graph, n.info, "internalNew") discard c.irs.irCall(nSym, result) obj = c.irs.irDeref(result) else: @@ -1167,7 +1198,7 @@ proc genObjConstr(c: var TCtx, n: PNode): IRIndex = # XXX: this is a hack to make `tests/vm/tconst_views` work for now. # `transf` removes `nkHiddenStdConv` for array/seq to openArray # conversions, which we could have otherwise relied on - tmp = c.irs.irConv(le, tmp) + tmp = c.irConv(le, tmp) c.irs.irAsgn(askInit, c.irs.irPathObj(obj, idx), tmp) else: @@ -1196,10 +1227,10 @@ proc genTupleConstr(c: var TCtx, n: PNode): IRIndex = proc genClosureConstr(c: var TCtx, n: PNode): IRIndex = let tmp = c.genx(n[0]) let env = - if n[1].kind == nkNilLit: c.irs.irNull(c.graph.getSysType(n.info, tyNil)) + if n[1].kind == nkNilLit: c.irNull(c.graph.getSysType(n.info, tyNil)) else: c.genx(n[1]) - c.irs.irCall(bcNewClosure, n.typ, tmp, env) + c.irs.irCall(bcNewClosure, c.types.requestType(n.typ), tmp, env) template wrapCf(code) = let next {.inject.} = c.irs.irJoinFwd() @@ -1209,6 +1240,10 @@ template wrapCf(code) = proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = when defined(nimCompilerStacktraceHints): setFrameMsg c.config$n.info & " " & $n.kind + + template nodeType(): TypeId = + c.types.requestType(n.typ) + dest = InvalidIndex case n.kind of nkError: @@ -1225,7 +1260,7 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = dest = genProcLit(c, n, s) of skConst: # ``transf`` should've inlined all simple constants already - dest = c.irs.irConst(s) + dest = c.irConst(s) of skEnumField: unreachable("skEnumField not folded") @@ -1254,7 +1289,7 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = else: dest = genCall(c, n) of nkCharLit..nkInt64Lit: - dest = c.irs.irLit(n) + dest = c.irLit(n) of nkUIntLit..pred(nkNilLit): dest = genLit(c, n) of nkNilLit: if not n.typ.isEmptyType: @@ -1262,7 +1297,7 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = internalAssert(c.config, t.kind in {tyPtr, tyRef, tyPointer, tyNil, tyProc, tyCstring}, n.info, $t.kind) - dest = c.irs.irNull(t) + dest = c.irNull(t) else: doAssert false, "why is this needed again?"#unused(c, n) of nkAsgn, nkFastAsgn: genAsgn(c, n[0], n[1], n.kind == nkAsgn) @@ -1309,11 +1344,11 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = discard genx(c, n[0]) # TODO: something like `irVoid` might make sense... of nkHiddenStdConv, nkHiddenSubConv, nkConv: - dest = c.irs.irConv(n.typ, c.genx(n[1])) + dest = c.irConv(n.typ, c.genx(n[1])) of nkObjDownConv: - dest = c.irs.irConv(n.typ, c.genx(n[0])) + dest = c.irConv(n.typ, c.genx(n[0])) of nkObjUpConv: - dest = c.irs.irConv(n.typ, c.genx(n[0])) + dest = c.irConv(n.typ, c.genx(n[0])) of nkVarSection, nkLetSection: genVarSection(c, n) of nkLambdaKinds: @@ -1331,9 +1366,9 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = if optRangeCheck notin c.options or (destTyp.kind in {tyUInt..tyUInt64} and checkUnsignedConversions notin c.config.legacyFeatures): # skip the range-check if range-checks are disabled or not applicable - dest = c.irs.irConv(n.typ, tmp0) + dest = c.irConv(n.typ, tmp0) else: - dest = c.irs.irCall(bcRangeCheck, n.typ, tmp0, tmp1, tmp2) + dest = c.irs.irCall(bcRangeCheck, nodeType(), tmp0, tmp1, tmp2) raiseExit(c) of routineDefs: @@ -1346,23 +1381,23 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = gen(c, n[0], dest) of nkBracket: if isDeepConstExpr(n): - dest = c.irs.irLit(n) + dest = c.irLit(n) elif skipTypes(n.typ, abstractVarRange).kind == tySequence: # XXX: why is this even possible? It is, yes - #c.irs.irLit() + #c.irLit() doAssert false else: dest = genArrayConstr(c, n) of nkCurly: if isDeepConstExpr(n): - dest = c.irs.irLit(n) + dest = c.irLit(n) else: dest = genSetConstr(c, n) of nkObjConstr: dest = genObjConstr(c, n) of nkPar, nkTupleConstr: dest = genTupleConstr(c, n) of nkClosure: dest = genClosureConstr(c, n) of nkCast: - dest = c.irs.irCast(n.typ, c.genx(n[1])) + dest = c.irCast(n.typ, c.genx(n[1])) of nkTypeOfExpr: dest = genTypeLit(c, n.typ) else: @@ -1484,7 +1519,7 @@ proc genProcBody(c: var TCtx; s: PSym, body: PNode) = # TODO: what's the sfPure flag check needed for? if not s.typ[0].isEmptyType() and sfPure notin s.flags: # important: the 'result' variable is not tracked in ``prc.variables`` - discard c.irs.genLocal(lkVar, s.ast[resultPos].sym) + discard c.genLocal(lkVar, s.ast[resultPos].sym) gen(c, body) diff --git a/compiler/vm/irpasses.nim b/compiler/vm/irpasses.nim index 8dc97caab97..c43d7ffe857 100644 --- a/compiler/vm/irpasses.nim +++ b/compiler/vm/irpasses.nim @@ -62,6 +62,20 @@ template customAssert(cond: bool, node: IRIndex) = if not cond: raise (ref PassError)(msg: astToStr(cond), n: node) +type PassEnv* = ref object # XXX: will be a non-`ref` later on + magics*: Table[TMagic, SymId] + compilerprocs*: Table[string, SymId] + + attachedOps*: array[TTypeAttachedOp, Table[TypeId, SymId]] + + sysTypes*: array[TTypeKind, TypeId] + +func getCompilerProc*(g: PassEnv, name: string): SymId = + g.compilerprocs[name] + +func getSysType*(g: PassEnv, kind: TTypeKind): TypeId = + g.sysTypes[kind] + proc runPass*[T](irs: var IrStore3, ctx: T, pass: LinearPass[T]) = var cursor: IrCursor cursor.setup(irs) @@ -76,7 +90,7 @@ proc runPass*[T](irs: var IrStore3, ctx: T, pass: LinearPass[T]) = echo e.getStackTrace() echo "Msg: ", e.msg echo "IR (error at node: ", e.n, "):" - printIr(irs, calcStmt(irs)) + #printIr(irs, calcStmt(irs)) echo "Node was added at: " for e in irs.traceFor(e.n).items: debugEcho e @@ -97,7 +111,7 @@ proc runPass*[T](irs: var IrStore3, ctx: var T, pass: LinearPass2[T]) = echo e.getStackTrace() echo "Msg: ", e.msg echo "IR (error at node: ", e.n, "):" - printIr(irs, calcStmt(irs)) + #printIr(irs, calcStmt(irs)) echo "Node was added at: " for e in irs.traceFor(e.n).items: debugEcho e @@ -479,7 +493,7 @@ func nthField(t: PType, pos: int): PSym = if result == nil and t.len > 0 and t[0] != nil: result = nthField(t[0].skipTypes(skipPtrs), pos) -func computeTypes*(ir: IrStore3): seq[PType] = +func computeTypes*(ir: IrStore3, env: IrEnv): seq[TypeId] = result.newSeq(ir.len) var i = 0 for n in ir.nodes: @@ -494,23 +508,23 @@ func computeTypes*(ir: IrStore3): seq[PType] = else: let callee = ir.at(n.callee) if callee.kind != ntkSym: - result[n.callee][0] # the callee's return type - elif (let s = ir.sym(callee); s.typ != nil): - s.typ[0] + env.types.getReturnType(result[n.callee]) # the callee's return type + elif (let t = env.syms[ir.sym(callee)].typ; t != NoneType): + env.types.getReturnType(t) else: # the symbol for magics created with ``createMagic`` don't have # type information - nil + NoneType of ntkLit: result[i] = ir.getLit(n).typ of ntkSym: let s = ir.sym(n) - customAssert s != nil, i - if s.kind notin routineKinds: + customAssert s != NoneSymbol, i + if env.syms[s].kind notin routineKinds: # don't compute the type for routine symbols. This makes it easier to # figure out the type dependencies later on. - result[i] = s.typ + result[i] = env.syms[s].typ of ntkUse, ntkConsume: result[i] = result[n.srcLoc] of ntkLocal: @@ -520,71 +534,79 @@ func computeTypes*(ir: IrStore3): seq[PType] = # the correct type without creating a new one result[i] = result[n.addrLoc] of ntkDeref: - let t = result[n.addrLoc].skipTypes(abstractInst) - customAssert t.kind in {tyPtr, tyRef, tyVar, tyLent}, i - result[i] = t.elemType + let t = result[n.addrLoc] + customAssert env.types[t].kind in {tnkPtr, tnkRef, tnkVar, tnkLent}, i + result[i] = env.types.elemType(t) of ntkPathObj: - customAssert result[n.srcLoc] != nil, n.srcLoc - let typ = result[n.srcLoc].skipTypes(abstractInst) + customAssert result[n.srcLoc] != NoneType, n.srcLoc + let typ = result[n.srcLoc] let idx = n.fieldIdx - case typ.kind - of tyObject: - let f = typ.nthField(n.fieldIdx) - result[i] = f.typ - of tyTuple: - result[i] = typ[idx] + case env.types[typ].kind + of tnkRecord: + let f = env.types.nthField(typ, n.fieldIdx) + result[i] = env.types[f].typ else: customAssert false, n.srcLoc of ntkPathArr: - result[i] = result[n.srcLoc].elemType() + result[i] = env.types.elemType(result[n.srcLoc]) else: debugEcho "computeTypes missing: ", n.kind inc i -func getMagic(ir: IrStore3, n: IrNode3): TMagic = +func getMagic(ir: IrStore3, env: IrEnv, n: IrNode3): TMagic = assert n.kind == ntkCall if n.isBuiltIn: mNone else: let callee = ir.at(n.callee) if callee.kind == ntkSym: - ir.sym(callee).magic + env.syms[ir.sym(callee)].magic else: mNone func insertLit(cr: var IrCursor, lit: string): IRIndex = - cr.insertLit newStrNode(nkStrLit, lit) + cr.insertLit (newStrNode(nkStrLit, lit), NoneType) func insertLit(cr: var IrCursor, i: int): IRIndex = - cr.insertLit newIntNode(nkIntLit, i) + cr.insertLit (newIntNode(nkIntLit, i), NoneType) -proc insertMagicCall(cr: var IrCursor, g: ModuleGraph, name: string, m: TMagic, args: varargs[IRIndex]): IRIndex {.discardable.} = - cr.insertCallExpr(createMagic(g, g.idgen, name, m), args) +proc insertMagicCall(cr: var IrCursor, g: PassEnv, m: TMagic, args: varargs[IRIndex]): IRIndex {.discardable.} = + cr.insertCallExpr(g.magics[m], args) -proc insertCompProcCall(cr: var IrCursor, g: ModuleGraph, name: string, args: varargs[IRIndex]): IRIndex {.discardable.} = - cr.insertCallExpr(g.getCompilerProc(name), args) +proc insertCompProcCall(cr: var IrCursor, g: PassEnv, name: string, args: varargs[IRIndex]): IRIndex {.discardable.} = + cr.insertCallExpr(g.compilerprocs[name], args) type RefcPassCtx* = object graph: ModuleGraph idgen: IdGenerator - types: seq[PType] + + extra: PassEnv + + env: ptr IrEnv # XXX: in order to get to something working, a `ptr` for now + types: seq[TypeId] # XXX: only used for the ``lowerSeqs`` passes, but `RefcPassCtx` is # currently (ab)-used as the context for most passes localMap: Table[int, int] # old local-name -> new local-name -func setupRefcPass*(c: var RefcPassCtx, g: ModuleGraph, idgen: IdGenerator, ir: IrStore3) = - c.types = computeTypes(ir) # XXX: very bad +func setupRefcPass*(c: var RefcPassCtx, pe: PassEnv, env: ptr IrEnv, g: ModuleGraph, idgen: IdGenerator, ir: IrStore3) = + c.types = computeTypes(ir, env[]) # XXX: very bad c.graph = g c.idgen = idgen + c.extra = pe + c.env = env -func typeof(c: RefcPassCtx, val: IRIndex): PType = - customAssert c.types[val] != nil, val +func typeof(c: RefcPassCtx, val: IRIndex): TypeId = + customAssert c.types[val] != NoneType, val c.types[val] +func typeKindOf(c: RefcPassCtx, val: IRIndex): TypeNodeKind = + customAssert c.types[val] != NoneType, val + c.env.types[c.types[val]].kind + type StorageLoc = enum slUnknown slStack @@ -595,57 +617,57 @@ func storageLoc(c: RefcPassCtx, val: IRIndex): StorageLoc = # TODO: missing slUnknown -proc requestRtti(c: var RefcPassCtx, cr: var IrCursor, t: PType): IRIndex = +proc requestRtti(c: var RefcPassCtx, cr: var IrCursor, t: TypeId): IRIndex = # refc uses the v1 type-info - cr.insertCallExpr(createMagic(c.graph, c.idgen, "getTypeInfo", mGetTypeInfo), cr.insertLit(newNodeIT(nkType, unknownLineInfo, t))) # TODO: bad; don't create a new mGetTypeInfo sym every time + cr.insertCallExpr(c.extra.magics[mGetTypeInfo], cr.insertLit((nil, t))) # TODO: collect for which types rtti was requested proc processMagicCall(c: var RefcPassCtx, cr: var IrCursor, ir: IrStore3, m: TMagic, n: IrNode3) = ## Lowers calls to various magics into calls to `compilerproc`s - case getMagic(ir, n) + case getMagic(ir, c.env[], n) of mDestroy: # An untransformed `mDestroy` indicates a ref or string. `seq` # destructors were lifted into specialized procs already let val = n.args(0) - case c.typeof(val).kind - of tyString: + case c.env.types[c.typeof(val)].kind + of tnkString: cr.replace() - cr.insertCompProcCall(c.graph, "genericSeqAssign") - of tyRef: + cr.insertCompProcCall(c.extra, "genericSeqAssign") + of tnkRef: # XXX: only non-injected destroys for refs should be turned cr.replace() - let nilLit = cr.insertLit(newNode(nkNilLit)) + let nilLit = cr.insertLit((newNode(nkNilLit), NoneType)) let r = c.storageLoc(val) case r of slStack: # if it's on the stack, we can simply assign 'nil' cr.insertAsgn(askShallow, val, nilLit) of slHeap: - cr.insertCompProcCall(c.graph, "asgnRef", val, nilLit) + cr.insertCompProcCall(c.extra, "asgnRef", val, nilLit) of slUnknown: - cr.insertCompProcCall(c.graph, "unsureAsgnRef", val, nilLit) + cr.insertCompProcCall(c.extra, "unsureAsgnRef", val, nilLit) else: discard of mNew: cr.replace() # TODO: alignment value missing - let v = cr.insertCompProcCall(c.graph, "newObjRC1", c.requestRtti(cr, c.typeof(n.args(0))), cr.insertLit(0)) + let v = cr.insertCompProcCall(c.extra, "newObjRC1", c.requestRtti(cr, c.typeof(n.args(0))), cr.insertLit(0)) # XXX: not sure about `askMove` here... cr.insertAsgn(askMove, n.args(0), v) else: discard "ignore" -proc genRefcRefAssign(cr: var IrCursor, g: ModuleGraph, dst, src: IRIndex, sl: StorageLoc) = +proc genRefcRefAssign(cr: var IrCursor, e: PassEnv, dst, src: IRIndex, sl: StorageLoc) = # TODO: document case sl of slStack: cr.insertAsgn(askShallow, dst, src) of slHeap: - cr.insertCompProcCall(g, "asgnRef", dst, src) + cr.insertCompProcCall(e, "asgnRef", dst, src) of slUnknown: - cr.insertCompProcCall(g, "unsureAsgnRef", dst, src) + cr.insertCompProcCall(e, "unsureAsgnRef", dst, src) proc applyRefcPass(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = @@ -653,17 +675,17 @@ proc applyRefcPass(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCurso of ntkAsgn: case n.asgnKind of askMove: - if c.typeof(n.wrLoc).kind in {tyString, tyRef, tySequence}: - genRefcRefAssign(cr, c.graph, n.wrLoc, n.srcLoc, c.storageLoc(n.wrLoc)) + if c.typeKindOf(n.wrLoc) in {tnkString, tnkRef, tnkSeq}: + genRefcRefAssign(cr, c.extra, n.wrLoc, n.srcLoc, c.storageLoc(n.wrLoc)) # XXX: source needs to be zeroed? of askCopy: - case c.typeof(n.wrLoc).kind - of tyString: + case c.typeKindOf(n.wrLoc) + of tnkString: cr.replace() - cr.insertCompProcCall(c.graph, "copyString", n.wrLoc, n.srcLoc) - of tySequence: + cr.insertCompProcCall(c.extra, "copyString", n.wrLoc, n.srcLoc) + of tnkSeq: cr.replace() - cr.insertCompProcCall(c.graph, "genericSeqAssign", n.wrLoc, n.srcLoc) + cr.insertCompProcCall(c.extra, "genericSeqAssign", n.wrLoc, n.srcLoc) else: discard of askInit, askShallow, askDiscr: @@ -671,24 +693,28 @@ proc applyRefcPass(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCurso discard of ntkCall: - processMagicCall(c, cr, ir, getMagic(ir, n), n) + processMagicCall(c, cr, ir, getMagic(ir, c.env[], n), n) else: discard type HookCtx* = object - graph: ModuleGraph - types: seq[PType] + graph: PassEnv + env: ptr IrEnv + types: seq[TypeId] -func initHookCtx*(g: ModuleGraph, ir: IrStore3): HookCtx = - HookCtx(graph: g, types: computeTypes(ir)) +func initHookCtx*(g: PassEnv, ir: IrStore3, env: IrEnv): HookCtx = + HookCtx(graph: g, types: computeTypes(ir, env)) -func hasAttachedOp*(c: HookCtx, op: TTypeAttachedOp, typ: PType): bool = - assert typ != nil - c.graph.getAttachedOp(typ, op) != nil +func hasAttachedOp*(c: HookCtx, op: TTypeAttachedOp, typ: TypeId): bool = + assert typ != NoneType + typ in c.graph.attachedOps[op] +func getAttachedOp(c: HookCtx, op: TTypeAttachedOp, typ: TypeId): SymId = + assert typ != NoneType + c.graph.attachedOps[op][typ] -func typeof(c: HookCtx, n: IRIndex): PType = - customAssert c.types[n] != nil, n +func typeof(c: HookCtx, n: IRIndex): TypeId = + customAssert c.types[n] != NoneType, n c.types[n] func injectHooks(c: HookCtx, n: IrNode3, cr: var IrCursor) = @@ -704,11 +730,11 @@ func injectHooks(c: HookCtx, n: IrNode3, cr: var IrCursor) = of askMove: if hasAttachedOp(c, attachedSink, typ): cr.replace() - cr.insertCallStmt(c.graph.getAttachedOp(typ, attachedSink), n.wrLoc, n.srcLoc) + cr.insertCallStmt(c.getAttachedOp(attachedSink, typ), n.wrLoc, n.srcLoc) of askCopy: if hasAttachedOp(c, attachedAsgn, typ): cr.replace() - cr.insertCallStmt(c.graph.getAttachedOp(typ, attachedAsgn), n.wrLoc, n.srcLoc) + cr.insertCallStmt(c.getAttachedOp(attachedAsgn, typ), n.wrLoc, n.srcLoc) of askShallow, askDiscr: discard "nothing to do" @@ -726,16 +752,15 @@ func injectHooks(c: HookCtx, n: IrNode3, cr: var IrCursor) = discard func insertError(cr: var IrCursor, err: string): IRIndex {.discardable.} = - cr.insertCallExpr(bcError, nil, cr.insertLit err) - + cr.insertCallExpr(bcError, NoneType, cr.insertLit err) type GenericTransCtx = object graph: ModuleGraph - types: seq[PType] + types: seq[TypeId] -func setupTransCtx*(g: ModuleGraph, ir: IrStore3): GenericTransCtx = +func setupTransCtx*(g: ModuleGraph, ir: IrStore3, env: IrEnv): GenericTransCtx = result.graph = g - result.types = computeTypes(ir) + result.types = computeTypes(ir, env) # XXX: the field position is not necessarily 2; the value should be detected # during compilation instead @@ -774,21 +799,21 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) ## implementation case n.kind of ntkCall: - case getMagic(ir, n) + case getMagic(ir, c.env[], n) of mSetLengthStr: cr.replace() # TODO: is shallow correct here? - cr.insertAsgn(askShallow, n.args(0), cr.insertCompProcCall(c.graph, "setLengthStr", n.args(0), n.args(1))) + cr.insertAsgn(askShallow, n.args(0), cr.insertCompProcCall(c.extra, "setLengthStr", n.args(0), n.args(1))) of mSetLengthSeq: cr.replace() # TODO: evaluation order might be violated here - cr.insertAsgn(askShallow, n.args(0), cr.insertCompProcCall(c.graph, "setLengthSeqV2", n.args(0), c.requestRtti(cr, c.typeof(n.args(0))), n.args(1))) + cr.insertAsgn(askShallow, n.args(0), cr.insertCompProcCall(c.extra, "setLengthSeqV2", n.args(0), c.requestRtti(cr, c.typeof(n.args(0))), n.args(1))) of mNewSeq: cr.replace() let val = n.args(0) - let nilLit = cr.insertLit(newNode(nkNilLit)) + let nilLit = cr.insertLit((newNode(nkNilLit), NoneType)) let sl = c.storageLoc(val) case sl @@ -796,19 +821,19 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) # write barrier # TODO: document let target = cr.newJoinPoint() - cr.insertBranch(cr.insertMagicCall(c.graph, "isNil", mIsNil), target) + cr.insertBranch(cr.insertMagicCall(c.extra, mIsNil), target) # TODO: use nimGCunrefNoCylce when applicable - cr.insertCompProcCall(c.graph, "nimGCunrefRC1", val) + cr.insertCompProcCall(c.extra, "nimGCunrefRC1", val) cr.insertAsgn(askShallow, val, nilLit) cr.insertGoto(target) cr.insertJoin(target) - var ns = cr.insertCompProcCall(c.graph, "newSeq", c.requestRtti(cr, c.typeof(val)), n.args(1)) + var ns = cr.insertCompProcCall(c.extra, "newSeq", c.requestRtti(cr, c.typeof(val)), n.args(1)) ns = cr.insertCast(c.typeof(val), ns) cr.insertAsgn(askShallow, val, ns) of slStack: - var ns = cr.insertCompProcCall(c.graph, "newSeq", c.requestRtti(cr, c.typeof(val)), n.args(1)) + var ns = cr.insertCompProcCall(c.extra, "newSeq", c.requestRtti(cr, c.typeof(val)), n.args(1)) ns = cr.insertCast(c.typeof(val), ns) cr.insertAsgn(askShallow, val, ns) @@ -816,7 +841,7 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) cr.replace() let val = cr.position - discard cr.insertCast(c.typeof(val), cr.insertCompProcCall(c.graph, "nimNewSeqOfCap", c.requestRtti(cr, c.typeof(val)), n.args(0))) + discard cr.insertCast(c.typeof(val), cr.insertCompProcCall(c.extra, "nimNewSeqOfCap", c.requestRtti(cr, c.typeof(val)), n.args(0))) of mAppendSeqElem: # ``seq &= x`` is transformed into: @@ -824,32 +849,33 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) # ``seq = `` cr.replace() let seqVal = n.args(0) - let typ = c.typeof(seqVal).skipTypes({tyVar}) + let typ = c.typeof(seqVal)#.skipTypes({tyVar}) # XXX: if the refc pass would be run after the `lowerSeqV1` pass, a # `askMove` assignment could be used here instead - cr.genRefcRefAssign(c.graph, seqVal, cr.insertCast(typ, cr.insertCompProcCall(c.graph, "incrSeqV3", seqVal, c.requestRtti(cr, typ)) ), c.storageLoc(seqVal)) + cr.genRefcRefAssign(c.extra, seqVal, cr.insertCast(typ, cr.insertCompProcCall(c.extra, "incrSeqV3", seqVal, c.requestRtti(cr, typ)) ), c.storageLoc(seqVal)) # TODO: filling the element and adjusting the seq length is missing - discard cr.insertCallExpr(bcError, nil, cr.insertLit "Not implemented: lowerSeqsV1.mAppendSeqElem") + cr.insertError("Not implemented: lowerSeqsV1.mAppendSeqElem") of mAppendStrStr: cr.replace() var lens: array[2, IRIndex] #lens[0] = genIfThanElse() # we `len` call needs to be lowered directly - discard cr.insertCallExpr(bcError, nil, cr.insertLit "Not implemented: lowerSeqsV1.mAppendStrStr") + cr.insertError("Not implemented: lowerSeqsV1.mAppendStrStr") of mLengthStr: cr.replace() # XXX: might be a good idea to cache the `string` type - let strTyp = c.graph.getCompilerProc("NimStringDesc") + let strTyp = c.extra.compilerprocs["NimStringDesc"] #genIfThanElse(cr.insertMagicCall("isNil", mIsNil, a.val)) - discard cr.insertCallExpr(bcError, nil, cr.insertLit "Not implemented: lowerSeqsV1.mLengthStr") + cr.insertError("Not implemented: lowerSeqsV1.mLengthStr") else: discard + #[ of ntkLocal: # replace locals of `seq` and `string` type with locals of the lowered type # XXX: there's currently no way to replace an existing local (would be @@ -857,7 +883,7 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) # introducing a new local and replacing all reference to the old one let (lk, origTyp, sym) = ir.getLocal(cr.position) - let typ = origTyp.skipTypes(abstractInst) + let typ = origTyp # TODO: handle ``var`` and ``lent`` wrapped types here case typ.kind @@ -870,8 +896,6 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) let idx = ir.getLocalIdx(cr.position) var newName = c.localMap.getOrDefault(idx, -1) if newName == -1: - # XXX: ugly; the whole backend would probably benefit from it's own - # symbol representation let nt = c.requestSeqType(typ) if sym != nil: let ns = copySym(sym, c.idgen.nextSymId()) @@ -940,20 +964,21 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) sym.typ.add newType else: sym.typ = newType + ]# of ntkPathArr: - let arrTyp = c.typeof(n.srcLoc).skipTypes(abstractInst) + let arrTyp = c.typeof(n.srcLoc) # TODO: needs tests - case skipTypes(arrTyp, {tyVar, tyLent}).kind - of tyString, tySequence: + case c.env.types[arrTyp].kind#skipTypes(arrTyp, {tyVar, tyLent}).kind + of tnkString, tnkSeq: # --> x[].data[idx] cr.replace() var r = cr.insertDeref(n.srcLoc) # a `lent seq` is not a treated as a `ptr NimSeq` but just as `NimSeq` # (`NimSeq` itself is a pointer type) - if arrTyp.kind == tyVar: + if c.env.types[arrTyp].kind == tnkVar: r = cr.insertDeref(r) r = cr.insertPathObj(r, SeqDataFieldPos) @@ -971,13 +996,19 @@ func lowerSeqsV2(c: GenericTransCtx, n: IrNode3, cr: var IrCursor) = doAssert false, "missing" type LiftPassCtx* = object - graph*: ModuleGraph + graph*: PassEnv idgen*: IdGenerator cache*: IdentCache - typeInfoMarker*: Table[SigHash, PSym] # sig hash -> type info sym + env*: ptr IrEnv + + typeInfoMarker*: Table[TypeId, SymId] # sig hash -> type info sym + + syms*: seq[(SymId, TypeId)] ## all lifted globals - syms*: seq[(PSym, PType)] ## all lifted globals +func addGlobal*(c: var LiftPassCtx, t: TypeId, name: string): SymId = + # XXX: temporary helper + c.env.syms.addSym(skLet, t, name, {sfGlobal}) # XXX: uh-oh, hidden mutation proc liftTypeInfoV1(c: var LiftPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = ## Turns all ``mGetTypeInfo`` calls into globals and collects the newly @@ -985,38 +1016,38 @@ proc liftTypeInfoV1(c: var LiftPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCurs # XXX: can this really be considered lifting? case n.kind of ntkCall: - if getMagic(ir, n) == mGetTypeInfo: + if getMagic(ir, c.env[], n) == mGetTypeInfo: cr.replace() let typ = ir.getLit(ir.at(n.args(0))).typ - sig = hashType(typ) - assert typ != nil + assert typ != NoneType - var s = c.typeInfoMarker.getOrDefault(sig) - if s == nil: + # XXX: the types weren't canonicalized, so we're creating lots of + # duplicate type info globals for the same type + var s = c.typeInfoMarker.getOrDefault(typ) + if s == NoneSymbol: # TODO: either use a `Rope` here or use a string buffer stored in # `LiftPassCtx` that is reserved for temporary usage like this - let name = "NTI" & $sig & "_" # XXX: too many short-lived and unnecessary allocations + let name = "NTI" & $(typ.int) & "_" # XXX: too many short-lived and unnecessary allocations - # the symbol is owned by the module the type is owned by - s = newSym(skVar, c.cache.getIdent(name), c.idgen.nextSymId(), typ.owner.getModule(), unknownLineInfo) # TODO: cache the `TNimType` type - s.typ = c.graph.getCompilerProc("TNimType").typ - s.flags.incl sfGlobal + let globalType = c.env.syms[c.graph.getCompilerProc("TNimType")].typ + # the symbol is owned by the module the type is owned by + s = c.addGlobal(globalType, name) - c.typeInfoMarker[sig] = s + c.typeInfoMarker[typ] = s # TODO: cache the `pointer` type - discard cr.insertCast(c.graph.getSysType(unknownLineInfo, tyPointer), cr.insertSym s) + discard cr.insertCast(c.graph.getSysType(tyPointer), cr.insertSym s) else: discard const ErrFlagName = "nimError" -proc lowerTestError*(ir: var IrStore3, g: ModuleGraph, cache: IdentCache, idgen: IdGenerator, owner: PSym) = +proc lowerTestError*(ir: var IrStore3, g: PassEnv, types: TypeEnv, syms: var SymbolEnv) = ## Lowers ``bcTestError`` builtin calls for the C-like targets. Turns ## ``bcTestError`` into ``unlikelyProc(ErrFlagName[])`` and inserts a ## @@ -1050,24 +1081,19 @@ proc lowerTestError*(ir: var IrStore3, g: ModuleGraph, cache: IdentCache, idgen: cr.setPos 0 let p = g.getCompilerProc("nimErrorFlag") - s = newSym(skLet, cache.getIdent(ErrFlagName), idgen.nextSymId(), owner, unknownLineInfo) - s.typ = p.getReturnType() + # TODO: this lookup yields the same across all calls to `lowerTestError`. Cache both the compiler proc and it's return type + typ = types.getReturnType(syms[p].typ) + s = syms.addSym(skLet, typ, ErrFlagName) # XXX: no caching is currently done for the symbol names, so a lot of duplicated strings are created here... - errFlag = cr.insertLocalRef(cr.newLocal(lkLet, s)) + + errFlag = cr.insertLocalRef(cr.newLocal(lkLet, typ, s)) cr.insertAsgn(askInit, errFlag, cr.insertCallExpr(p)) cr.setPos i # set cursor back to the current position cr.replace() - var up: PSym - # TODO: `systemModuleSyms` only picks up on the 'unlikelyProc' if it's exported... - for x in g.systemModuleSyms(cache.getIdent("unlikelyProc")): - up = x - break - assert up != nil - - discard cr.insertCallExpr(up, cr.insertDeref(errFlag)) + discard cr.insertCallExpr(bcUnlikely, NoneType, cr.insertDeref(errFlag)) # TODO: `NoneType` is wrong here else: discard @@ -1082,20 +1108,20 @@ proc lowerSets*(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) # `cr.inline` here case n.kind of ntkCall: - case getMagic(ir, n) + case getMagic(ir, c.env[], n) of mInSet: cr.replace() - let setType = skipTypes(c.typeof(cr.position), abstractVar) - let size = getSize(c.graph.config, setType).int + let setType = c.typeof(n.args(0)) + let size = c.env.types.getSize(setType).int case size of 1, 2, 4, 8: # small sets cr.insertError("mInSet for small sets missing") else: - let uintTyp = c.graph.getSysType(unknownLineInfo, tyUInt) + let uintTyp = c.extra.getSysType(tyUInt) let conv = cr.insertConv(uintTyp, n.args(1)) - cr.insertMagicCall(c.graph, "and", mBitandI, cr.insertMagicCall(c.graph, "shr", mShrI, conv, cr.insertLit 3), cr.insertMagicCall(c.graph, "shl", mShlI, cr.insertLit 1, cr.insertMagicCall(c.graph, "and", mBitandI, conv, cr.insertLit 7))) + cr.insertMagicCall(c.extra, mBitandI, cr.insertMagicCall(c.extra, mShrI, conv, cr.insertLit 3), cr.insertMagicCall(c.extra, mShlI, cr.insertLit 1, cr.insertMagicCall(c.extra, mBitandI, conv, cr.insertLit 7))) # TODO: unfinished #binaryExprIn(p, e, a, b, d, "(($1[(NU)($2)>>3] &(1U<<((NU)($2)&7U)))!=0)") @@ -1120,29 +1146,29 @@ proc lowerRangeChecks*(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrC var cond: IRIndex var raiser: string - case srcTyp.kind - of tyUInt, tyUInt64: + case c.env.types[srcTyp].kind + of tnkInt: # tyUInt, tyUInt64: # .. code:: nim # cast[dstTyp](high) < val - cond = cr.insertMagicCall(c.graph, "<", mLtU, cr.insertCast(srcTyp, n.args(0)), n.args(2)) + cond = cr.insertMagicCall(c.extra, mLtU, cr.insertCast(srcTyp, n.args(0)), n.args(2)) raiser = "raiseRangeErrorNoArgs" else: - let dstTyp = skipTypes(c.typeof(cr.position), abstractVarRange) - case dstTyp.kind - of tyUInt8..tyUInt32, tyChar: + let dstTyp = c.typeof(cr.position)#skipTypes(c.typeof(cr.position), abstractVarRange) + case c.env.types[dstTyp].kind + of tnkInt: #tyUInt8..tyUInt32, tyChar: raiser = "raiseRangeErrorU" - of tyFloat..tyFloat128: + of tnkFloat: #tyFloat..tyFloat128: raiser = "raiseRangeErrorF" let conv = cr.insertConv(dstTyp, n.args(0)) # no need to lower the `or` into an `ntkBranch` + `ntkJoin` here; it has no impact on further analysis - cond = cr.insertMagicCall(c.graph, "or", mOr, cr.insertMagicCall(c.graph, "<", mLtF64, conv, n.args(1)), cr.insertMagicCall(c.graph, "<", mLtF64, n.args(2), conv)) + cond = cr.insertMagicCall(c.extra, mOr, cr.insertMagicCall(c.extra, mLtF64, conv, n.args(1)), cr.insertMagicCall(c.extra, mLtF64, n.args(2), conv)) else: cr.insertError("missing chkRange impl") raiser = - case skipTypes(c.typeof(cr.position), abstractVarRange).kind - of tyFloat..tyFloat128: "raiseRangeErrorF" + case c.env.types[c.typeof(cr.position)].kind#skipTypes(c.typeof(cr.position), abstractVarRange).kind + of tnkFloat: "raiseRangeErrorF"#tyFloat..tyFloat128: "raiseRangeErrorF" else: "raiseRangeErrorI" #[ @@ -1155,8 +1181,8 @@ proc lowerRangeChecks*(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrC ]# let target = cr.newJoinPoint() - cr.insertBranch(cr.insertMagicCall(c.graph, "not", mNot, cond), target) - cr.insertCompProcCall(c.graph, raiser, n.args(0), n.args(1), n.args(2)) + cr.insertBranch(cr.insertMagicCall(c.extra, mNot, cond), target) + cr.insertCompProcCall(c.extra, raiser, n.args(0), n.args(1), n.args(2)) # XXX: it would be nice if we could also move the following # ``if bcTestError(): goto error`` into the branch here diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index fa54cc5b4c4..77c38daa20d 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -1,16 +1,34 @@ ## The definitions for the type representation used by the compiler back-end (mid-end?) IR. +import + std/[ + tables + ], + compiler/front/[ + options, msgs + ], + compiler/ast/[ + ast_types, + ast, + types + ] + +from compiler/vm/vmdef import unreachable + type RecordNodeKind* = enum rnkEmpty # meant to be used by the garbage collector to fill cleaned slots rnkList + rnkFields rnkCase rnkBranch type RecordNode* = object - kind: RecordNodeKind - len: uint32 ## the number of items - a: uint32 ## - b: uint32 + kind*: RecordNodeKind + len*: uint32 ## the number of items + a*: uint32 ## + b*: uint32 + +type RecordNodeIndex* = distinct uint32 type RecordId* = distinct uint32 @@ -18,10 +36,16 @@ type TypeId* = distinct uint32 type SymId* = distinct uint32 -type TypeNodeKind = enum +type TypeNodeKind* = enum tnkEmpty + tnkVoid + + tnkBool + tnkChar + tnkInt + tnkUInt tnkFloat tnkRef @@ -32,30 +56,49 @@ type TypeNodeKind = enum tnkSeq tnkOpenArray tnkString + tnkCString + tnkUncheckedArray #tnkSink # XXX: ? + tnkSet + tnkRecord # tuples and objects tnkArray tnkProc - tnkDistinct # XXX: not sure + tnkTypeDesc ## only relevant for compile-time function evaluation - tnkImported # an imported type. Has one child node, used to derive the access semantics from tnkName # a reference to named type #tnkAlias type FieldDesc* = object - sym: SymId # may be empty - typ: TypeId + sym*: SymId # may be empty + typ*: TypeId # XXX: bitsize should likely be stored as part of FieldDesc +type FieldId* = distinct uint32 + type TypeNode* = object kind: TypeNodeKind a: uint32 b: uint32 +# XXX: there actually exist two kinds of types of which the backend cares +# about the distinction: +# * "declared type": the entity that is defined in a 'type' section in the source code. Not relevant at the IR-level, only for the code-generators. Represents information such as the type's name, if it's imported, which C-header it depends on, etc. +# * type (haven't found a good name for this one yet): the raw type information used by the IR + + +type Type* = object + kind*: TypeNodeKind + base: TypeId + a*: uint32 + b*: uint32 + c*: uint32 # for records, a ``RecordNodeIndex`` + sig: seq[TypeId] # for procedures + type TypeEnv* = object ## Holds the data for all types # XXX: in general, a `seq[seq]` could be used for `records`, `fields`, and @@ -64,13 +107,73 @@ type TypeEnv* = object # memory fragmentation and reduce cache locality records: seq[RecordNode] ## the bodies for all record-like types (objects and tuples) in one contiguous seq fields: seq[FieldDesc] ## all fields - types: seq[TypeNode] ## all types in one contiguous seq + types: seq[Type] ## all types in one contiguous seq # XXX: maybe a redirection table for `tnkName` makes sense? Alternatively, # indirections to another tnkName could be allowed + typdescs: Table[ItemId, PType] # type-id -> a `tyTypeDesc` type type TypeLookup* = object ## Data needed for mapping ``PType`` to the ``TypeId`` +type + DeferredTypeGen* = object + env*: ptr TypeEnv # XXX: a `lent` should be used instead of a pointer. It + # would also make sure that the borrowed from + # `TypeEnv` is sealed for the lifetime of the `DeferredTypeGen` + map: Table[ItemId, TypeId] # type-id -> ``TypeId`` + list: seq[(PType, int)] ## the list of deferred types in the order they were requested + + voidType*: PType ## a ``PType`` of kind ``tyVoid``. Requesting a nil type + ## is remapped to a request using this type + charType*: PType + + trace: int + traces: seq[seq[StackTraceEntry]] + isInGen: bool + + nextTypeId: uint32 + + Declaration* = object + name*: string # the name to used for the declaration in the output of the + # code-generators. If `forceName` is false, the name may be + # escaped if deemed necessary. `name` is allowed be empty. + forceName*: bool + header*: string # only needed for the C-backend. # TODO: this need to be handled differently. Will likely use the attachment strategy + + Symbol* = object + ## The symbol representation used by the backend + + # XXX: using one type (i.e. `Symbol` to describe procedures, locals, + # globals and constants) might be the wrong approach for the backend. + # They all require different kinds of information and while using an + # opaque handle and attaching data to it separately works, it's + # probably a better idea to put them in fully separate namespaces. + # This would also allow eaiser dependency scanning without requiring + # and indirection (at the cost of more enum values in `IrNodeKind3`) + + kind*: TSymKind + typ*: TypeId + + flags*: TSymFlags + + magic*: TMagic + position*: int # inherited from `TSym`, might be removed/replaced + + decl*: Declaration + + SymbolEnv* = object + # TODO: maybe rename? + + symbols*: seq[Symbol] + + # TODO: maybe split off `map` into a separate `SymbolLookup` type? + map: Table[ItemId, SymId] # ``PSym``-id -> ``SymId`` + + # XXX: `orig` will likely be removed/replaced later on + orig*: Table[SymId, PSym] # stores the associated ``PSym`` for a symbol. Currently meant to be used by the code-generators. + +const NoneType* = TypeId(0) +const NoneSymbol* = SymId(0) # XXX: copied from `ccgtypes`, might need some adjustments const @@ -78,6 +181,147 @@ const tyDistinct, tyRange, tyStatic, tyAlias, tySink, tyInferred, tyOwned} +func `==`*(a, b: TypeId): bool {.borrow.} +func `==`*(a, b: SymId): bool {.borrow.} + +func `inc`*(a: var RecordNodeIndex, val: int = 1) {.borrow.} + +type SomeId = TypeId | SymId | RecordId | FieldId + +template toIndex*(id: SomeId): uint32 = + id.uint32 - 1 + +template toId[T: SomeId](index: Natural, id: typedesc[T]): T = + T(index + 1) + +func `[]`*(e: SymbolEnv, s: SymId): lent Symbol = + e.symbols[s.int - 1] + +func `[]`*(e: TypeEnv, t: TypeId): lent Type = + e.types[toIndex(t)] + +func `[]`*(e: TypeEnv, f: FieldId): lent FieldDesc = + e.fields[f.int - 1] + +func field*(e: TypeEnv, f: Natural): lent FieldDesc = + e.fields[f] + +func `[]`*(e: TypeEnv, i: RecordNodeIndex): lent RecordNode = + e.records[i.int] + +func `[]`*(e: TypeEnv, i: RecordId): lent RecordNode = + e.records[toIndex(i)] + +func getReturnType*(e: TypeEnv, t: TypeId): TypeId = + ## Returns the return type of the given procedure type `t` + assert e[t].kind == tnkProc, $e[t].kind + e[t].sig[0] + +func elemType*(e: TypeEnv, t: TypeId): TypeId = + e[t].base + +func baseType*(e: TypeEnv, t: TypeId): TypeId = + e[t].base + +func numFields*(n: RecordNode): int = + assert n.kind == rnkList + n.a.int + +func base*(t: Type): TypeId = + t.base + + +func record*(t: Type): RecordId = + assert t.kind == tnkRecord + t.c.RecordId + +type Fields* = distinct Slice[uint32] + +iterator pairs*(f: Fields): (int, FieldId) = + let + o = Slice[uint32](f) + L = o.len + var i = 0 + while i < L: + yield (i, toId(o.a + i.uint32, FieldId)) + inc i + +func fields*(env: TypeEnv, id: TypeId): Fields = + let t = env[id] + assert t.kind == tnkRecord + Fields((t.a + 1) .. (t.a + env[t.record].a - 1)) + +func `[]`*(f: Fields, x: Natural): FieldId {.inline.} = + let o = Slice[uint32](f) + assert x <= o.b.int + toId(o.a.int + x, FieldId) + +func numFields*(env: TypeEnv, t: TypeId): int = + assert env[t].kind == tnkRecord + env[env[t].record].a.int + + +func combine(lo, hi: uint32): uint64 {.inline.} + +# TODO: use ``BiggestUInt`` here +func length*(e: TypeEnv, t: TypeId): uint = + assert e[t].kind in {tnkArray, tnkSet} + combine(e[t].a, e[t].b).uint + +func callConv*(e: TypeEnv, t: TypeId): TCallingConvention = + assert e[t].kind == tnkProc + e[t].a.TCallingConvention + +func callConv*(t: Type): TCallingConvention = + assert t.kind == tnkProc + t.a.TCallingConvention + +func param*(e: TypeEnv, t: TypeId, i: BackwardsIndex): TypeId = + assert e[t].kind == tnkProc + e[t].sig[i] + +func param*(e: TypeEnv, t: TypeId, i: Natural): TypeId = + assert e[t].kind == tnkProc + e[t].sig[i] + +func size*(t: Type): uint = + assert t.kind in {tnkFloat, tnkInt, tnkUInt} + t.a.uint + +func length*(t: Type): uint = + assert t.kind in {tnkSet, tnkArray} + t.a.uint + +iterator params*(e: TypeEnv, t: TypeId): TypeId = + let typ = e[t] + assert typ.kind == tnkProc + for i in 1.. ir position - syms: seq[PSym] - literals: seq[PNode] - locals: seq[(LocalKind, PType, PSym)] + #syms: seq[PSym] + literals: seq[Literal] + locals: seq[(LocalKind, TypeId, SymId)] localSrc: seq[seq[StackTraceEntry]] sources: seq[seq[StackTraceEntry]] # the stack trace of where each node was added + + IrEnv* = object + ## + syms*: SymbolEnv + types*: TypeEnv + CodeFragment* = object code*: seq[TInstr] debug*: seq[TLineInfo] @@ -350,17 +364,21 @@ func add(x: var IrStore3, n: sink IrNode3): IRIndex = ## version 2/3 -func genLocal*(c: var IrStore3, kind: LocalKind, typ: PType): int = - assert typ != nil - c.locals.add((kind, typ, nil)) +func genLocal*(c: var IrStore3, kind: LocalKind, typ: TypeId): int = + assert typ != NoneType + c.locals.add((kind, typ, NoneSymbol)) result = c.locals.high {.noSideEffect.}: c.localSrc.add(getStackTraceEntries()) -func genLocal*(c: var IrStore3, kind: LocalKind, sym: PSym): int = +func genLocal*(c: var IrStore3, kind: LocalKind, typ: TypeId, sym: SymId): int = ## A local that has a symbol - assert sym.typ != nil - c.locals.add((kind, sym.typ, sym)) + # XXX: introduce an ``OptionalTypeId`` and ``OptionalSymId`` and make ``TypeId`` and ``SymId`` mean never none? + assert sym != NoneSymbol + assert typ != NoneType + # XXX: maybe not require `typ` here and only store either a type or symbol + # for the local? + c.locals.add((kind, typ, sym)) result = c.locals.high {.noSideEffect.}: c.localSrc.add(getStackTraceEntries()) @@ -371,11 +389,11 @@ func irContinue*(c: var IrStore3) = func irUse*(c: var IrStore3, loc: IRIndex): IRIndex = c.add(IrNode3(kind: ntkUse, theLoc: loc)) -proc irSym*(c: var IrStore3, sym: PSym): IRIndex = +proc irSym*(c: var IrStore3, sym: SymId): IRIndex = # TODO: don't add duplicate items? - assert sym != nil - c.syms.add(sym) - c.add(IrNode3(kind: ntkSym, symIdx: c.syms.high)) + assert sym != NoneSymbol + #c.syms.add(sym) + c.add(IrNode3(kind: ntkSym, sym: sym)) func irDeref*(c: var IrStore3, val: IRIndex): IRIndex = c.add(IrNode3(kind: ntkDeref, addrLoc: val)) @@ -465,7 +483,7 @@ proc irStmt*(c: var IrStore, opc: TOpcode, args: varargs[IRIndex]): IRIndex {.di func irCall*(c: var IrStore3, callee: IRIndex, args: varargs[IRIndex]): IRIndex = c.add(IrNode3(kind: ntkCall, isBuiltin: false, callee: callee, args: @args)) -func irCall*(c: var IrStore3, callee: BuiltinCall, typ: PType, args: varargs[IRIndex]): IRIndex = +func irCall*(c: var IrStore3, callee: BuiltinCall, typ: TypeId, args: varargs[IRIndex]): IRIndex = c.add(IrNode3(kind: ntkCall, isBuiltin: true, builtin: callee, typ: typ, args: @args)) proc irCallExpr*(c: var IrStore, name: IRIndex, noSideEffect: bool, args: varargs[IRIndex]): IRIndex = @@ -536,10 +554,10 @@ func irAddr*(c: var IrStore3, loc: IRIndex): IRIndex = # version 1 (old) transition helpers -func irLit*(c: var IrStore3, n: PNode): IRIndex = +func irLit*(c: var IrStore3, lit: Literal): IRIndex = #assert n.typ != nil result = c.add(IrNode3(kind: ntkLit, litIdx: c.literals.len)) - c.literals.add(n) + c.literals.add(lit) # TODO: not related to the IR. Might need a better home proc append*(dst: var CodeFragment, f: CodeFragment) = @@ -563,23 +581,23 @@ iterator nodes*(s: IrStore3): lent IrNode3 = for it in s.nodes: yield it -iterator locals*(s: IrStore3): (PType, PSym) = +iterator locals*(s: IrStore3): (TypeId, SymId) = for it in s.locals: yield (it[1], it[2]) func at*(irs: IrStore3, i: IRIndex): lent IrNode3 = irs.nodes[i] -func sym*(c: IrStore3, n: IrNode3): PSym = - c.syms[n.symIdx] +func sym*(c: IrStore3, n: IrNode3): SymId = + n.sym #c.syms[n.symIdx] -func getLocal*(irs: IrStore3, n: IRIndex): (LocalKind, PType, PSym) = +func getLocal*(irs: IrStore3, n: IRIndex): (LocalKind, TypeId, SymId) = irs.locals[irs.nodes[n].local] func getLocalIdx*(irs: IrStore3, n: IRIndex): int = irs.nodes[n].local -func getLit*(irs: IrStore3, n: IrNode3): PNode = +func getLit*(irs: IrStore3, n: IrNode3): lent Literal = irs.literals[n.litIdx] func isLoop*(ir: IrStore3, j: JoinPoint): bool = @@ -617,7 +635,7 @@ iterator args*(n: IrNode3): IRIndex = yield it -func typ*(n: IrNode3): PType = +func typ*(n: IrNode3): TypeId = ## The return type of a builtin call n.typ @@ -1521,7 +1539,7 @@ type type LocIndex* = int - FieldId* = int + #FieldId* = int NewNodeId* = int ValueId* = int @@ -1810,9 +1828,9 @@ type type IrCursor* = object pos: int actions: seq[(bool, Slice[IRIndex])] # true = replace, false = insert - newSyms: SeqAdditions[PSym] - newLocals: SeqAdditions[(LocalKind, PType, PSym)] - newLiterals: SeqAdditions[PNode] + #newSyms: SeqAdditions[PSym] + newLocals: SeqAdditions[(LocalKind, TypeId, SymId)] + newLiterals: SeqAdditions[Literal] # literal + type newNodes: seq[IrNode3] traces: seq[seq[StackTraceEntry]] @@ -1839,7 +1857,7 @@ func apply[T](dest: var seq[T], src: sink SeqAdditions[T]) = func setup*(cr: var IrCursor, ir: IrStore3) = cr.nextIdx = ir.len - cr.newSyms.setFrom(ir.syms) + #cr.newSyms.setFrom(ir.syms) cr.newLocals.setFrom(ir.locals) cr.newLiterals.setFrom(ir.literals) @@ -1871,31 +1889,30 @@ func insert(cr: var IrCursor, n: sink IrNode3): IRIndex = cr.actions.add (false, cr.pos..cr.pos) result = cr.getNext() -func insertSym*(cr: var IrCursor, sym: PSym): IRIndex = - assert sym != nil - cr.insert IrNode3(kind: ntkSym, symIdx: cr.newSyms.add(sym)) +func insertSym*(cr: var IrCursor, sym: SymId): IRIndex = + assert sym != NoneSymbol + cr.insert IrNode3(kind: ntkSym, sym: sym) -func insertCallExpr*(cr: var IrCursor, sym: PSym, args: varargs[IRIndex]): IRIndex = +func insertCallExpr*(cr: var IrCursor, sym: SymId, args: varargs[IRIndex]): IRIndex = let c = cr.insertSym(sym) result = cr.insert IrNode3(kind: ntkCall, isBuiltin: false, callee: c, args: @args) -func insertCallStmt*(cr: var IrCursor, sym: PSym, args: varargs[IRIndex]) = +func insertCallStmt*(cr: var IrCursor, sym: SymId, args: varargs[IRIndex]) = discard insertCallExpr(cr, sym, args) -func insertCallExpr*(cr: var IrCursor, bc: BuiltinCall, typ: PType, args: varargs[IRIndex]): IRIndex = +func insertCallExpr*(cr: var IrCursor, bc: BuiltinCall, typ: TypeId, args: varargs[IRIndex]): IRIndex = result = cr.insert IrNode3(kind: ntkCall, isBuiltin: true, builtin: bc, typ: typ, args: @args) -func insertLit*(cr: var IrCursor, lit: PNode): IRIndex = - assert lit != nil +func insertLit*(cr: var IrCursor, lit: Literal): IRIndex = cr.insert IrNode3(kind: ntkLit, litIdx: cr.newLiterals.add(lit)) func insertAsgn*(cr: var IrCursor, kind: AssignKind, a, b: IRIndex) = discard cr.insert IrNode3(kind: ntkAsgn, asgnKind: kind, wrDst: a, wrSrc: b) -func insertCast*(cr: var IrCursor, t: PType, val: IRIndex): IRIndex = +func insertCast*(cr: var IrCursor, t: TypeId, val: IRIndex): IRIndex = cr.insertCallExpr(bcCast, t, val) -func insertConv*(cr: var IrCursor, t: PType, val: IRIndex): IRIndex = +func insertConv*(cr: var IrCursor, t: TypeId, val: IRIndex): IRIndex = cr.insertCallExpr(bcConv, t, val) func insertDeref*(cr: var IrCursor, val: IRIndex): IRIndex = @@ -1919,12 +1936,12 @@ func insertGoto*(cr: var IrCursor, t: JoinPoint) = func insertJoin*(cr: var IrCursor, t: JoinPoint) = discard -func newLocal*(cr: var IrCursor, kind: LocalKind, s: PSym): int = - cr.newLocals.add((kind, s.typ, s)) +func newLocal*(cr: var IrCursor, kind: LocalKind, t: TypeId, s: SymId): int = + cr.newLocals.add((kind, t, s)) -func newLocal*(cr: var IrCursor, kind: LocalKind, t: PType): int = +func newLocal*(cr: var IrCursor, kind: LocalKind, t: TypeId): int = assert kind == lkTemp - cr.newLocals.add((kind, t, nil)) + cr.newLocals.add((kind, t, NoneSymbol)) func insertLocalRef*(cr: var IrCursor, name: int): IRIndex = cr.insert IrNode3(kind: ntkLocal, local: name) @@ -1964,7 +1981,7 @@ func patch(n: var IrNode3, patchTable: seq[IRIndex]) = ntkContinue, ntkGotoLink, ntkLoad, ntkWrite, ntkRoot, ntkLit: discard "nothing to patch" -func inline*(cr: var IrCursor, other: IrStore3, args: varargs[IRIndex]): IRIndex = +func inline*(cr: var IrCursor, other: IrStore3, sEnv: SymbolEnv, args: varargs[IRIndex]): IRIndex = ## Does NOT create temporaries for each arg # XXX: unfinished @@ -1978,7 +1995,7 @@ func inline*(cr: var IrCursor, other: IrStore3, args: varargs[IRIndex]): IRIndex let oldLen = cr.newNodes.len cr.newNodes.add(other.nodes) - cr.newSyms.add(other.syms) + #cr.newSyms.add(other.syms) cr.newLocals.add(other.locals) cr.traces.add(other.sources) # use the traces of the original @@ -1990,13 +2007,15 @@ func inline*(cr: var IrCursor, other: IrStore3, args: varargs[IRIndex]): IRIndex patchTable[i - oldLen] = i case cr.newNodes[i].kind of ntkSym: - let s = other.syms[cr.newNodes[i].symIdx] + let s = sEnv[cr.newNodes[i].sym] + # XXX: another indicator that a dedicated ``ntkParam`` would be + # better: we need access to ``SymbolEnv`` here if s.kind == skParam: assert s.position < args.len, "not enough arguments" # for simplicity, the original parameter reference node is left as is patchTable[i - oldLen] = args[s.position] - else: - cr.newNodes[i].symIdx += cr.newSyms.start + #else: + # cr.newNodes[i].symIdx += cr.newSyms.start of ntkLocal: cr.newNodes[i].local += cr.newLocals.start @@ -2011,7 +2030,7 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = let oldLen = ir.len patchTable.newSeq(cr.nextIdx) # old ir len + insert node count - ir.syms.apply(cr.newSyms) + #ir.syms.apply(cr.newSyms) ir.locals.apply(cr.newLocals) ir.literals.apply(cr.newLiterals) diff --git a/lib/system.nim b/lib/system.nim index cd81e792307..c0cf52ea067 100644 --- a/lib/system.nim +++ b/lib/system.nim @@ -2069,7 +2069,7 @@ func abs*(x: int64): int64 {.magic: "AbsI", inline.} = when not defined(js): proc likelyProc(val: bool): bool {.importc: "NIM_LIKELY", nodecl, noSideEffect.} - proc unlikelyProc*(val: bool): bool {.importc: "NIM_UNLIKELY", nodecl, noSideEffect.} + proc unlikelyProc(val: bool): bool {.importc: "NIM_UNLIKELY", nodecl, noSideEffect.} template likely*(val: bool): bool = ## Hints the optimizer that `val` is likely going to be true. From 3a8bf3479d6ffcae8144d473cd2d575e0b667daa Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:33:18 +0100 Subject: [PATCH 006/395] fix: handling of system magics The `localReport` in `getSysMagic` raises, making lookup of potentially non-system-declared magics impossible. A custom re-implementation is used instead --- compiler/vm/cbackend2.nim | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index b8b9a78e220..60bc2b7fb35 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -6,7 +6,8 @@ import compiler/ast/[ ast, ast_types, - astalgo, # for `getModule` + astalgo, # for `getModule`, + idents, lineinfos, reports ], @@ -233,6 +234,19 @@ proc getCFile(config: ConfigRef, filename: AbsoluteFile): AbsoluteFile = result = changeFileExt( completeCfilePath(config, withPackageName(config, filename)), ext) + +proc getSysMagic2(g: ModuleGraph, name: string, m: TMagic): PSym = + ## Same as ``magicsys.getSysMagic``, except that: + ## * it doesn't use ``localReport``. + ## * procedures returning int don't have higher precedence + ## * `nil` is returned if no matching magic is found + ## * no line info is required + let id = getIdent(g.cache, name) + for r in systemModuleSyms(g, id): + if r.magic == m: + result = r + + proc generateCode*(g: ModuleGraph) = ## The backend's entry point. Orchestrates code generation and linking. If ## all went well, the resulting binary is written to the project's output @@ -301,19 +315,23 @@ proc generateCode*(g: ModuleGraph) = # XXX: a magic is not necessarily a procedure - it can also be a type # create a symbol for each magic to be used by the IR transformations - for m, id in passEnv.magics.mpairs: + for m in low(TMagic)..high(TMagic): # fetch the name from a "real" symbol - let sym = g.getSysMagic(unknownLineInfo, "", m) + let sym = g.getSysMagic2("", m) let name = - if sym.isError(): + if sym.isNil(): # not every magic has symbol defined in ``system.nim`` (e.g. procs and # types only used in the backend) $m else: sym.name.s - id = c.symEnv.addMagic(skProc, NoneType, name, m) + if sym != nil and sym.kind notin routineKinds: + # we don't care about magic types here + continue + + passEnv.magics[m] = c.symEnv.addMagic(skProc, NoneType, name, m) for op, tbl in passEnv.attachedOps.mpairs: for k, v in g.attachedOps[op].pairs: From 937e14df80c8b443d640fa1eb01e3250bc352fce Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:34:13 +0100 Subject: [PATCH 007/395] fix: initialize types for symbols --- compiler/vm/cbackend2.nim | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 60bc2b7fb35..35e9896e32c 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -346,6 +346,17 @@ proc generateCode*(g: ModuleGraph) = passEnv.sysTypes[t] = c.types.requestType(g.getSysType(unknownLineInfo, t)) + for id, s in c.symEnv.msymbols: + if (let orig = c.symEnv.orig.getOrDefault(id); orig != nil): + # a `nil` PType would get turned into a ``void`` and we explicitly + # don't want this behaviour here (that is, if the type is `nil`, it + # needs to map to ``NoneType``). ``irpasses.computeTypes`` depends on + # the type being empty if there is no type information + if orig.typ != nil: + s.typ = c.types.requestType(orig.typ) + + c.types.flush(c.symEnv, g.config) + let entryPoint = generateMain(c, passEnv, mlist[]) From 5784d81ffeef60bfcccde1b20de3aa8a7a73d9b4 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:35:05 +0100 Subject: [PATCH 008/395] debugging related fixes and improvements --- compiler/vm/cbackend2.nim | 15 ++++++++++++--- compiler/vm/irdbg.nim | 21 ++++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 35e9896e32c..2849f6f715e 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -367,6 +367,7 @@ proc generateCode*(g: ModuleGraph) = for i in 0.. Date: Wed, 17 Aug 2022 16:36:09 +0100 Subject: [PATCH 009/395] irgen: introduce a dedicated `bcNew` built-in Translate calls to `internalNew` to the new built-in --- compiler/vm/irgen.nim | 14 ++++++++++++-- compiler/vm/vmir.nim | 2 ++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 0beb2c20e67..8773ab1ace3 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -786,6 +786,17 @@ proc genMagic(c: var TCtx; n: PNode; m: TMagic): IRIndex = result = c.irCall("&", mConStrStr, args) # the proc doesn't raise so no ``raiseExit`` is needed here + of mNew: + # problem: ``lambdalifting`` inserts calls to ``internalNew`` by just + # using the symbol of the generic proc. This is a problem for + # type translation following after the IR-gen step, since it + # can't handle `tyGenericParam` types. + if n[0].sym.name.s == "internalNew": + let t = c.types.requestType(n[1].typ) + discard c.irs.irCall(bcNew, t, genx(c, n[1])) + else: + # a normal new. Don't do any special transformation + result = genCall(c, n) else: # TODO: return a bool instead and let the callsite call `genCall` in case # the magic doesn't use special logic here @@ -1178,8 +1189,7 @@ proc genObjConstr(c: var TCtx, n: PNode): IRIndex = let t = n.typ.skipTypes(abstractRange+{tyOwned}-{tyTypeDesc}) var obj: IRIndex if t.kind == tyRef: - let nSym = c.irSym getSysSym(c.graph, n.info, "internalNew") - discard c.irs.irCall(nSym, result) + discard c.irs.irCall(bcNew, NoneType, result) obj = c.irs.irDeref(result) else: obj = result diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index bb3eac0ab53..0e24eadbbc3 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -190,6 +190,8 @@ type bcCast # XXX: cast and conv should become dedicated ir nodes bcOverflowCheck + bcNew + bcUnlikely # XXX: alternatively, turn `system.unlikelyProc` into a .compilerproc IrNode3* = object From 989bb5f2f7f18a57e434963356fe72c4ddb73229 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:37:28 +0100 Subject: [PATCH 010/395] cgen2: improve type handling * reuse `GlobalGenCtx` across modules * perform type generation as a pre-pass * remove on-demand type translation --- compiler/vm/cbackend2.nim | 5 +- compiler/vm/cgen2.nim | 104 +++++++++----------------------------- 2 files changed, 29 insertions(+), 80 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 2849f6f715e..01fa0a2bc1b 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -411,12 +411,15 @@ proc generateCode*(g: ModuleGraph) = printTypes(irs, env) raise + var gCtx: GlobalGenCtx + initGlobalContext(gCtx, env) + for i, m in mlist.modules.pairs: let cfile = getCFile(conf, AbsoluteFile toFullPath(conf, m.sym.position.FileIndex)) var cf = Cfile(nimname: m.sym.name.s, cname: cfile, obj: completeCfilePath(conf, toObjFile(conf, cfile)), flags: {}) - emitModuleToFile(conf, cfile, env, moduleProcs[i]) + emitModuleToFile(conf, cfile, gCtx, env, moduleProcs[i]) addFileToCompile(conf, cf) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 994e35a3100..f9b305dec29 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -125,7 +125,7 @@ type CTypeDesc = distinct CDecl - CTypeId = distinct uint32 + CTypeId = TypeId CIdent = LitId ## An identifier in the generated code @@ -149,7 +149,7 @@ type isImmutable: bool # TODO: merge both bools into a `set` isVolatile: bool - GlobalGenCtx = object + GlobalGenCtx* = object ## Environment state that applies to all to all code, independent from ## which routine or module the code is in. @@ -167,10 +167,8 @@ type funcMap: Table[int, int] ## symbol-id -> index into `procs` # TODO: a table is maybe the wrong data structure here. funcs: seq[ProcHeader] - ctypeMap: Table[TypeKey, CTypeId] # ctypes: seq[CTypeInfo] # - defered: seq[(TypeId, CTypeId)] CAstBuilder = object ast: CAst @@ -218,30 +216,16 @@ type CTypeMap = Table[TypeKey, CTypeId] type TypeGenCtx = object # inherited state - tm: CTypeMap # mutated - ctypes: seq[CTypeInfo] # mutated - cache: IdentCache # mutated - + cache: IdentCache env: ptr IrEnv # non-inherited state weakTypes: set[TypeNodeKind] # the set of types that can be turned into forward declarations when declared as a pointer - forwardBegin: int - forwarded: seq[TypeId] ## types who's creation was defered. THe first entry - ## has an ID of `forwardBegin`, the second - ## `forwardBegin + 1`, etc. - func requestType(c: var TypeGenCtx, t: TypeId): CTypeId = ## Requests the type-id for `t`. If the c-type for `t` doesn't exist yet, a ## slot for it is reserved and it's added to the `c.forwared` list - let next = c.ctypes.len.CTypeId - result = c.tm.mgetOrPut(t.TypeKey, next) - if result == next: - # type wasn't generated yet - assert c.forwardBegin + c.forwarded.len == next.int - c.ctypes.setLen(c.ctypes.len + 1) - c.forwarded.add(t) + CTypeId(t) func requestFuncType(c: var TypeGenCtx, t: TypeId): CTypeId = # XXX: this is going to be tricky @@ -394,23 +378,6 @@ func getTypeName(c: var IdentCache, typ: Type, decl: Declaration): CIdent = let h = 0#hashType(typ) c.getOrIncl(fmt"{typ.kind}_{h}") - -func genForwarded(c: var TypeGenCtx) = - ## Generates the `CTypeInfo` for all forwarded types (and also for their - ## dependencies) - var i = 0 - # note: ``genCTypeDecl`` may add to ``forwarded`` - while i < c.forwarded.len: - let fwd = c.forwarded[i] - # XXX: forwarded could be cleared when ``i == forwarded.high`` in - # order to cut down on allocations - let decl = genCTypeDecl(c, c.forwarded[i]) - c.ctypes[c.forwardBegin + i] = CTypeInfo(decl: decl, name: getTypeName(c.cache, c.env.types[fwd], Declaration())) - inc i - - c.forwarded.setLen(0) - c.forwardBegin = c.ctypes.len # prepare for following ``requestType`` calls - func genCType(dest: var CDecl, cache: var IdentCache, t: Type) = template addIdentNode(n: string) = dest.add cdnkIdent, cache.getOrIncl(n).uint32 @@ -512,7 +479,7 @@ func mapTypeV2(c: var GenCtx, t: TypeId): CTypeId = func mapTypeV3(c: var GlobalGenCtx, t: TypeId): CTypeId = if t != NoneType: # XXX: maybe just have a ``NoneType`` -> ``VoidCType`` mapping in the table instead? - c.ctypeMap[t] + CTypeId(t) else: VoidCType @@ -1144,31 +1111,35 @@ proc writeDef(f: File, c: GlobalGenCtx, h: ProcHeader, decl: Declaration) = f.writeLine(") {") -proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, env: IrEnv, procs: openArray[(SymId, IrStore3)]) = +func initGlobalContext*(c: var GlobalGenCtx, env: IrEnv) = + ## Initializes the ``GlobalGenCtx`` to use for all following ``emitModuleToFile`` calls. Creates the ``CTypeInfo`` for each IR type. + + var gen = TypeGenCtx(weakTypes: {tnkRecord}, env: unsafeAddr env) + swap(gen.cache, c.idents) + + # XXX: a leftover from the CTypeId -> TypeId transition. Needs to be removed + c.ctypes.add(CTypeInfo(name: gen.cache.getOrIncl("void"))) # the `VoidCType` + + # TODO: use ``setLen`` + [] + var i = 0 + for id in types(env.types): + let decl = genCTypeDecl(gen, id) + c.ctypes.add CTypeInfo(decl: decl, name: getTypeName(gen.cache, env.types[id], Declaration())) + + swap(gen.cache, c.idents) + +proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalGenCtx, env: IrEnv, procs: openArray[(SymId, IrStore3)]) = let f = open(filename.string, fmWrite) defer: f.close() echo "Here: ", filename.string var - ctx: GlobalGenCtx mCtx: ModuleCtx asts: seq[CAst] - tgc = TypeGenCtx(weakTypes: {tnkRecord}, env: unsafeAddr env) - - template swapTypeCtx() = - swap(tgc.tm, ctx.ctypeMap) - swap(tgc.ctypes, ctx.ctypes) - swap(tgc.cache, ctx.idents) - - ctx.ctypes.add(CTypeInfo(name: ctx.idents.getOrIncl("void"))) # the `VoidCType` - # XXX: we need the `NimStringDesc` PType here - #ctx.ctypes.add(CTypeInfo(name: ctx.idents.getOrIncl("NimString"))) # XXX: wrong, see above mCtx.headers.incl("\"nimbase.h\"") - tgc.forwardBegin = ctx.ctypes.len - for sym, irs in procs.items: useFunction(mCtx, sym) @@ -1183,24 +1154,12 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, env: IrEnv, proc # requires less code duplication; and is also good for modularity c.types = computeTypes(irs, env) - swapTypeCtx() - - # request all types used inside the IR to be setup. Note that this only - # means that the C-type equivalents are created, not that the declarations - # are also emitted in the output file - for t in c.types.items: - if t != NoneType: - discard tgc.requestType(t) - - swapTypeCtx() - swap(c.gl, ctx) swap(c.m, mCtx) asts.add genCode(c, irs) swap(c.m, mCtx) swap(c.gl, ctx) - swapTypeCtx() # XXX: this might lead to an ordering problem, since we're not registering # the types on the first occurence # mark the types used in routine signatures as used @@ -1212,24 +1171,11 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, env: IrEnv, proc for it in env.types.params(sym.typ): if it != NoneType: - discard tgc.requestType(it) mCtx.useType(it) else: - discard tgc.requestType(sym.typ) mCtx.useType(sym.typ) - tgc.genForwarded() - - swapTypeCtx() - - var used: seq[CTypeId] - - block: - for typ in mCtx.types.items: - used.add ctx.ctypeMap[typ.TypeKey] - - for i, t in ctx.ctypes.pairs: - assert t.name != InvalidCIdent, $i + let used = mCtx.types f.writeLine "#define NIM_INTBITS 64" # TODO: don't hardcode @@ -1270,7 +1216,7 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, env: IrEnv, proc writeDecl(f, ctx, hdr, sym.decl) of skLet, skVar: - emitType(f, ctx, ctx.ctypeMap[sym.typ.TypeKey]) + emitType(f, ctx, sym.typ) f.write " " f.write mangledName(sym.decl) f.writeLine ";" From 614691b65a1c5d4b4e68e26b57443c4205287277 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:38:23 +0100 Subject: [PATCH 011/395] split procedures from symbols --- compiler/vm/cbackend2.nim | 53 +++++++++++-------- compiler/vm/cgen2.nim | 106 +++++++++++++++++++------------------ compiler/vm/irdbg.nim | 4 +- compiler/vm/irgen.nim | 14 ++--- compiler/vm/irpasses.nim | 41 +++++++-------- compiler/vm/irtypes.nim | 108 ++++++++++++++++++++++++++++++++++++-- compiler/vm/vmir.nim | 23 ++++++-- 7 files changed, 236 insertions(+), 113 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 01fa0a2bc1b..a8d0b79d457 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -199,8 +199,12 @@ proc generateEntryProc(c: var TCtx, g: PassEnv, mlist: ModuleList): IrStore3 = genInitProcCall(c.irs, mlist.modules[mainIdx]) # write ``programResult`` into the result variable + + # XXX: ``programResult`` is not a compiler*proc* + #[ let prSym = g.getCompilerProc("programResult") - c.irs.irAsgn(askInit, c.irs.irLocal(resultVar), c.irs.irSym(prSym)) + c.irs.irAsgn(askInit, c.irs.irLocal(resultVar), c.irs.irProc(prSym)) + ]# # refc-compatible "move" swap(result, c.irs) @@ -213,15 +217,14 @@ proc generateMain(c: var TCtx, g: PassEnv, # lastly, generate the actual code: result = generateEntryProc(c, g, mlist) -func collectRoutineSyms(s: IrStore3, env: SymbolEnv, list: var seq[PSym], known: var IntSet) = +func collectRoutineSyms(s: IrStore3, env: ProcedureEnv, list: var seq[PSym], known: var IntSet) = for n in s.nodes: case n.kind - of ntkSym: - let sym = env.orig[s.sym(n)] # XXX: inefficient + of ntkProc: + let sym = env.orig[n.procId] # XXX: inefficient # XXX: excluding all magics is wrong. Depending on which back-end is # used, some magics are treated like any other routine - if sym.kind in routineKinds and - sym.magic == mNone and + if sym.magic == mNone and sym.id notin known: known.incl(sym.id) list.add(sym) @@ -257,13 +260,14 @@ proc generateCode*(g: ModuleGraph) = echo "starting codgen" - var moduleProcs: seq[seq[(SymId, IrStore3)]] + var moduleProcs: seq[seq[(ProcId, IrStore3)]] moduleProcs.newSeq(mlist.modules.len) var env = IrEnv() var c = TCtx(config: g.config, graph: g, idgen: g.idgen) swap(c.symEnv, env.syms) + swap(c.procs, env.procs) c.types.env = addr env.types c.types.voidType = g.getSysType(unknownLineInfo, tyVoid) c.types.charType = g.getSysType(unknownLineInfo, tyChar) @@ -281,14 +285,14 @@ proc generateCode*(g: ModuleGraph) = var seenProcs: IntSet for it in mlist.modules.items: - collectRoutineSyms(it.initProc[1], c.symEnv, nextProcs, seenProcs) + collectRoutineSyms(it.initProc[1], c.procs, nextProcs, seenProcs) var nextProcs2: seq[PSym] while nextProcs.len > 0: for it in nextProcs.items: let mIdx = it.itemId.module let realIdx = mlist.moduleMap[it.getModule().id] - let sId = c.symEnv.requestSym(it) + let sId = c.procs.requestProc(it) if g.getBody(it).kind == nkEmpty: # a quick fix to not run `irgen` for 'importc'ed procs @@ -296,7 +300,7 @@ proc generateCode*(g: ModuleGraph) = continue let ir = generateCodeForProc(c, it) - collectRoutineSyms(c.unwrap ir, c.symEnv, nextProcs2, seenProcs) + collectRoutineSyms(c.unwrap ir, c.procs, nextProcs2, seenProcs) #doAssert mIdx == realIdx moduleProcs[realIdx].add((sId, c.unwrap ir)) @@ -311,7 +315,14 @@ proc generateCode*(g: ModuleGraph) = let passEnv = PassEnv() block: for sym in g.compilerprocs.items: - passEnv.compilerprocs[sym.name.s] = c.symEnv.requestSym(sym) + case sym.kind + of routineKinds: + passEnv.compilerprocs[sym.name.s] = c.procs.requestProc(sym) + of skType: + passEnv.compilertypes[sym.name.s] = c.types.requestType(sym.typ) + else: + # TODO: the rest (e.g. globals) also need to be handled + discard # XXX: a magic is not necessarily a procedure - it can also be a type # create a symbol for each magic to be used by the IR transformations @@ -331,13 +342,13 @@ proc generateCode*(g: ModuleGraph) = # we don't care about magic types here continue - passEnv.magics[m] = c.symEnv.addMagic(skProc, NoneType, name, m) + passEnv.magics[m] = c.procs.addMagic(NoneType, name, m) for op, tbl in passEnv.attachedOps.mpairs: for k, v in g.attachedOps[op].pairs: let t = c.types.lookupType(k) if t != NoneType: - tbl[t] = c.symEnv.requestSym(v) + tbl[t] = c.procs.requestProc(v) else: # XXX: is this case even possible discard#echo "missing type for type-bound operation" @@ -348,12 +359,9 @@ proc generateCode*(g: ModuleGraph) = for id, s in c.symEnv.msymbols: if (let orig = c.symEnv.orig.getOrDefault(id); orig != nil): - # a `nil` PType would get turned into a ``void`` and we explicitly - # don't want this behaviour here (that is, if the type is `nil`, it - # needs to map to ``NoneType``). ``irpasses.computeTypes`` depends on - # the type being empty if there is no type information - if orig.typ != nil: - s.typ = c.types.requestType(orig.typ) + s.typ = c.types.requestType(orig.typ) + + c.procs.finish(c.types) c.types.flush(c.symEnv, g.config) @@ -361,6 +369,7 @@ proc generateCode*(g: ModuleGraph) = generateMain(c, passEnv, mlist[]) swap(env.syms, c.symEnv) + swap(c.procs, env.procs) var lpCtx = LiftPassCtx(graph: passEnv, idgen: g.idgen, cache: g.cache) lpCtx.env = addr env @@ -371,7 +380,7 @@ proc generateCode*(g: ModuleGraph) = try: runPass(irs, initHookCtx(passEnv, irs, env), hookPass) - lowerTestError(irs, passEnv, env.types, env.syms) + lowerTestError(irs, passEnv, env.types, env.procs, env.syms) var rpCtx: RefcPassCtx rpCtx.setupRefcPass(passEnv, addr env, g, g.idgen, irs) runPass(irs, rpCtx, lowerSetsPass) @@ -391,7 +400,7 @@ proc generateCode*(g: ModuleGraph) = runPass(irs, lpCtx, typeV1Pass) except PassError as e: - let sym = env.syms.orig[s] + let sym = env.procs.orig[s] echo conf.toFileLineCol(sym.info) echoTrace(irs, e.n) printIr(irs, env, calcStmt(irs)) @@ -399,7 +408,7 @@ proc generateCode*(g: ModuleGraph) = printTypes(irs, env) raise except: - let sym = env.syms.orig.getOrDefault(s) + let sym = env.procs.orig.getOrDefault(s) if sym != nil: echo conf.toFileLineCol(sym.info) else: diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index f9b305dec29..c41307a914c 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -54,6 +54,7 @@ type types: PackedSet[TypeId] # all used type for the module syms: PackedSet[SymId] ## all used symbols that need to be declared in the C code. # TODO: should be a SymSet + funcs: PackedSet[ProcId] ## all used functions that need to be declared in the C code # TODO: header paths can be very regular. Maybe a CritBitTree[void} would make sense here? # TODO: the header includes are currently emitted in an arbitrary order, is that okay? (check the old cgen) @@ -133,7 +134,7 @@ type decl: CDecl name: CIdent # - ProcHeader = object + CProcHeader = object returnType: CTypeId args: seq[tuple[typ: CTypeId, name: CIdent]] @@ -165,7 +166,7 @@ type rttiV2: Table[TypeKey, CIdent] funcMap: Table[int, int] ## symbol-id -> index into `procs` # TODO: a table is maybe the wrong data structure here. - funcs: seq[ProcHeader] + funcs: seq[CProcHeader] ctypes: seq[CTypeInfo] # @@ -411,9 +412,9 @@ func genCType(cache: var IdentCache, t: Type): CTypeInfo = result.name = getTypeName(cache, t, Declaration()) -func useFunction(c: var ModuleCtx, s: SymId) = +func useFunction(c: var ModuleCtx, s: ProcId) = ## - c.syms.incl s + c.funcs.incl s #[ if lfHeader in s.loc.flags: c.headers.incl getStr(s.annex.path) @@ -425,6 +426,12 @@ func useType(c: var ModuleCtx, t: TypeId) = assert t != NoneType c.types.incl t +func useTypeAllowNone(c: var ModuleCtx, t: TypeId) = + # XXX: this shouldn't be allowed, but back-end generated magics have no type + # information + if t != NoneType: + c.types.incl t + #[ func useTypeWeak(c: var ModuleCtx, t: PType): CTypeId= c.types @@ -455,7 +462,7 @@ func requestTypeName(c: var GlobalGenCtx, t: PType): CIdent = type GenCtx = object f: File tmp: int - sym: SymId + sym: ProcId names: seq[CAst] # IRIndex -> expr types: seq[TypeId] @@ -470,35 +477,27 @@ func gen(c: GenCtx, irs: IrStore3, n: IRIndex): CAst = c.names[n] #"gen_MISSING" -func mapTypeV3(c: var GlobalGenCtx, t: TypeId): CTypeId +func mapTypeV3(c: GlobalGenCtx, t: TypeId): CTypeId func mapTypeV2(c: var GenCtx, t: TypeId): CTypeId = # TODO: unfinished c.m.useType(t) # mark the type as used + mapTypeV3(c.gl, t) -func mapTypeV3(c: var GlobalGenCtx, t: TypeId): CTypeId = +func mapTypeV3(c: GlobalGenCtx, t: TypeId): CTypeId = if t != NoneType: # XXX: maybe just have a ``NoneType`` -> ``VoidCType`` mapping in the table instead? CTypeId(t) else: VoidCType -func genProcHeader(c: var GlobalGenCtx, senv: SymbolEnv, tenv: TypeEnv, s: SymId): ProcHeader = - let - sym = senv[s] - typ = sym.typ - assert tenv[typ].kind == tnkProc +func genCProcHeader(c: var GlobalGenCtx, env: ProcedureEnv, s: ProcId): CProcHeader = + result.returnType = mapTypeV3(c, env.getReturnType(s)) - result.returnType = mapTypeV3(c, tenv.getReturnType(typ)) - - result.args.newSeq(tenv.numParams(typ)) + result.args.newSeq(env.numParams(s)) var i = 0 - for pt in tenv.params(typ): - result.args[i] = (mapTypeV3(c, pt), c.idents.getOrIncl(fmt"Param{i}")) - #[ - result.args[i] = (mapTypeV3(c, pt), - c.idents.getOrIncl(env[env[env[t].c].a + i].sym, .t.n[i].sym.name.s)) - ]# + for p in env.params(s): + result.args[i] = (mapTypeV3(c, p.typ), c.idents.getOrIncl(p.name)) inc i @@ -711,16 +710,20 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = let sId = irs.sym(n) let sym = c.env.syms[sId] # TODO: refactor - if sym.kind in routineKinds and sym.magic == mNone: - useFunction(c.m, sId) - elif sym.kind in {skVar, skLet} and sfGlobal in sym.flags: + if sym.kind in {skVar, skLet} and sfGlobal in sym.flags: c.m.syms.incl sId #discard mapTypeV3(c.gl, sym.typ) # XXX: temporary - if sym.kind notin routineKinds and sym.typ != NoneType: + if sym.typ != NoneType: useType(c.m, sym.typ) names[i] = start().ident(c.gl.idents, mangledName(sym.decl)).fin() + of ntkProc: + let prc = c.env.procs[n.procId] + if prc.magic == mNone: + useFunction(c.m, n.procId) + + names[i] = start().ident(c.gl.idents, mangledName(prc.decl)).fin() of ntkLocal: let (kind, typ, sym) = irs.getLocal(i) if sym == NoneSymbol: @@ -735,8 +738,8 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = names[i] = name else: let callee = irs.at(n.callee) - if callee.kind == ntkSym and (let s = c.env.syms[irs.sym(callee)]; s.magic != mNone): - names[i] = genMagic(c, irs, s.magic, n) + if callee.kind == ntkProc and (let p = c.env.procs[callee.procId]; p.magic != mNone): + names[i] = genMagic(c, irs, p.magic, n) else: var res = start().add(cnkCall, n.argCount.uint32).add(names[n.callee]) for it in n.args: @@ -809,7 +812,7 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = # exit # TODO: ``NoneType`` should only mean "no type information", not "void" - if c.env.types.getReturnType(c.env.syms[c.sym].typ) != NoneType: + if c.env.procs.getReturnType(c.sym) != NoneType: result.add cnkReturn else: result.add cnkReturn, 1 @@ -1083,7 +1086,7 @@ proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo) = assert pos == info.decl.len -proc writeDecl(f: File, c: GlobalGenCtx, h: ProcHeader, decl: Declaration) = +proc writeDecl(f: File, c: GlobalGenCtx, h: CProcHeader, decl: Declaration) = emitType(f, c, h.returnType) f.write(" ") f.write(mangledName(decl)) @@ -1096,7 +1099,7 @@ proc writeDecl(f: File, c: GlobalGenCtx, h: ProcHeader, decl: Declaration) = f.writeLine(");") -proc writeDef(f: File, c: GlobalGenCtx, h: ProcHeader, decl: Declaration) = +proc writeDef(f: File, c: GlobalGenCtx, h: CProcHeader, decl: Declaration) = emitType(f, c, h.returnType) f.write(" ") f.write(mangledName(decl)) @@ -1128,7 +1131,7 @@ func initGlobalContext*(c: var GlobalGenCtx, env: IrEnv) = swap(gen.cache, c.idents) -proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalGenCtx, env: IrEnv, procs: openArray[(SymId, IrStore3)]) = +proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalGenCtx, env: IrEnv, procs: openArray[(ProcId, IrStore3)]) = let f = open(filename.string, fmWrite) defer: f.close() @@ -1143,11 +1146,11 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG for sym, irs in procs.items: useFunction(mCtx, sym) - if sfImportc in env.syms[sym].flags: + if sfImportc in env.procs[sym].flags: asts.add(default(CAst)) continue - echo "genFor: ", env.syms[sym].decl.name #, " at ", conf.toFileLineCol(sym.info) + echo "genFor: ", env.procs[sym].decl.name #, " at ", conf.toFileLineCol(sym.info) var c = GenCtx(f: f, config: conf, sym: sym, env: unsafeAddr env) # doing a separate pass for the type computation instead of doing it in # `genCode` is probably a bit less efficient, but it's also simpler; @@ -1163,17 +1166,15 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG # XXX: this might lead to an ordering problem, since we're not registering # the types on the first occurence # mark the types used in routine signatures as used - for id in mCtx.syms.items: - let sym = env.syms[id] - case sym.kind - of routineKinds: - if sym.typ == NoneType: continue + for id in mCtx.funcs.items: + mCtx.useTypeAllowNone(env.procs.getReturnType(id)) - for it in env.types.params(sym.typ): - if it != NoneType: - mCtx.useType(it) - else: - mCtx.useType(sym.typ) + for it in env.procs.params(id): + mCtx.useType(it.typ) + + # mark the type of used non-proc symbols as used + for id in mCtx.syms.items: + mCtx.useType(env.syms[id].typ) let used = mCtx.types @@ -1207,14 +1208,15 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG # generate all procedure forward declarations + for id in mCtx.funcs.items: + #echo "decl: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) + let hdr = genCProcHeader(ctx, env.procs, id) + + writeDecl(f, ctx, hdr, env.procs[id].decl) + for id in mCtx.syms.items: let sym = env.syms[id] case sym.kind - of routineKinds: - #echo "decl: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) - let hdr = genProcHeader(ctx, env.syms, env.types, id) - - writeDecl(f, ctx, hdr, sym.decl) of skLet, skVar: emitType(f, ctx, sym.typ) f.write " " @@ -1233,13 +1235,13 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG let (id, _) = procs[i] - sym = env.syms[id] - let hdr = genProcHeader(ctx, env.syms, env.types, id) - writeDef(f, ctx, hdr, sym.decl) + prc = env.procs[id] + let hdr = genCProcHeader(ctx, env.procs, id) + writeDef(f, ctx, hdr, prc.decl) try: emitCAst(f, ctx, it) except: - echo "emit: ", sym.decl.name#, " at ", conf.toFileLineCol(sym.info) + echo "emit: ", prc.decl.name#, " at ", conf.toFileLineCol(sym.info) raise f.writeLine "}" inc i \ No newline at end of file diff --git a/compiler/vm/irdbg.nim b/compiler/vm/irdbg.nim index 57daf40b7ba..b8363cfaa88 100644 --- a/compiler/vm/irdbg.nim +++ b/compiler/vm/irdbg.nim @@ -9,7 +9,7 @@ func calcStmt*(irs: IrStore3): seq[bool] = var i = 0 for n in irs.nodes: case n.kind - of ntkSym, ntkLocal, ntkJoin, ntkLit, ntkGoto: + of ntkSym, ntkLocal, ntkJoin, ntkLit, ntkGoto, ntkProc: discard of ntkCall: for it in n.args: @@ -57,6 +57,8 @@ proc printIr*(irs: IrStore3, e: IrEnv, exprs: seq[bool]) = case n.kind of ntkSym: line = fmt"sym {e.syms[irs.sym(n)].decl.name}" + of ntkProc: + line = fmt"proc '{e.procs[n.procId].decl.name}'" of ntkAsgn: case n.asgnKind of askCopy, askDiscr: diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 8773ab1ace3..d1c3cf00ebe 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -75,6 +75,7 @@ type TCtx* = object options*: set[TOption] symEnv*: SymbolEnv + procs*: ProcedureEnv types*: DeferredTypeGen @@ -160,20 +161,19 @@ func closeScope(c: var TCtx) = let id = c.prc.scopeStack.pop() c.prc.scopes.add((true, id, c.irs.len)) -proc genProcSym(c: var TCtx, n: PNode): IRIndex = - assert n.kind == nkSym - c.irSym(n.sym) +proc genProcSym(c: var TCtx, s: PSym): IRIndex = + c.irs.irProc(c.procs.requestProc(s)) proc irCall(c: var TCtx, name: string, args: varargs[IRIndex]): IRIndex = # TODO: compiler procs should be cached here in `TCtx` let sym = c.graph.getCompilerProc(name) - c.irs.irCall(c.irSym(sym), args) + c.irs.irCall(genProcSym(c, sym), args) func irCall(c: var TCtx, name: string, m: TMagic, args: varargs[IRIndex]): IRIndex {.inline.} = # TODO: instead of creating a new duplicate magic each time, all used magics # should be only created once and then reused let sym = createMagic(c.graph, c.idgen, name, m) - c.irs.irCall(c.irSym(sym), args) + c.irs.irCall(genProcSym(c, sym), args) func genLocal(c: var TCtx, kind: LocalKind, t: PType): IRIndex = let @@ -563,7 +563,7 @@ proc genLit(c: var TCtx; n: PNode): IRIndex = proc genProcLit(c: var TCtx, n: PNode, s: PSym): IRIndex = - c.irSym(s) + c.irs.irProc(c.procs.requestProc(s)) #[ func doesAlias(c: TCtx, a, b: IRIndex): bool = @@ -624,7 +624,7 @@ proc genCall(c: var TCtx; n: PNode): IRIndex = let callee = if n[0].kind == nkSym: - genProcSym(c, n[0]) + genProcSym(c, n[0].sym) else: genx(c, n[0]) diff --git a/compiler/vm/irpasses.nim b/compiler/vm/irpasses.nim index c43d7ffe857..d49b6676303 100644 --- a/compiler/vm/irpasses.nim +++ b/compiler/vm/irpasses.nim @@ -63,16 +63,20 @@ template customAssert(cond: bool, node: IRIndex) = raise (ref PassError)(msg: astToStr(cond), n: node) type PassEnv* = ref object # XXX: will be a non-`ref` later on - magics*: Table[TMagic, SymId] - compilerprocs*: Table[string, SymId] + magics*: Table[TMagic, ProcId] + compilerprocs*: Table[string, ProcId] + compilertypes*: Table[string, TypeId] - attachedOps*: array[TTypeAttachedOp, Table[TypeId, SymId]] + attachedOps*: array[TTypeAttachedOp, Table[TypeId, ProcId]] sysTypes*: array[TTypeKind, TypeId] -func getCompilerProc*(g: PassEnv, name: string): SymId = +func getCompilerProc*(g: PassEnv, name: string): ProcId = g.compilerprocs[name] +func getCompilerType*(g: PassEnv, name: string): TypeId = + g.compilertypes[name] + func getSysType*(g: PassEnv, kind: TTypeKind): TypeId = g.sysTypes[kind] @@ -498,7 +502,7 @@ func computeTypes*(ir: IrStore3, env: IrEnv): seq[TypeId] = var i = 0 for n in ir.nodes: case n.kind - of ntkAsgn, ntkJoin, ntkGoto, ntkBranch, ntkContinue: + of ntkAsgn, ntkJoin, ntkGoto, ntkBranch, ntkContinue, ntkProc: discard of ntkCall: result[i] = @@ -507,24 +511,17 @@ func computeTypes*(ir: IrStore3, env: IrEnv): seq[TypeId] = n.typ else: let callee = ir.at(n.callee) - if callee.kind != ntkSym: + if callee.kind != ntkProc: env.types.getReturnType(result[n.callee]) # the callee's return type - elif (let t = env.syms[ir.sym(callee)].typ; t != NoneType): - env.types.getReturnType(t) else: - # the symbol for magics created with ``createMagic`` don't have - # type information - NoneType + env.procs.getReturnType(callee.procId) of ntkLit: result[i] = ir.getLit(n).typ of ntkSym: let s = ir.sym(n) customAssert s != NoneSymbol, i - if env.syms[s].kind notin routineKinds: - # don't compute the type for routine symbols. This makes it easier to - # figure out the type dependencies later on. - result[i] = env.syms[s].typ + result[i] = env.syms[s].typ of ntkUse, ntkConsume: result[i] = result[n.srcLoc] of ntkLocal: @@ -561,8 +558,8 @@ func getMagic(ir: IrStore3, env: IrEnv, n: IrNode3): TMagic = mNone else: let callee = ir.at(n.callee) - if callee.kind == ntkSym: - env.syms[ir.sym(callee)].magic + if callee.kind == ntkProc: + env.procs[callee.procId].magic else: mNone @@ -709,7 +706,7 @@ func hasAttachedOp*(c: HookCtx, op: TTypeAttachedOp, typ: TypeId): bool = assert typ != NoneType typ in c.graph.attachedOps[op] -func getAttachedOp(c: HookCtx, op: TTypeAttachedOp, typ: TypeId): SymId = +func getAttachedOp(c: HookCtx, op: TTypeAttachedOp, typ: TypeId): ProcId = assert typ != NoneType c.graph.attachedOps[op][typ] @@ -867,7 +864,7 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) of mLengthStr: cr.replace() # XXX: might be a good idea to cache the `string` type - let strTyp = c.extra.compilerprocs["NimStringDesc"] + let strTyp = c.extra.getCompilerType("NimStringDesc") #genIfThanElse(cr.insertMagicCall("isNil", mIsNil, a.val)) cr.insertError("Not implemented: lowerSeqsV1.mLengthStr") @@ -1033,7 +1030,7 @@ proc liftTypeInfoV1(c: var LiftPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCurs let name = "NTI" & $(typ.int) & "_" # XXX: too many short-lived and unnecessary allocations # TODO: cache the `TNimType` type - let globalType = c.env.syms[c.graph.getCompilerProc("TNimType")].typ + let globalType = c.graph.getCompilerType("TNimType") # the symbol is owned by the module the type is owned by s = c.addGlobal(globalType, name) @@ -1047,7 +1044,7 @@ proc liftTypeInfoV1(c: var LiftPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCurs const ErrFlagName = "nimError" -proc lowerTestError*(ir: var IrStore3, g: PassEnv, types: TypeEnv, syms: var SymbolEnv) = +proc lowerTestError*(ir: var IrStore3, g: PassEnv, types: TypeEnv, procs: ProcedureEnv, syms: var SymbolEnv) = ## Lowers ``bcTestError`` builtin calls for the C-like targets. Turns ## ``bcTestError`` into ``unlikelyProc(ErrFlagName[])`` and inserts a ## @@ -1083,7 +1080,7 @@ proc lowerTestError*(ir: var IrStore3, g: PassEnv, types: TypeEnv, syms: var Sym p = g.getCompilerProc("nimErrorFlag") # TODO: this lookup yields the same across all calls to `lowerTestError`. Cache both the compiler proc and it's return type - typ = types.getReturnType(syms[p].typ) + typ = procs.getReturnType(p) s = syms.addSym(skLet, typ, ErrFlagName) # XXX: no caching is currently done for the symbol names, so a lot of duplicated strings are created here... diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index 77c38daa20d..a33145acb29 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -2,6 +2,7 @@ import std/[ + hashes, tables ], compiler/front/[ @@ -172,6 +173,34 @@ type # XXX: `orig` will likely be removed/replaced later on orig*: Table[SymId, PSym] # stores the associated ``PSym`` for a symbol. Currently meant to be used by the code-generators. + ProcId* = distinct uint32 + + ProcHeader* = object + ## At the IR-level, there is no distinction done between ``func``s, + ## ``proc``s, ``iterator``s, and ``method``s. They are all treated as a + ## "procedure" and work the same. + + params*: seq[tuple[name: string, typ: TypeId]] + returnType*: TypeId + + magic*: TMagic + + flags*: TSymFlags # XXX: uses `TSymFlags` for now, but this will be changed to something else later on + + # XXX: each procedure requires a ``Declaration``, so it's stored as part of + # the type in order to avoid indirections via a ``DeclId`` (or similar). + # Since a ``Declaration`` object is quite large, this does mean that + # less ``ProcHeader``s fit into a cache-line. The declration is only + # needed by code-generators so it likely makes sense to move the + # declaration into a separate seq in ``ProcedureEnv`` + decl*: Declaration + + ProcedureEnv* = object + # TODO: maybe rename + procs: seq[ProcHeader] + map: Table[PSym, ProcId] + orig*: Table[ProcId, PSym] + const NoneType* = TypeId(0) const NoneSymbol* = SymId(0) @@ -183,10 +212,11 @@ const func `==`*(a, b: TypeId): bool {.borrow.} func `==`*(a, b: SymId): bool {.borrow.} +func `==`*(a, b: ProcId): bool {.borrow.} func `inc`*(a: var RecordNodeIndex, val: int = 1) {.borrow.} -type SomeId = TypeId | SymId | RecordId | FieldId +type SomeId = TypeId | SymId | RecordId | FieldId | ProcId template toIndex*(id: SomeId): uint32 = id.uint32 - 1 @@ -212,6 +242,9 @@ func `[]`*(e: TypeEnv, i: RecordNodeIndex): lent RecordNode = func `[]`*(e: TypeEnv, i: RecordId): lent RecordNode = e.records[toIndex(i)] +func `[]`*(e: ProcedureEnv, i: ProcId): lent ProcHeader {.inline.} = + e.procs[toIndex(i)] + func getReturnType*(e: TypeEnv, t: TypeId): TypeId = ## Returns the return type of the given procedure type `t` assert e[t].kind == tnkProc, $e[t].kind @@ -361,6 +394,7 @@ func requestSym*(e: var SymbolEnv, s: PSym): SymId = # the slot's id. The callsite of `requestSym` doesn't need # to have access to everything required for creating a ``Symbol`` from # a `PSym` then. + assert s.kind notin routineKinds let next = SymId(e.symbols.len + 1) # +1 since ID '0' is reserved for indicating 'none' @@ -621,10 +655,10 @@ func addSym*(e: var SymbolEnv, kind: TSymKind, typ: TypeId, name: string, flags: e.symbols.add(Symbol(kind: kind, typ: typ, flags: flags, decl: Declaration(name: name))) result = e.symbols.len.SymId -func addMagic*(e: var SymbolEnv, kind: TSymKind, typ: TypeId, name: string, m: TMagic): SymId = +func addMagic*(e: var ProcedureEnv, typ: TypeId, name: string, m: TMagic): ProcId = # XXX: temporary helper - e.symbols.add(Symbol(kind: kind, typ: typ, magic: m, decl: Declaration(name: name))) - result = e.symbols.len.SymId + e.procs.add ProcHeader(returnType: typ, magic: m, decl: Declaration(name: name)) + result = e.procs.len.ProcId iterator msymbols*(e: var SymbolEnv): (SymId, var Symbol) = var i = 0 @@ -687,3 +721,69 @@ func requestGenericType*(e: var TypeEnv, kind: TypeNodeKind, elem: TypeId): Type # TODO: first check if the type exists already e.types.add(Type(kind: kind, base: elem)) result = toId(e.types.high, TypeId) + +func translateProc*(s: PSym, types: var DeferredTypeGen, dest: var ProcHeader) = + assert s != nil + + dest.magic = s.magic + dest.flags = s.flags + + # fill in the declaration info + dest.decl.name = s.name.s + + # XXX: temporary workaround for manually created magic syms (e.g. via + # ``createMagic``), as these have no type information. Those shouldn't + # be passed to ``requestProc`` however and instead be handled differently + if s.typ == nil: + return + + # type information + let t = s.typ + + dest.returnType = types.requestType(t[0]) + + # walk the type node instead of the sons so that hidden parameters (used by + # closure procs) get added too + dest.params.setLen(t.n.len - 1) # skip the first node + for i in 1.. Date: Wed, 17 Aug 2022 16:39:19 +0100 Subject: [PATCH 012/395] cbackend2: move `PassEnv` setup into a separate procedure --- compiler/vm/cbackend2.nim | 90 ++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 44 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index a8d0b79d457..08822e42c19 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -249,6 +249,51 @@ proc getSysMagic2(g: ModuleGraph, name: string, m: TMagic): PSym = if r.magic == m: result = r +proc newPassEnv(g: ModuleGraph, tgen: var DeferredTypeGen, syms: var SymbolEnv, + procs: var ProcedureEnv): PassEnv = + new(result) + for sym in g.compilerprocs.items: + case sym.kind + of routineKinds: + result.compilerprocs[sym.name.s] = procs.requestProc(sym) + of skType: + result.compilertypes[sym.name.s] = tgen.requestType(sym.typ) + else: + # TODO: the rest (e.g. globals) also need to be handled + discard + + # XXX: a magic is not necessarily a procedure - it can also be a type + # create a symbol for each magic to be used by the IR transformations + for m in low(TMagic)..high(TMagic): + # fetch the name from a "real" symbol + let sym = g.getSysMagic2("", m) + + let name = + if sym.isNil(): + # not every magic has symbol defined in ``system.nim`` (e.g. procs and + # types only used in the backend) + $m + else: + sym.name.s + + if sym != nil and sym.kind notin routineKinds: + # we don't care about magic types here + continue + + result.magics[m] = procs.addMagic(NoneType, name, m) + + for op, tbl in result.attachedOps.mpairs: + for k, v in g.attachedOps[op].pairs: + let t = tgen.lookupType(k) + if t != NoneType: + tbl[t] = procs.requestProc(v) + else: + # XXX: is this case even possible + discard#echo "missing type for type-bound operation" + + for t in { tyVoid, tyInt..tyFloat64, tyBool, tyChar, tyString, tyCstring, tyPointer }.items: + result.sysTypes[t] = tgen.requestType(g.getSysType(unknownLineInfo, t)) + proc generateCode*(g: ModuleGraph) = ## The backend's entry point. Orchestrates code generation and linking. If @@ -312,50 +357,7 @@ proc generateCode*(g: ModuleGraph) = swap(nextProcs, nextProcs2) # setup a ``PassEnv`` - let passEnv = PassEnv() - block: - for sym in g.compilerprocs.items: - case sym.kind - of routineKinds: - passEnv.compilerprocs[sym.name.s] = c.procs.requestProc(sym) - of skType: - passEnv.compilertypes[sym.name.s] = c.types.requestType(sym.typ) - else: - # TODO: the rest (e.g. globals) also need to be handled - discard - - # XXX: a magic is not necessarily a procedure - it can also be a type - # create a symbol for each magic to be used by the IR transformations - for m in low(TMagic)..high(TMagic): - # fetch the name from a "real" symbol - let sym = g.getSysMagic2("", m) - - let name = - if sym.isNil(): - # not every magic has symbol defined in ``system.nim`` (e.g. procs and - # types only used in the backend) - $m - else: - sym.name.s - - if sym != nil and sym.kind notin routineKinds: - # we don't care about magic types here - continue - - passEnv.magics[m] = c.procs.addMagic(NoneType, name, m) - - for op, tbl in passEnv.attachedOps.mpairs: - for k, v in g.attachedOps[op].pairs: - let t = c.types.lookupType(k) - if t != NoneType: - tbl[t] = c.procs.requestProc(v) - else: - # XXX: is this case even possible - discard#echo "missing type for type-bound operation" - - for t in { tyVoid, tyInt..tyFloat64, tyBool, tyChar, tyString, tyCstring, tyPointer }.items: - passEnv.sysTypes[t] = c.types.requestType(g.getSysType(unknownLineInfo, t)) - + let passEnv = newPassEnv(g, c.types, c.symEnv, c.procs) for id, s in c.symEnv.msymbols: if (let orig = c.symEnv.orig.getOrDefault(id); orig != nil): From 1f95ef093e024fca7b066bba95afcf9959861c86 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:40:13 +0100 Subject: [PATCH 013/395] fix: don't unconditionally treat symbols as routines during `genCall` --- compiler/vm/irgen.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index d1c3cf00ebe..62e52fcd156 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -623,7 +623,7 @@ proc genCall(c: var TCtx; n: PNode): IRIndex = ## code-gen'ed let callee = - if n[0].kind == nkSym: + if n[0].kind == nkSym and n[0].sym.kind in routineKinds: genProcSym(c, n[0].sym) else: genx(c, n[0]) From 5f367120ba053c80c9b92592658e4df591587448 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:41:13 +0100 Subject: [PATCH 014/395] irgen: use a `PassEnv` as the source for magics and compilerprocs For bootstrapping the compiler, this reduces the number of used IR procs from ~15k to ~9.6k and the types from ~21k to ~18.5k. --- compiler/vm/cbackend2.nim | 12 +++++++----- compiler/vm/irgen.nim | 15 +++++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 08822e42c19..e305f2bb4de 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -221,10 +221,11 @@ func collectRoutineSyms(s: IrStore3, env: ProcedureEnv, list: var seq[PSym], kno for n in s.nodes: case n.kind of ntkProc: - let sym = env.orig[n.procId] # XXX: inefficient + let sym = env.orig.getOrDefault(n.procId) # XXX: inefficient # XXX: excluding all magics is wrong. Depending on which back-end is # used, some magics are treated like any other routine - if sym.magic == mNone and + if sym != nil and + sym.magic == mNone and sym.id notin known: known.incl(sym.id) list.add(sym) @@ -317,6 +318,10 @@ proc generateCode*(g: ModuleGraph) = c.types.voidType = g.getSysType(unknownLineInfo, tyVoid) c.types.charType = g.getSysType(unknownLineInfo, tyChar) + # setup a ``PassEnv`` + let passEnv = newPassEnv(g, c.types, c.symEnv, c.procs) + c.passEnv = passEnv + # generate all module init procs (i.e. code for the top-level statements): for m in mlist.modules.mitems: c.module = m.sym @@ -356,9 +361,6 @@ proc generateCode*(g: ModuleGraph) = nextProcs.setLen(0) swap(nextProcs, nextProcs2) - # setup a ``PassEnv`` - let passEnv = newPassEnv(g, c.types, c.symEnv, c.procs) - for id, s in c.symEnv.msymbols: if (let orig = c.symEnv.orig.getOrDefault(id); orig != nil): s.typ = c.types.requestType(orig.typ) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 62e52fcd156..46a99f80aba 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -31,6 +31,9 @@ import from compiler/vm/vmaux import findRecCase, findMatchingBranch from compiler/vm/vmdef import unreachable +# XXX: temporary import; needed for ``PassEnv`` +import compiler/vm/irpasses + type TBlock = object label: PSym start: JoinPoint @@ -68,6 +71,8 @@ type TCtx* = object graph*: ModuleGraph # only needed for testing if a proc has a body idgen*: IdGenerator # needed for creating magics on-demand + passEnv*: PassEnv + module*: PSym config*: ConfigRef @@ -166,14 +171,12 @@ proc genProcSym(c: var TCtx, s: PSym): IRIndex = proc irCall(c: var TCtx, name: string, args: varargs[IRIndex]): IRIndex = # TODO: compiler procs should be cached here in `TCtx` - let sym = c.graph.getCompilerProc(name) - c.irs.irCall(genProcSym(c, sym), args) + let prc = c.passEnv.getCompilerProc(name) + c.irs.irCall(c.irs.irProc(prc), args) func irCall(c: var TCtx, name: string, m: TMagic, args: varargs[IRIndex]): IRIndex {.inline.} = - # TODO: instead of creating a new duplicate magic each time, all used magics - # should be only created once and then reused - let sym = createMagic(c.graph, c.idgen, name, m) - c.irs.irCall(genProcSym(c, sym), args) + # TODO: maybe store all used magics directly in ``TCtx``? + c.irs.irCall(c.irs.irProc(c.passEnv.magics[m]), args) func genLocal(c: var TCtx, kind: LocalKind, t: PType): IRIndex = let From 2c20e1eb16c165b794093e974c2b4867183dc78f Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:42:15 +0100 Subject: [PATCH 015/395] irpasses: add type lowering pass for `seqsV1` --- compiler/vm/cbackend2.nim | 7 ++ compiler/vm/irpasses.nim | 155 +++++++++----------------------------- 2 files changed, 43 insertions(+), 119 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index e305f2bb4de..46da5cd006a 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -424,6 +424,13 @@ proc generateCode*(g: ModuleGraph) = printTypes(irs, env) raise + # type lowering passes + var ttc = TypeTransformCtx(graph: passEnv) + if optSeqDestructors in conf.globalOptions: + discard + else: + lowerSeqTypesV1(ttc, env.types, env.syms) + var gCtx: GlobalGenCtx initGlobalContext(gCtx, env) diff --git a/compiler/vm/irpasses.nim b/compiler/vm/irpasses.nim index d49b6676303..155f43b2b38 100644 --- a/compiler/vm/irpasses.nim +++ b/compiler/vm/irpasses.nim @@ -763,34 +763,6 @@ func setupTransCtx*(g: ModuleGraph, ir: IrStore3, env: IrEnv): GenericTransCtx = # during compilation instead const SeqDataFieldPos = 2 -proc requestSeqType(c: var RefcPassCtx, t: PType): PType = - ## `t` is the original ``tySequence`` type. The resulting type has the following definition: - ## - ## .. code:: nim - ## type NimSeq = ptr object of TGenericSeq - ## data: UncheckedArray[t.elemType] - - let cache = c.graph.cache - - # TODO: use a cache for the instantiations - # XXX: yeah, this is bad; same as for symbols, it'd probably be a good idea to - # introduce a custom type representation for the whole compiler backend - let - typSym = newSym(skType, cache.getIdent("NimSeq"), c.idgen.nextSymId(), t.owner, t.owner.info) - objTyp = newType(tyObject, c.idgen.nextTypeId(), t.owner) - f = newSym(skField, cache.getIdent("data"), c.idgen.nextSymId(), typSym, t.owner.info) - - f.position = SeqDataFieldPos - f.typ = newType(tyUncheckedArray, nextTypeId c.idgen, t.owner) - f.typ.add t.elemType - - objTyp.add(c.graph.getCompilerProc("TGenericSeq").typ) # base type - objTyp.n = newTree(nkRecList, [newSymNode(f)]) - - result = newType(tyPtr, c.idgen.nextTypeId(), t.owner) - result.add(objTyp) - result.linkTo(typSym) - proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) = ## Lowers the `seq`-related magic operations into calls to the v1 `seq` ## implementation @@ -872,97 +844,6 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) else: discard - #[ - of ntkLocal: - # replace locals of `seq` and `string` type with locals of the lowered type - # XXX: there's currently no way to replace an existing local (would be - # simpler and more efficient), so the logic here resorts to - # introducing a new local and replacing all reference to the old one - let (lk, origTyp, sym) = ir.getLocal(cr.position) - - let typ = origTyp - - # TODO: handle ``var`` and ``lent`` wrapped types here - case typ.kind - of tySequence: - # replace seqs with `NimSeq`. The latter is bascially a generic type - # that were instantiating manually here. The old C backend did this step - # in the code-generator - cr.replace() - - let idx = ir.getLocalIdx(cr.position) - var newName = c.localMap.getOrDefault(idx, -1) - if newName == -1: - let nt = c.requestSeqType(typ) - if sym != nil: - let ns = copySym(sym, c.idgen.nextSymId()) - ns.typ = nt - - newName = cr.newLocal(lk, ns) - else: - newName = cr.newLocal(lk, nt) - - c.localMap[idx] = newName - - discard cr.insertLocalRef(newName) - - of tyString: - # replace `string` with `NimString` - - cr.replace() - - let idx = ir.getLocalIdx(cr.position) - var newName = c.localMap.getOrDefault(idx, -1) - if newName == -1: - # XXX: ugly; the whole backend would probably benefit from it's own - # symbol representation - - let nt = c.graph.getCompilerProc("NimString").typ - if sym != nil: - let ns = copySym(sym, c.idgen.nextSymId()) - ns.typ = nt - newName = cr.newLocal(lk, ns) - else: - newName = cr.newLocal(lk, nt) - - c.localMap[idx] = newName - - discard cr.insertLocalRef(newName) - - else: - discard - - of ntkSym: - # replace `string` and `seq` types of globals and parameters by directly - # modifying the `PSym`s - - let sym = ir.sym(n) - - # XXX: the symbol patching here won't work out... - if sym.kind notin {skVar, skLet, skParam}: - # XXX: ignore constants for now - return - - var newTyp: PType = nil - - let typ = sym.typ.skipTypes(abstractInst) - let newType = - case skipTypes(typ, {tyVar, tyLent}).kind - of tySequence: c.requestSeqType(typ) - of tyString: c.graph.getCompilerProc("NimString").typ - else: nil - - # this overwrites possibly present ``tyGenericInst``, ``tyDistinct``, - # etc. but at this point in the backend, we no longer need those - if newType != nil: - if typ.kind == tyVar: - # only ``var seq`` is treated as a pointer-to-pointer, not ``lent`` - sym.typ = newType(tyVar, nextTypeId c.idgen, typ.owner) - sym.typ.add newType - else: - sym.typ = newType - ]# - of ntkPathArr: let arrTyp = c.typeof(n.srcLoc) @@ -987,6 +868,42 @@ proc lowerSeqsV1(c: var RefcPassCtx, n: IrNode3, ir: IrStore3, cr: var IrCursor) else: discard "ignore" +type TypeTransformCtx* = object + graph*: PassEnv + +func lowerSeqTypesV1*(c: var TypeTransformCtx, tenv: var TypeEnv, senv: var SymbolEnv) = + let + strTyp = c.graph.getCompilerType("NimString") + seqTyp = c.graph.getCompilerType("TGenericSeq") + + for id, typ in tenv.mtypes: + case typ.kind + of tnkString: + # TODO: this doesn't work. Since we're doing no deduplication, there may + # exist multiple string types and we're overwriting all of them + # with the new type, meaning that we now have multiple instances + # of the new type! + # overwrite with + typ = tenv[c.graph.getCompilerType("NimString")] + of tnkSeq: + # XXX: same as for strings, we're creating duplicate types here + + # replace a ``seq[T]`` with the following: + # + # .. code:: nim + # type PSeq = ptr object of TGenericSeq # name is just an example + # data: UncheckedArray[T] + # + + let + arr = tenv.requestGenericType(tnkUncheckedArray, typ.base) + sym = senv.addSym(skField, arr, "data") # TODO: this is bad; don't use ``Symbol`` to store field naming information + rec = tenv.requestRecordType(base = seqTyp, [(sym, arr)]) + typ = genGenericType(tnkPtr, rec) + else: + discard + + func lowerSeqsV2(c: GenericTransCtx, n: IrNode3, cr: var IrCursor) = ## Lowers the `seq`-related magic operations into calls to the v2 `seq` ## implementation. Enabled by the `optSeqDestructors` toggle From 626d217907956ba032a8ec307560fe60137e75d4 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:43:11 +0100 Subject: [PATCH 016/395] irpasses: remove debug logic from `runPass` --- compiler/vm/irpasses.nim | 38 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/compiler/vm/irpasses.nim b/compiler/vm/irpasses.nim index 155f43b2b38..aa10087b78b 100644 --- a/compiler/vm/irpasses.nim +++ b/compiler/vm/irpasses.nim @@ -84,20 +84,11 @@ proc runPass*[T](irs: var IrStore3, ctx: T, pass: LinearPass[T]) = var cursor: IrCursor cursor.setup(irs) - try: - var i = 0 - for n in irs.nodes: - cursor.setPos(i) - pass.visit(ctx, n, cursor) - inc i - except PassError as e: - echo e.getStackTrace() - echo "Msg: ", e.msg - echo "IR (error at node: ", e.n, "):" - #printIr(irs, calcStmt(irs)) - echo "Node was added at: " - for e in irs.traceFor(e.n).items: - debugEcho e + var i = 0 + for n in irs.nodes: + cursor.setPos(i) + pass.visit(ctx, n, cursor) + inc i irs.update(cursor) @@ -105,20 +96,11 @@ proc runPass*[T](irs: var IrStore3, ctx: var T, pass: LinearPass2[T]) = var cursor: IrCursor cursor.setup(irs) - try: - var i = 0 - for n in irs.nodes: - cursor.setPos(i) - pass.visit(ctx, n, irs, cursor) - inc i - except PassError as e: - echo e.getStackTrace() - echo "Msg: ", e.msg - echo "IR (error at node: ", e.n, "):" - #printIr(irs, calcStmt(irs)) - echo "Node was added at: " - for e in irs.traceFor(e.n).items: - debugEcho e + var i = 0 + for n in irs.nodes: + cursor.setPos(i) + pass.visit(ctx, n, irs, cursor) + inc i irs.update(cursor) From 7ca77964641c8ecaf5ae2c86d5cd4d686060ebae Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:44:07 +0100 Subject: [PATCH 017/395] add assertions to `newLocal` --- compiler/vm/vmir.nim | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 945619eba0a..06c82b4f691 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -1952,10 +1952,12 @@ func insertJoin*(cr: var IrCursor, t: JoinPoint) = discard func newLocal*(cr: var IrCursor, kind: LocalKind, t: TypeId, s: SymId): int = + assert t != NoneType cr.newLocals.add((kind, t, s)) func newLocal*(cr: var IrCursor, kind: LocalKind, t: TypeId): int = assert kind == lkTemp + assert t != NoneType cr.newLocals.add((kind, t, NoneSymbol)) func insertLocalRef*(cr: var IrCursor, name: int): IRIndex = From fabd4bc2b219186db7ac6d8b5a363c208c4e9574 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:45:24 +0100 Subject: [PATCH 018/395] irtypes: make type generation traces optional In addition, clean up collected traces on flush --- compiler/vm/irtypes.nim | 49 ++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index a33145acb29..dc04dad1cb0 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -16,6 +16,8 @@ import from compiler/vm/vmdef import unreachable +const useGenTraces {.booldefine.} = false + type RecordNodeKind* = enum rnkEmpty # meant to be used by the garbage collector to fill cleaned slots rnkList @@ -122,15 +124,19 @@ type # would also make sure that the borrowed from # `TypeEnv` is sealed for the lifetime of the `DeferredTypeGen` map: Table[ItemId, TypeId] # type-id -> ``TypeId`` - list: seq[(PType, int)] ## the list of deferred types in the order they were requested + list: seq[PType] ## the list of deferred types in the order they were requested voidType*: PType ## a ``PType`` of kind ``tyVoid``. Requesting a nil type ## is remapped to a request using this type charType*: PType - trace: int - traces: seq[seq[StackTraceEntry]] - isInGen: bool + when useGenTraces: + traceMap: seq[int] ## stores the corresponding trace index for each + ## entry in `list` + + trace: int + traces: seq[seq[StackTraceEntry]] + isInGen: bool nextTypeId: uint32 @@ -363,12 +369,8 @@ func skipTypesConsiderImported(t: PType, kinds: TTypeKinds): tuple[imported: boo return result.t = lastSon(result.t) -func requestType*(gen: var DeferredTypeGen, t: PType): TypeId = - let t = - if t != nil: t - else: gen.voidType - - let trace = +when useGenTraces: + func requestTrace(gen: var DeferredTypeGen): int = if gen.isInGen: gen.trace else: @@ -376,13 +378,23 @@ func requestType*(gen: var DeferredTypeGen, t: PType): TypeId = gen.traces.add getStackTraceEntries() gen.traces.high +func requestType*(gen: var DeferredTypeGen, t: PType): TypeId = + let t = + if t != nil: t + else: gen.voidType + + #[ if t.kind in {tyTyped, tyUntyped, tyGenericParam}: debugEcho gen.traces[trace] + ]# let next = TypeId(gen.nextTypeId + 1) result = gen.map.mgetOrPut(t.itemId, next) if result == next: - gen.list.add((t, trace)) + gen.list.add(t) + when useGenTraces: + gen.traceMap.add requestTrace(gen) + inc gen.nextTypeId func lookupType*(gen: DeferredTypeGen, t: ItemId): TypeId = @@ -620,14 +632,17 @@ proc flush*(gen: var DeferredTypeGen, symEnv: var SymbolEnv, conf: ConfigRef) = swap(symEnv, ctx.syms) defer: swap(symEnv, ctx.syms) - gen.isInGen = true + when useGenTraces: + gen.isInGen = true let start = gen.env.types.len var i = 0 while i < gen.list.len: gen.env.types.add(default(Type)) - gen.trace = gen.list[i][1] - translate(gen.env[], ctx, conf, start + i, gen.list[i][0]) + when useGenTraces: + gen.trace = gen.list[i][1] + + translate(gen.env[], ctx, conf, start + i, gen.list[i]) inc i # fix up pass. Remove ``tnkRef`` indirections when used as the base type of objects and also set the relative field offset. @@ -646,7 +661,11 @@ proc flush*(gen: var DeferredTypeGen, symEnv: var SymbolEnv, conf: ConfigRef) = else: discard - gen.isInGen = false + when useGenTraces: + gen.isInGen = false + gen.traceMap.setLen(0) + gen.traces.setLen(0) + # support re-using gen.list.setLen(0) From 1d5aa5fdd0609460df3849d4b9f705102f92ce9d Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:46:29 +0100 Subject: [PATCH 019/395] irtypes: skip irrelevant types before looking for a slot --- compiler/vm/irtypes.nim | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index dc04dad1cb0..19659e40c19 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -211,10 +211,12 @@ const NoneType* = TypeId(0) const NoneSymbol* = SymId(0) # XXX: copied from `ccgtypes`, might need some adjustments +# note: we DO care for ``tyDistinct``s, since they act as nominal types and +# type-bound operations can be attached to them const irrelevantForBackend = {tyGenericBody, tyGenericInst, tyGenericInvocation, - tyDistinct, tyRange, tyStatic, tyAlias, tySink, - tyInferred, tyOwned} + tyRange, tyStatic, tyAlias, tySink, tyInferred, + tyOwned} func `==`*(a, b: TypeId): bool {.borrow.} func `==`*(a, b: SymId): bool {.borrow.} @@ -378,10 +380,29 @@ when useGenTraces: gen.traces.add getStackTraceEntries() gen.traces.high +template overlaps(a: set, b: set): bool = + a * b != {} + func requestType*(gen: var DeferredTypeGen, t: PType): TypeId = - let t = - if t != nil: t - else: gen.voidType + var t = t + if t == nil: + t = gen.voidType + + # skip all types that we are not interested in on the IR level. This does + # however mean that the original type name is lost and the code-generators + # can't make use of it. + # For the version this comment was written against the skipping reduced the + # number of genrated types by 5%, the record nodes by 50%, and the fields + # by 30% + while true: + if t.sym != nil and t.sym.flags.overlaps({sfImportc, sfExportc}): + # don't skip types that have an external interface attached + break + + if t.kind in irrelevantForBackend: + t = t.lastSon + else: + break #[ if t.kind in {tyTyped, tyUntyped, tyGenericParam}: From 920ca43827c14d7ef325e1597d45fa55f9cb36fe Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:47:07 +0100 Subject: [PATCH 020/395] irtypes: introduce the concept of "declared types" --- compiler/vm/irtypes.nim | 44 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index 19659e40c19..bd9acc23eab 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -102,6 +102,39 @@ type Type* = object c*: uint32 # for records, a ``RecordNodeIndex`` sig: seq[TypeId] # for procedures +type + DeclTypeId* = distinct uint32 + + DeclType* = object + ## A `DeclType` is a direct translation from ``PType``. It corresponds to + ## the types defined in the source-code. + ## + ## .. code-block:: nim + ## + ## type + ## A = int # Both `A` and `int` + ## B = object # `B` + ## x: int # `int` + ## var x: seq[int] # `seq[int]` + ## var y: var int # `var int` + ## + ## `TypeDecl` links a canonical type with an "interface" and extra data. + ## A canonical type is the type representation used at the IR-level. + ## The "interface" has the information necessary for the code-generator to + ## handle imported and exported types (e.g. the imported name). + ## The extra data are things that aren't needed for the code-generator or + ## the IR processing to work (e.g. the name), but would, for example, help + ## with generating easier to read code + + # TODO: maybe rename to `DeclaredType`? + + canonical: TypeId + + # XXX: the "interface" and extra part are both combined into a `PSym` for + # now. This makes the first implementation simpler and also helps + # with reducing memory usage + decl: PSym + type TypeEnv* = object ## Holds the data for all types # XXX: in general, a `seq[seq]` could be used for `records`, `fields`, and @@ -111,6 +144,9 @@ type TypeEnv* = object records: seq[RecordNode] ## the bodies for all record-like types (objects and tuples) in one contiguous seq fields: seq[FieldDesc] ## all fields types: seq[Type] ## all types in one contiguous seq + + decls: seq[DeclType] ## all declared types + # XXX: maybe a redirection table for `tnkName` makes sense? Alternatively, # indirections to another tnkName could be allowed typdescs: Table[ItemId, PType] # type-id -> a `tyTypeDesc` type @@ -209,6 +245,7 @@ type const NoneType* = TypeId(0) const NoneSymbol* = SymId(0) +const NoneDType* = DeclTypeId(0) # XXX: copied from `ccgtypes`, might need some adjustments # note: we DO care for ``tyDistinct``s, since they act as nominal types and @@ -224,7 +261,7 @@ func `==`*(a, b: ProcId): bool {.borrow.} func `inc`*(a: var RecordNodeIndex, val: int = 1) {.borrow.} -type SomeId = TypeId | SymId | RecordId | FieldId | ProcId +type SomeId = TypeId | SymId | RecordId | FieldId | ProcId | DeclTypeId template toIndex*(id: SomeId): uint32 = id.uint32 - 1 @@ -238,6 +275,11 @@ func `[]`*(e: SymbolEnv, s: SymId): lent Symbol = func `[]`*(e: TypeEnv, t: TypeId): lent Type = e.types[toIndex(t)] +func `[]`*(e: TypeEnv, t: DeclTypeId): lent Type = + ## For the convenience of the IR processing steps, this procedure returns a + ## ``Type`` instead of a ``DeclType`` + e.types[e.decls[toIndex(t)].canonical.toIndex] + func `[]`*(e: TypeEnv, f: FieldId): lent FieldDesc = e.fields[f.int - 1] From 2c2ad652da6750dcd7874937e1bf7ab480a4dd15 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:48:29 +0100 Subject: [PATCH 021/395] irtypes: store the original symbol as part of the type --- compiler/vm/irtypes.nim | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index bd9acc23eab..77bac103398 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -102,6 +102,10 @@ type Type* = object c*: uint32 # for records, a ``RecordNodeIndex`` sig: seq[TypeId] # for procedures + # XXX: even though ``DeclType`` exists, `Type` is used to store the + # interface information for now + iface*: PSym + type DeclTypeId* = distinct uint32 @@ -706,6 +710,7 @@ proc flush*(gen: var DeferredTypeGen, symEnv: var SymbolEnv, conf: ConfigRef) = gen.trace = gen.list[i][1] translate(gen.env[], ctx, conf, start + i, gen.list[i]) + gen.env.types[start + i].iface = gen.list[i].sym inc i # fix up pass. Remove ``tnkRef`` indirections when used as the base type of objects and also set the relative field offset. From 40c9aafcdb73829f6bf6286dad82dc8f365953c4 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:49:14 +0100 Subject: [PATCH 022/395] cgen2: progress on type handling * support import/export * handle primitive types correctly * proper naming of emitted types --- compiler/vm/cgen2.nim | 121 +++++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index c41307a914c..032e9e1e306 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -24,7 +24,8 @@ import ], compiler/utils/[ int128, - pathutils + pathutils, + ropes ], compiler/vm/[ irtypes, @@ -191,7 +192,10 @@ func `==`(a, b: CTypeId): bool {.borrow.} func mangledName(sym: PSym): string = # TODO: cache the mangled names (and don't use TLoc for it!) # TODO: implement - sym.name.s + if {sfImportc, sfExportc} * sym.flags != {}: + $sym.loc.r + else: + sym.name.s func mangledName(d: Declaration): string = # XXX: temporary @@ -290,8 +294,6 @@ func genRecordNode(c: var TypeGenCtx, decl: var CDecl, i: var RecordNodeIndex, f unreachable(n.kind) -func genCType(dest: var CDecl, cache: var IdentCache, t: Type) - func addWeakType(dest: var CDecl, c: var TypeGenCtx, t: TypeId) = # don't use a weak-dependency for ptr-like types let kind = @@ -356,60 +358,58 @@ func genCTypeDecl(c: var TypeGenCtx, t: TypeId): CDecl = # ``SEQ_DECL_SIZE`` is a macro defined in ``nimbase.h`` result.add cdnkIdent, c.cache.getOrIncl("SEQ_DECL_SIZE").uint32 - #[ - - of tyGenericInst, tyOwned: - result = genCTypeDecl(c, t.lastSon) - of tyDistinct, tyRange, tyOrdinal: - result = genCTypeDecl(c, t[0]) - - ]# + of tnkCString: + result.add cdnkPtr + result.add cdnkIdent, c.cache.getOrIncl("char").uint32 else: - # XXX: using `genCType` doesn't feel right - genCType(result, c.cache, c.env.types[t]) + let kind = c.env.types[t].kind + result.add cdnkIdent, c.cache.getOrIncl(fmt"genCType_missing_{kind}").uint32 assert result.len > 0 -func getTypeName(c: var IdentCache, typ: Type, decl: Declaration): CIdent = +func getTypeName(c: var IdentCache, id: TypeId, typ: Type, decl: Declaration): CIdent = # TODO: not finished - if decl.name.len > 0: - c.getOrIncl(mangledName(decl)) - else: - let h = 0#hashType(typ) - c.getOrIncl(fmt"{typ.kind}_{h}") - -func genCType(dest: var CDecl, cache: var IdentCache, t: Type) = - template addIdentNode(n: string) = - dest.add cdnkIdent, cache.getOrIncl(n).uint32 - - const - NumericalTypeToStr: array[tyInt..tyUInt64, string] = [ - "NI", "NI8", "NI16", "NI32", "NI64", - "NF", "NF32", "NF64", "NF128", - "NU", "NU8", "NU16", "NU32", "NU64"] - - case t.kind - of tnkVoid: addIdentNode("void") - of tnkInt: - {.warning: "NI is never emitted anymore, as we can't detect an `int` here".} - addIdentNode(fmt"NI{t.size}") - of tnkUInt: - addIdentNode(fmt"NU{t.size}") - of tnkCString: - dest.add cdnkPtr - addIdentNode("NIM_CHAR") - of tnkChar: - addIdentNode("NIM_CHAR") - of tnkBool: - addIdentNode("NIM_BOOL") + if typ.iface != nil: + assert sfImportc notin typ.iface.flags + c.getOrIncl(mangledName(typ.iface)) else: - addIdentNode(fmt"genCType_missing_{t.kind}") + # some types require a definition and thus need a name + case typ.kind + of tnkProc: + c.getOrIncl(fmt"proc_{id.uint32}") + of tnkRecord: + # a record type without a name is always a tuple + c.getOrIncl(fmt"tuple_{id.uint32}") + of tnkArray, tnkUncheckedArray: + c.getOrIncl(fmt"array_{id.uint32}") + else: + # the other types don't need generated names + InvalidCIdent + +const AutoImported = {tnkVoid, tnkBool, tnkChar, tnkInt, tnkUInt, tnkFloat} # types that are treated as imported + +func genCTypeInfo(gen: var TypeGenCtx, env: TypeEnv, id: TypeId): CTypeInfo = + let t = env[id] + if t.iface != nil and sfImportc in t.iface.flags: + result = CTypeInfo(name: gen.cache.getOrIncl(mangledName(t.iface))) + elif t.kind in AutoImported: + let name = + case t.kind + of tnkVoid: "void" + of tnkChar: "NIM_CHAR" + of tnkBool: "NIM_BOOL" + of tnkInt: + {.warning: "NI is never emitted anymore, as we can't detect an `int` here".} + fmt"NI{t.size}" + of tnkUInt: fmt"NU{t.size}" + of tnkFloat: fmt"NF{t.size}" + else: unreachable() -func genCType(cache: var IdentCache, t: Type): CTypeInfo = - # TODO: name handling is unfinished - genCType(result.decl, cache, t) - result.name = getTypeName(cache, t, Declaration()) + result = CTypeInfo(name: gen.cache.getOrIncl(name)) + else: + let decl = genCTypeDecl(gen, id) + result = CTypeInfo(decl: decl, name: getTypeName(gen.cache, id, t, Declaration())) func useFunction(c: var ModuleCtx, s: ProcId) = @@ -1018,11 +1018,14 @@ proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl, pos: var int) = let info {.cursor.} = c.ctypes[n.a.uint32] assert info.name != InvalidCIdent - f.write: - case info.decl[0].kind - of cdnkStruct: "struct " - of cdnkUnion: "union " - else: unreachable() + if info.decl.len > 0: + # only specify the namespace if the type has a declaration (it's an + # imported type otherwise) + f.write: + case info.decl[0].kind + of cdnkStruct: "struct " + of cdnkUnion: "union " + else: unreachable(info.decl[0].kind) f.write c.idents[info.name] @@ -1124,10 +1127,8 @@ func initGlobalContext*(c: var GlobalGenCtx, env: IrEnv) = c.ctypes.add(CTypeInfo(name: gen.cache.getOrIncl("void"))) # the `VoidCType` # TODO: use ``setLen`` + [] - var i = 0 for id in types(env.types): - let decl = genCTypeDecl(gen, id) - c.ctypes.add CTypeInfo(decl: decl, name: getTypeName(gen.cache, env.types[id], Declaration())) + c.ctypes.add genCTypeInfo(gen, env.types, id) swap(gen.cache, c.idents) @@ -1198,8 +1199,8 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG if n.kind == cdnkType: emitWithDeps(f, c, n.a.CTypeId, marker) - if info.decl.len > 0: - # only emit types that have a declaration + if info.decl.len > 0 and info.name != InvalidCIdent: + # only emit types that need a definition/declaration emitCType(f, c, info) var marker: PackedSet[CTypeId] From 1529a879eec253bd7810214b8537bb787f9f182a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:50:28 +0100 Subject: [PATCH 023/395] cgen2: fix int literals in declarations --- compiler/vm/cgen2.nim | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 032e9e1e306..6b3fb23a11f 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -115,7 +115,7 @@ type cdnkWeakType # a "weak" reference to a type, meaning that a definition of # the type doesn't have to be present in the translation unit - cdnkIntLit # a unsigned integer literal (`a` encodes the high and `b` the low bits) + cdnkIntLit # a unsigned integer literal (`a` encodes the low and `b` the high bits) cdnkFuncPtr # function-ptr type decl cdnkPtr # XXX: strictly speaking, the `*` is part of the declarator and not of the specifier @@ -215,7 +215,7 @@ func addField(decl: var CDecl, cache: var IdentCache, typ: CTypeId, name: sink s decl.addField(typ, cache.getOrIncl(name)) func addIntLit(decl: var CDecl, i: uint64) {.inline.} = - decl.add cdnkIntLit, uint32(i shl 32), uint32(i and 0xFFFFFFFF'u64) + decl.add cdnkIntLit, uint32(i and 0xFFFFFFFF'u64), uint32(i shr 32) type CTypeMap = Table[TypeKey, CTypeId] @@ -1037,7 +1037,7 @@ proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl, pos: var int) = f.write c.idents[n.a.CIdent] of cdnkIntLit: - let val = (n.a.uint64 shl 32) or n.b.uint64 + let val = n.a.uint64 or (n.b.uint64 shl 32) f.write $val of cdnkBracket: From 77d15486efa20d739f193e71ff268b4782ac4987 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:51:27 +0100 Subject: [PATCH 024/395] cgen2: mark accessed record types as used --- compiler/vm/cgen2.nim | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 6b3fb23a11f..e78e06f885f 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -766,6 +766,12 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = let src = names[n.srcLoc] var ast = start().add(cnkDotExpr).add(src) + # accessing a record means that we need a complete type. While the type + # we're marking as used here isn't necessarily the type that holds the + # field we're accessing (i.e. inheritance is used), the base types are + # automatically also pulled in + c.m.useType(typId) + if field.sym != NoneSymbol: discard ast.ident(c.gl.idents, mangledName(c.env.syms[field.sym].decl)) else: From 0c9508bd479f8ea26feb6b1c6f392a30b7343fe6 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:52:12 +0100 Subject: [PATCH 025/395] cgen2: fix "missing identifier" errors Introduce struct names into the ordinary name-space via a `typedef` --- compiler/vm/cgen2.nim | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index e78e06f885f..7bdb1e00fa6 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -1071,7 +1071,12 @@ proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo) = let kind = info.decl[0].kind case kind of cdnkStruct, cdnkUnion: + # emit the definition as ``typedef struct {} X;`` in order to make the + # identifier available in the ordinary name-space. This removes the need + # to specify the name-space on every usage (less generated code) + f.write "typedef " emitCDecl(f, c, info.decl, pos) + f.write fmt" {c.idents[info.name]}" of cdnkBracket: f.write "typedef " pos = 1 From 28fe01a2b19608dc3d14cafd0b07f44623bf94b2 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:53:25 +0100 Subject: [PATCH 026/395] cgen2: don't use a weak-dep for an `UncheckedArray`'s element type --- compiler/vm/cgen2.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 7bdb1e00fa6..1258682eb37 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -354,7 +354,7 @@ func genCTypeDecl(c: var TypeGenCtx, t: TypeId): CDecl = of tnkUncheckedArray: result.add cdnkBracket - result.addWeakType(c, c.env.types.elemType(t)) + result.add cdnkType, c.requestType(c.env.types.elemType(t)).uint32 # ``SEQ_DECL_SIZE`` is a macro defined in ``nimbase.h`` result.add cdnkIdent, c.cache.getOrIncl("SEQ_DECL_SIZE").uint32 From 8def3bf6bfce7446c981b8cd22d53e4db8959cf6 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:54:32 +0100 Subject: [PATCH 027/395] cgen2: generate proc header upfront --- compiler/vm/cgen2.nim | 13 ++++++++----- compiler/vm/irtypes.nim | 9 ++++++++- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 1258682eb37..dbd3795cfd7 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -1143,6 +1143,11 @@ func initGlobalContext*(c: var GlobalGenCtx, env: IrEnv) = swap(gen.cache, c.idents) + # create the procedure headers + # TODO: use ``setLen`` + [] + for id in env.procs.items: + c.funcs.add genCProcHeader(c, env.procs, id) + proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalGenCtx, env: IrEnv, procs: openArray[(ProcId, IrStore3)]) = let f = open(filename.string, fmWrite) defer: f.close() @@ -1222,9 +1227,7 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG # generate all procedure forward declarations for id in mCtx.funcs.items: #echo "decl: ", sym.name.s, " at ", conf.toFileLineCol(sym.info) - let hdr = genCProcHeader(ctx, env.procs, id) - - writeDecl(f, ctx, hdr, env.procs[id].decl) + writeDecl(f, ctx, ctx.funcs[id.toIndex], env.procs[id].decl) for id in mCtx.syms.items: let sym = env.syms[id] @@ -1248,8 +1251,8 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG let (id, _) = procs[i] prc = env.procs[id] - let hdr = genCProcHeader(ctx, env.procs, id) - writeDef(f, ctx, hdr, prc.decl) + + writeDef(f, ctx, ctx.funcs[id.toIndex], prc.decl) try: emitCAst(f, ctx, it) except: diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index 77bac103398..a229b8582cb 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -873,4 +873,11 @@ func numParams*(e: ProcedureEnv, p: ProcId): int = iterator params*(e: ProcedureEnv, p: ProcId): tuple[name: lent string, typ: TypeId] = for n, t in e[p].params.items: - yield (n, t) \ No newline at end of file + yield (n, t) + +iterator items*(e: ProcedureEnv): ProcId = + var i = 0 + let L = e.procs.len + while i < L: + yield toId(i, ProcId) + inc i From a4d36568eef9d6b7b7596a94e21917063a1e2aab Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:55:06 +0100 Subject: [PATCH 028/395] fix: parameter aliasing issue with `genCProcHeader` --- compiler/vm/cgen2.nim | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index dbd3795cfd7..6b9b91ecd4f 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -477,27 +477,27 @@ func gen(c: GenCtx, irs: IrStore3, n: IRIndex): CAst = c.names[n] #"gen_MISSING" -func mapTypeV3(c: GlobalGenCtx, t: TypeId): CTypeId +func mapTypeV3(t: TypeId): CTypeId func mapTypeV2(c: var GenCtx, t: TypeId): CTypeId = # TODO: unfinished c.m.useType(t) # mark the type as used - mapTypeV3(c.gl, t) + mapTypeV3(t) -func mapTypeV3(c: GlobalGenCtx, t: TypeId): CTypeId = +func mapTypeV3(t: TypeId): CTypeId = if t != NoneType: # XXX: maybe just have a ``NoneType`` -> ``VoidCType`` mapping in the table instead? CTypeId(t) else: VoidCType -func genCProcHeader(c: var GlobalGenCtx, env: ProcedureEnv, s: ProcId): CProcHeader = - result.returnType = mapTypeV3(c, env.getReturnType(s)) +func genCProcHeader(idents: var IdentCache, env: ProcedureEnv, s: ProcId): CProcHeader = + result.returnType = mapTypeV3(env.getReturnType(s)) result.args.newSeq(env.numParams(s)) var i = 0 for p in env.params(s): - result.args[i] = (mapTypeV3(c, p.typ), c.idents.getOrIncl(p.name)) + result.args[i] = (mapTypeV3(p.typ), idents.getOrIncl(p.name)) inc i @@ -1146,7 +1146,7 @@ func initGlobalContext*(c: var GlobalGenCtx, env: IrEnv) = # create the procedure headers # TODO: use ``setLen`` + [] for id in env.procs.items: - c.funcs.add genCProcHeader(c, env.procs, id) + c.funcs.add genCProcHeader(c.idents, env.procs, id) proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalGenCtx, env: IrEnv, procs: openArray[(ProcId, IrStore3)]) = let f = open(filename.string, fmWrite) From 4ab13a14913a7051d1940ba6a396de54fc00ff07 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:56:22 +0100 Subject: [PATCH 029/395] cgen2: fix types without identifiers not being searched for dependencies --- compiler/vm/cgen2.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 6b9b91ecd4f..d39dd82e895 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -1206,7 +1206,7 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG proc emitWithDeps(f: File, c: GlobalGenCtx, t: CTypeId, marker: var PackedSet[CTypeId]) = let info {.cursor.} = c.ctypes[t.int] - if info.name == InvalidCIdent or marker.containsOrIncl(t): + if marker.containsOrIncl(t): # nothing to do return From dfceb9487fb3dd7e4622b8d6be060c339256acd9 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:57:32 +0100 Subject: [PATCH 030/395] cgen2: pre-compute procedure identifiers --- compiler/vm/cgen2.nim | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index d39dd82e895..3106e1d0d47 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -136,6 +136,8 @@ type name: CIdent # CProcHeader = object + ident: CIdent + returnType: CTypeId args: seq[tuple[typ: CTypeId, name: CIdent]] @@ -492,6 +494,7 @@ func mapTypeV3(t: TypeId): CTypeId = VoidCType func genCProcHeader(idents: var IdentCache, env: ProcedureEnv, s: ProcId): CProcHeader = + result.ident = idents.getOrIncl(mangledName(env[s].decl)) result.returnType = mapTypeV3(env.getReturnType(s)) result.args.newSeq(env.numParams(s)) @@ -531,6 +534,10 @@ func ident(c: var CAstBuilder, idents: var IdentCache, name: string): var CAstBu result = c c.ast.add cnkIdent, idents.getOrIncl(name).uint32 +func ident(c: var CAstBuilder, ident: CIdent): var CAstBuilder = + result = c + c.ast.add cnkIdent, ident.uint32 + func intLit(c: var CAstBuilder, v: BiggestInt): var CAstBuilder = result = c # TODO: int literals need some more development @@ -723,7 +730,7 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = if prc.magic == mNone: useFunction(c.m, n.procId) - names[i] = start().ident(c.gl.idents, mangledName(prc.decl)).fin() + names[i] = start().ident(c.gl.funcs[toIndex(n.procId)].ident).fin() of ntkLocal: let (kind, typ, sym) = irs.getLocal(i) if sym == NoneSymbol: @@ -1103,7 +1110,7 @@ proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo) = proc writeDecl(f: File, c: GlobalGenCtx, h: CProcHeader, decl: Declaration) = emitType(f, c, h.returnType) f.write(" ") - f.write(mangledName(decl)) + f.write(c.idents[h.ident]) f.write("(") for i, it in h.args.pairs: if i > 0: @@ -1116,7 +1123,7 @@ proc writeDecl(f: File, c: GlobalGenCtx, h: CProcHeader, decl: Declaration) = proc writeDef(f: File, c: GlobalGenCtx, h: CProcHeader, decl: Declaration) = emitType(f, c, h.returnType) f.write(" ") - f.write(mangledName(decl)) + f.write(c.idents[h.ident]) f.write("(") for i, it in h.args.pairs: if i > 0: From 47a0d27aaeb3d69ff43db53f1a55e089bc199c4b Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:58:23 +0100 Subject: [PATCH 031/395] cgen2: overhaul how types are emitted * correctly handle imported types that require includes * no more identifier tags everywhere in the output The change does make the code a bit more complex and slower. --- compiler/vm/cgen2.nim | 131 ++++++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 38 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 3106e1d0d47..f9f3bb386bc 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -410,8 +410,14 @@ func genCTypeInfo(gen: var TypeGenCtx, env: TypeEnv, id: TypeId): CTypeInfo = result = CTypeInfo(name: gen.cache.getOrIncl(name)) else: - let decl = genCTypeDecl(gen, id) - result = CTypeInfo(decl: decl, name: getTypeName(gen.cache, id, t, Declaration())) + let name = getTypeName(gen.cache, id, t, Declaration()) + var decl = genCTypeDecl(gen, id) + + # set the identifier field for struct and union types: + if decl[0].kind in {cdnkStruct, cdnkUnion}: + decl[1] = (cdnkIdent, name.uint32, 0'u32) + + result = CTypeInfo(decl: decl, name: name) func useFunction(c: var ModuleCtx, s: ProcId) = @@ -1031,15 +1037,6 @@ proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl, pos: var int) = let info {.cursor.} = c.ctypes[n.a.uint32] assert info.name != InvalidCIdent - if info.decl.len > 0: - # only specify the namespace if the type has a declaration (it's an - # imported type otherwise) - f.write: - case info.decl[0].kind - of cdnkStruct: "struct " - of cdnkUnion: "union " - else: unreachable(info.decl[0].kind) - f.write c.idents[info.name] of cdnkPtr: @@ -1070,7 +1067,7 @@ proc emitCDecl(f: File, c: GlobalGenCtx, decl: CDecl) = var pos = 0 emitCDecl(f, c, decl, pos) -proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo) = +proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo, isFwd: bool) = var pos = 0 assert info.decl.len > 0, c.idents[info.name] @@ -1078,12 +1075,24 @@ proc emitCType(f: File, c: GlobalGenCtx, info: CTypeInfo) = let kind = info.decl[0].kind case kind of cdnkStruct, cdnkUnion: - # emit the definition as ``typedef struct {} X;`` in order to make the - # identifier available in the ordinary name-space. This removes the need - # to specify the name-space on every usage (less generated code) - f.write "typedef " - emitCDecl(f, c, info.decl, pos) - f.write fmt" {c.idents[info.name]}" + if isFwd: + # --> ``typdef struct X X;`` + # forward-declare the record type and make the identifier available in + # the ordinary namespace + f.write "typedef " + f.write: + case kind + of cdnkStruct: "struct" + of cdnkUnion: "union" + else: unreachable() + + f.write fmt" {c.idents[info.name]}" + f.write fmt" {c.idents[info.name]}" + pos = info.decl.len # mark the body as processed + else: + # definition requested + emitCDecl(f, c, info.decl, pos) + of cdnkBracket: f.write "typedef " pos = 1 @@ -1200,36 +1209,82 @@ proc emitModuleToFile*(conf: ConfigRef, filename: AbsoluteFile, ctx: var GlobalG for id in mCtx.syms.items: mCtx.useType(env.syms[id].typ) - let used = mCtx.types - - f.writeLine "#define NIM_INTBITS 64" # TODO: don't hardcode + # collect all types that we need to be defined in this translation unit (.c file) - # headers - for h in mCtx.headers.items: - f.writeLine fmt"#include {h}" + type TypeDef = tuple[fwd: bool, id: CTypeId] - # type section + func collectFwd(list: var seq[TypeDef], types: seq[CTypeInfo], id: CTypeId, marker, markerFwd: var PackedSet[CTypeId]) = + if id notin marker and not markerFwd.containsOrIncl(id): + # not defined nor forward declared + assert types[id.int].name != InvalidCIdent + list.add (true, id) - proc emitWithDeps(f: File, c: GlobalGenCtx, t: CTypeId, - marker: var PackedSet[CTypeId]) = - let info {.cursor.} = c.ctypes[t.int] - if marker.containsOrIncl(t): + func collectOrdered(list: var seq[TypeDef], types: seq[CTypeInfo], + id: CTypeId, marker, markerFwd: var PackedSet[CTypeId]) = + let info {.cursor.} = types[id.int] + if marker.containsOrIncl(id): # nothing to do return - # scan the type's body for non-weak dependencies and emit them first + # scan the type's body for dependencies and add them first for n in info.decl.items: - if n.kind == cdnkType: - emitWithDeps(f, c, n.a.CTypeId, marker) + case n.kind + of cdnkType: + # requires a definition + collectOrdered(list, types, n.a.CTypeId, marker, markerFwd) + of cdnkWeakType: + # only requires a forward declaration + collectFwd(list, types, n.a.CTypeId, marker, markerFwd) + else: + discard + + # XXX: the used headers could also be collected here, but that would grow + # the required state even more + + if info.name != InvalidCIdent: + # only collect types that have an identifier. The others don't need a + # typedef (they're inlined directly) and also don't/can't have header + # dependency information attached + list.add (false, id) + + var typedefs: seq[TypeDef] + var marker, markerFwd: PackedSet[CTypeId] + for it in mCtx.types.items: + collectOrdered(typedefs, ctx.ctypes, it, marker, markerFwd) + + marker.reset() # no longer needed + + # collect the header dependencies from the used types + # XXX: to be more efficient, writing out the header includes for the types + # could be combined with emitting the type definitions + + for _, id in typedefs.items: + let iface = env.types[id].iface + if iface != nil and lfHeader in iface.loc.flags: + echo ctx.idents[ctx.ctypes[id.int].name], ": ", iface.loc.flags + mCtx.headers.incl getStr(iface.annex.path) + + # ----- start of the emit logic ----- - if info.decl.len > 0 and info.name != InvalidCIdent: - # only emit types that need a definition/declaration - emitCType(f, c, info) + f.writeLine "#define NIM_INTBITS 64" # TODO: don't hardcode + + # headers + for h in mCtx.headers.items: + f.writeLine fmt"#include {h}" + + # type section - var marker: PackedSet[CTypeId] - for it in used.items: - emitWithDeps(f, ctx, it, marker) + for fwd, id in typedefs.items: + let info = ctx.ctypes[id.int] + # imported types don't have a body + if info.decl.len > 0: + if not fwd and info.decl[0].kind in {cdnkStruct, cdnkUnion} and id notin markerFwd: + # struct and unions types always use a forward-declaration bacause the + # emitted typedef makes the identifier available in the ordinary + # name-space + emitCType(f, ctx, ctx.ctypes[id.int], isFwd=true) + emitCType(f, ctx, ctx.ctypes[id.int], isFwd=fwd) # generate all procedure forward declarations for id in mCtx.funcs.items: From a60b3d5c4934bb2d35620babaa09b43c390adaf4 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 16:59:08 +0100 Subject: [PATCH 032/395] vmir: make node-traces togglable at compile-time --- compiler/vm/vmir.nim | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 06c82b4f691..1768fb9a1a3 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -15,6 +15,8 @@ import compiler/vm/irtypes export irtypes +const useNodeTraces {.booldefine.} = false + type IRIndex* = int const InvalidIndex* = -1 # XXX: it would be better for `InvalidIndex` to be '0' @@ -298,10 +300,12 @@ template missingImpl*() = assert false func traceFor*(s: IrStore3, i: IRIndex): seq[StackTraceEntry] = - s.sources[i] + when useNodeTraces: + s.sources[i] func traceForLocal*(s: IrStore3, i: int): seq[StackTraceEntry] = - s.localSrc[i] + when useNodeTraces: + s.localSrc[i] # version 1 @@ -364,8 +368,9 @@ func add(x: var IrStore, n: sink IrNode2): IRIndex = func add(x: var IrStore3, n: sink IrNode3): IRIndex = result = x.nodes.len.IRIndex x.nodes.add n - {.noSideEffect.}: - x.sources.add getStackTraceEntries() + when useNodeTraces: + {.noSideEffect.}: + x.sources.add getStackTraceEntries() ## version 2/3 @@ -374,8 +379,9 @@ func genLocal*(c: var IrStore3, kind: LocalKind, typ: TypeId): int = assert typ != NoneType c.locals.add((kind, typ, NoneSymbol)) result = c.locals.high - {.noSideEffect.}: - c.localSrc.add(getStackTraceEntries()) + when useNodeTraces: + {.noSideEffect.}: + c.localSrc.add(getStackTraceEntries()) func genLocal*(c: var IrStore3, kind: LocalKind, typ: TypeId, sym: SymId): int = ## A local that has a symbol @@ -386,8 +392,9 @@ func genLocal*(c: var IrStore3, kind: LocalKind, typ: TypeId, sym: SymId): int = # for the local? c.locals.add((kind, typ, sym)) result = c.locals.high - {.noSideEffect.}: - c.localSrc.add(getStackTraceEntries()) + when useNodeTraces: + {.noSideEffect.}: + c.localSrc.add(getStackTraceEntries()) func irContinue*(c: var IrStore3) = discard c.add(IrNode3(kind: ntkContinue)) @@ -1891,8 +1898,9 @@ func replace*(cr: var IrCursor) = func insert(cr: var IrCursor, n: sink IrNode3): IRIndex = cr.newNodes.add n - {.cast(noSideEffect).}: - cr.traces.add getStackTraceEntries() + when useNodeTraces: + {.cast(noSideEffect).}: + cr.traces.add getStackTraceEntries() if cr.actions.len > 0 and cr.actions[^1][1].a == cr.pos: # append to the insertion or replacement @@ -2069,7 +2077,9 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = template insertNode(p: int) = patchTable[oldLen + np] = p ir.nodes.insert(cr.newNodes[np], p) - ir.sources.insert(cr.traces[np], p) + + when useNodeTraces: + ir.sources.insert(cr.traces[np], p) patch(ir.nodes[p], patchTable) @@ -2083,7 +2093,8 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = # replace the node ir.nodes[slice.b + currOff] = cr.newNodes[np] - ir.sources[slice.b + currOff] = cr.traces[np] + when useNodeTraces: + ir.sources[slice.b + currOff] = cr.traces[np] inc np # patch the replaced node From e1ff74fe79ae87cfab4b6067c13dba9ad4dbc95a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:00:14 +0100 Subject: [PATCH 033/395] vmir: introduce `bcAccessEnv` Required for the upcoming parameter handling rework to work without having to already transform procedure in `irgen`. --- compiler/vm/irgen.nim | 11 +++++++++-- compiler/vm/vmir.nim | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 46a99f80aba..e053b72d3bd 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -28,7 +28,7 @@ import results ] -from compiler/vm/vmaux import findRecCase, findMatchingBranch +from compiler/vm/vmaux import findRecCase, findMatchingBranch, getEnvParam from compiler/vm/vmdef import unreachable # XXX: temporary import; needed for ``PassEnv`` @@ -929,7 +929,14 @@ proc genRdVar(c: var TCtx; n: PNode;): IRIndex = if sfGlobal in s.flags: c.irGlobal(s) elif s.kind == skParam: - c.irParam(s) + if s.position < c.prc.sym.typ.len - 1: + c.irParam(s) + else: + assert tfCapturesEnv in c.prc.sym.typ.flags + # the parameter is the hidden environment parameter + let envT = c.types.requestType(getEnvParam(c.prc.sym).typ) + c.irs.irCall(bcAccessEnv, envT) + elif s.kind == skResult: c.irs.irLocal(0) # TODO: don't hardcode else: c.irs.irLocal(c.prc.local(s)) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 1768fb9a1a3..92be40458de 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -195,6 +195,8 @@ type bcNew + bcAccessEnv ## access the current procedure's closure environment + bcUnlikely # XXX: alternatively, turn `system.unlikelyProc` into a .compilerproc IrNode3* = object From 27b4d5f7f6db5f7c982cd7e9d935fe78c6e8d479 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:01:11 +0100 Subject: [PATCH 034/395] irgen: don't use a `deref` on `var openArray` access --- compiler/vm/irgen.nim | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index e053b72d3bd..24f8fc7a263 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -1324,7 +1324,15 @@ proc gen(c: var TCtx; n: PNode; dest: var IRIndex) = of nkDotExpr: dest = genObjAccess(c, n) of nkCheckedFieldExpr: dest = genCheckedObjAccess(c, n) of nkBracketExpr: dest = genArrAccess(c, n) - of nkDerefExpr, nkHiddenDeref: dest = genDeref(c, n) + of nkDerefExpr: dest = genDeref(c, n) + of nkHiddenDeref: + if n[0].typ.skipTypes({tyVar, tyLent}).kind == tyOpenArray: + # don't generate a 'deref' for openArray + # XXX: sem shouldn't introduce a ``nkHiddenDeref`` for openArrays in the + # first place + dest = genx(c, n[0]) + else: + dest = genDeref(c, n) of nkAddr, nkHiddenAddr: dest = genAddr(c, n) of nkIfStmt, nkIfExpr: let fwd = c.irs.irJoinFwd() From 9ce37e6131c6bf9ce1de4a47561ff8d2927a0b65 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:02:07 +0100 Subject: [PATCH 035/395] irgen: don't use `PType`s where not necessary --- compiler/vm/irgen.nim | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 24f8fc7a263..2b9b0e65d2c 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -141,9 +141,8 @@ func irLit(c: var TCtx, n: PNode): IRIndex = c.irs.irLit((n, typ)) -proc irImm(c: var TCtx, val: SomeInteger): IRIndex = - # XXX: getSysType has side-effects - c.irLit newIntTypeNode(BiggestInt(val), c.graph.getSysType(unknownLineInfo, tyInt)) +func irImm(c: var TCtx, val: SomeInteger): IRIndex = + c.irs.irLit (newIntNode(nkIntLit, BiggestInt(val)), c.passEnv.sysTypes[tyInt]) template tryOrReturn(code): untyped = try: @@ -195,6 +194,11 @@ proc getTemp(cc: var TCtx; tt: PType): IRIndex = let id = cc.genLocal(lkTemp, tt) cc.irs.irLocal(id) +func irNull(c: var TCtx, t: TypeId): IRIndex = + # XXX: maybe `irNull` should be a dedicated IR node? + let id = c.irs.genLocal(lkTemp, t) + c.irs.irLocal(id) + func irNull(c: var TCtx, t: PType): IRIndex = # XXX: maybe `irNull` should be a dedicated IR node? let id = c.genLocal(lkTemp, t) @@ -1247,7 +1251,7 @@ proc genTupleConstr(c: var TCtx, n: PNode): IRIndex = proc genClosureConstr(c: var TCtx, n: PNode): IRIndex = let tmp = c.genx(n[0]) let env = - if n[1].kind == nkNilLit: c.irNull(c.graph.getSysType(n.info, tyNil)) + if n[1].kind == nkNilLit: c.irNull(c.passEnv.sysTypes[tyPointer]) else: c.genx(n[1]) c.irs.irCall(bcNewClosure, c.types.requestType(n.typ), tmp, env) From 35c7c2a3178e4949357839177628ef8704598b43 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:03:27 +0100 Subject: [PATCH 036/395] cbackend2: move error logging to separate procedure --- compiler/vm/cbackend2.nim | 50 +++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 46da5cd006a..c7fbba4127f 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -295,6 +295,34 @@ proc newPassEnv(g: ModuleGraph, tgen: var DeferredTypeGen, syms: var SymbolEnv, for t in { tyVoid, tyInt..tyFloat64, tyBool, tyChar, tyString, tyCstring, tyPointer }.items: result.sysTypes[t] = tgen.requestType(g.getSysType(unknownLineInfo, t)) +proc logError(conf: ConfigRef, ir: IrStore3, prc: ProcId, env: IrEnv, pos: (bool, int)) = + let sym = env.procs.orig.getOrDefault(prc) + if sym != nil: + echo conf.toFileLineCol(sym.info) + else: + echo "???" + + if pos[0]: + echo "Node added at:" + echoTrace(ir, pos[1]) + + if pos[0]: + echo "Node position: ", pos[1] + + echo "IR:" + printIr(ir, env, calcStmt(ir)) + + printTypes(ir, env) + +template logError(ir: IrStore3, env: IrEnv, prc: ProcId, code: untyped) = + try: + code + except PassError as e: + logError(conf, ir, prc, env, (true, e.n)) + raise + except: + logError(conf, ir, prc, env, (false, 0)) + raise proc generateCode*(g: ModuleGraph) = ## The backend's entry point. Orchestrates code generation and linking. If @@ -381,7 +409,7 @@ proc generateCode*(g: ModuleGraph) = for i in 0.. Date: Wed, 17 Aug 2022 17:04:25 +0100 Subject: [PATCH 037/395] vmir: don't cache extra join point information Less work for `IrStore3` updates. --- compiler/vm/vmir.nim | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 92be40458de..06650756689 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -255,8 +255,8 @@ type IrStore3* = object nodes: seq[IrNode3] - joins: seq[(IRIndex, bool)] # joint point id -> ir position #syms: seq[PSym] + numJoins: int literals: seq[Literal] locals: seq[(LocalKind, TypeId, SymId)] @@ -518,19 +518,18 @@ func irJoinFwd*(c: var IrStore3): JoinPoint = ## TODO: document ## Helper to make modifications of the IR easier. During IR-gen when the ## target is not yet known (e.g. when generating an if-branch) - c.joins.add (0, false) - result = c.joins.high + result = c.numJoins + inc c.numJoins func irLoopJoin*(c: var IrStore3): JoinPoint = - result = c.joins.len + result = c.numJoins let pos = c.add IrNode3(kind: ntkJoin, joinPoint: result) - c.joins.add (pos, true) + inc c.numJoins func irJoin*(c: var IrStore3, jp: JoinPoint) = ## TODO: document ## A join point let pos = c.add(IrNode3(kind: ntkJoin, joinPoint: jp)) - c.joins[jp][0] = pos func keys*(x: seq): Slice[int] = 0..x.high @@ -538,7 +537,7 @@ func keys*(x: seq): Slice[int] = func irBranch*(c: var IrStore3, cond: IRIndex, target: JoinPoint): IRIndex {.discardable.} = ## TODO: document ## A branch - assert target in c.joins.keys + assert target in 0.. 0 and ir.nodes[^1].kind in {ntkGoto, ntkGotoCont} @@ -622,10 +624,7 @@ func getLit*(irs: IrStore3, n: IrNode3): lent Literal = irs.literals[n.litIdx] func isLoop*(ir: IrStore3, j: JoinPoint): bool = - ir.joins[j][1] - -func position*(ir: IrStore3, j: JoinPoint): IRIndex = - ir.joins[j][0] + false#ir.joins[j][1] func kind*(n: IrNode3): IrNodeKind3 {.inline.} = n.kind @@ -2118,8 +2117,4 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = if p < oldLen: # patch the remaining nodes - process(ir, p, oldLen) - - # adjust the join points - for jp in ir.joins.mitems: - jp[0] = patchTable[jp[0]] \ No newline at end of file + process(ir, p, oldLen) \ No newline at end of file From b92632a009c0b539ddb8cce1dfad37a2b96688f7 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:05:07 +0100 Subject: [PATCH 038/395] vmir: don't use closure in `patchIdx` It accidentally required a copy of `patchTable` which lead to huge slow downs (~30s !! when doing a bootstrap run). --- compiler/vm/vmir.nim | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 06650756689..554870357c4 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -1972,11 +1972,14 @@ func newLocal*(cr: var IrCursor, kind: LocalKind, t: TypeId): int = func insertLocalRef*(cr: var IrCursor, name: int): IRIndex = cr.insert IrNode3(kind: ntkLocal, local: name) +func patchIdx(n: var IRIndex, patchTable: seq[IRIndex]) = + assert patchTable[n] != -1, "node was removed" + n = patchTable[n] func patch(n: var IrNode3, patchTable: seq[IRIndex]) = - func patchIdx(n: var IRIndex) = - assert patchTable[n] != -1, "node was removed" - n = patchTable[n] + + template patchIdx(n: var IRIndex) = + patchIdx(n, patchTable) case n.kind of ntkCall: From 8e4f305a535462ef8fedcd079ddb89b53611fcdf Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:06:05 +0100 Subject: [PATCH 039/395] vmir: introduce `ntkParam` No longer use `ntkSym` to reference parameters --- compiler/vm/cbackend2.nim | 3 ++- compiler/vm/cgen2.nim | 5 +++++ compiler/vm/irdbg.nim | 4 +++- compiler/vm/irgen.nim | 2 +- compiler/vm/irpasses.nim | 2 ++ compiler/vm/irtypes.nim | 4 ++++ compiler/vm/vmir.nim | 19 ++++++++++++++++--- 7 files changed, 33 insertions(+), 6 deletions(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index c7fbba4127f..19759dc81ed 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -374,7 +374,7 @@ proc generateCode*(g: ModuleGraph) = if g.getBody(it).kind == nkEmpty: # a quick fix to not run `irgen` for 'importc'ed procs - moduleProcs[realIdx].add((sId, IrStore3())) + moduleProcs[realIdx].add((sId, IrStore3(owner: sId))) continue let ir = generateCodeForProc(c, it) @@ -382,6 +382,7 @@ proc generateCode*(g: ModuleGraph) = #doAssert mIdx == realIdx moduleProcs[realIdx].add((sId, c.unwrap ir)) + moduleProcs[realIdx][^1][1].owner = sId # flush deferred types already to reduce memory usage a bit c.types.flush(c.symEnv, g.config) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index f9f3bb386bc..9cc5e1de319 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -731,6 +731,11 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = useType(c.m, sym.typ) names[i] = start().ident(c.gl.idents, mangledName(sym.decl)).fin() + + of ntkParam: + let name = c.gl.funcs[toIndex(irs.owner)].args[n.paramIndex].name + names[i] = start().ident(name).fin() + of ntkProc: let prc = c.env.procs[n.procId] if prc.magic == mNone: diff --git a/compiler/vm/irdbg.nim b/compiler/vm/irdbg.nim index b8363cfaa88..ef7356fc83d 100644 --- a/compiler/vm/irdbg.nim +++ b/compiler/vm/irdbg.nim @@ -9,7 +9,7 @@ func calcStmt*(irs: IrStore3): seq[bool] = var i = 0 for n in irs.nodes: case n.kind - of ntkSym, ntkLocal, ntkJoin, ntkLit, ntkGoto, ntkProc: + of ntkSym, ntkLocal, ntkJoin, ntkLit, ntkGoto, ntkProc, ntkParam: discard of ntkCall: for it in n.args: @@ -59,6 +59,8 @@ proc printIr*(irs: IrStore3, e: IrEnv, exprs: seq[bool]) = line = fmt"sym {e.syms[irs.sym(n)].decl.name}" of ntkProc: line = fmt"proc '{e.procs[n.procId].decl.name}'" + of ntkParam: + line = fmt"param {n.paramIndex}: '{e.procs.param(irs.owner, n.paramIndex).name}'" of ntkAsgn: case n.asgnKind of askCopy, askDiscr: diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 2b9b0e65d2c..4c4a2a468a2 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -124,7 +124,7 @@ func irSym(c: var TCtx, sym: PSym): IRIndex = c.irs.irSym(id) func irParam(c: var TCtx, sym: PSym): IRIndex = - c.irSym(sym) + c.irs.irParam(sym.position.uint32) func irGlobal(c: var TCtx, sym: PSym): IRIndex = c.irSym(sym) diff --git a/compiler/vm/irpasses.nim b/compiler/vm/irpasses.nim index aa10087b78b..9a6efdc6124 100644 --- a/compiler/vm/irpasses.nim +++ b/compiler/vm/irpasses.nim @@ -504,6 +504,8 @@ func computeTypes*(ir: IrStore3, env: IrEnv): seq[TypeId] = let s = ir.sym(n) customAssert s != NoneSymbol, i result[i] = env.syms[s].typ + of ntkParam: + result[i] = env.procs.param(ir.owner, n.paramIndex).typ of ntkUse, ntkConsume: result[i] = result[n.srcLoc] of ntkLocal: diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index a229b8582cb..670c86670a7 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -868,6 +868,10 @@ func finish*(e: var ProcedureEnv, types: var DeferredTypeGen) = func getReturnType*(e: ProcedureEnv, p: ProcId): TypeId = e[p].returnType +func param*(e: ProcedureEnv, p: ProcId, i: Natural): auto = + # XXX: costly string copy since ``lent`` is not used here + e[p].params[i] + func numParams*(e: ProcedureEnv, p: ProcId): int = e[p].params.len diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index 554870357c4..b2da44556dc 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -150,6 +150,7 @@ type ntkLocEnd ntkProc # reference to a procedure + ntkParam # reference to a parameter ntkSym ntkRoot # a handle ntkLocal # references a local @@ -221,6 +222,8 @@ type local: int of ntkProc: procId: ProcId + of ntkParam: + param: int of ntkAddr, ntkDeref: addrLoc: PathIndex of ntkGoto, ntkGotoLink: @@ -263,6 +266,9 @@ type localSrc: seq[seq[StackTraceEntry]] sources: seq[seq[StackTraceEntry]] # the stack trace of where each node was added + # XXX: to temporarily make things easier, the owning procedure's ID is stored here (will be moved elsewhere later) + owner*: ProcId + IrEnv* = object ## syms*: SymbolEnv @@ -430,9 +436,9 @@ func irTemp*(c: var IrStore, typ: VmTypeId): IRIndex = result = c.add(IrNode2(kind: inktTemp, tmpName: c.nextTemp, typ: typ)) inc c.nextTemp -func irParam*(c: var IrStore, pos: uint32): IRIndex = +func irParam*(c: var IrStore3, pos: uint32): IRIndex = ## A path component. Refers to a parameter - c.add(IrNode2(kind: inktParam, param: pos)) + c.add IrNode3(kind: ntkParam, param: pos.int) proc irConst*(c: var IrStore, i: uint32): IRIndex = ## Load a simple constant with the given literal index `i` @@ -614,6 +620,9 @@ func sym*(c: IrStore3, n: IrNode3): SymId = func procId*(n: IrNode3): ProcId = n.procId +func paramIndex*(n: IrNode3): int = + n.param + func getLocal*(irs: IrStore3, n: IRIndex): (LocalKind, TypeId, SymId) = irs.locals[irs.nodes[n].local] @@ -1914,6 +1923,9 @@ func insertSym*(cr: var IrCursor, sym: SymId): IRIndex = assert sym != NoneSymbol cr.insert IrNode3(kind: ntkSym, sym: sym) +func insertParam*(cr: var IrCursor, param: Natural): IRIndex = + cr.insert IrNode3(kind: ntkParam, param: param) + func insertProcSym*(cr: var IrCursor, prc: ProcId): IRIndex = cr.insert IrNode3(kind: ntkProc, procId: prc) @@ -2007,7 +2019,8 @@ func patch(n: var IrNode3, patchTable: seq[IRIndex]) = patchIdx(n.idx) of ntkJoin, ntkGoto, ntkSym, ntkLocal, ntkLocEnd, ntkImm, ntkGotoCont, - ntkContinue, ntkGotoLink, ntkLoad, ntkWrite, ntkRoot, ntkLit, ntkProc: + ntkContinue, ntkGotoLink, ntkLoad, ntkWrite, ntkRoot, ntkLit, ntkProc, + ntkParam: discard "nothing to patch" func inline*(cr: var IrCursor, other: IrStore3, sEnv: SymbolEnv, args: varargs[IRIndex]): IRIndex = From bdef70d9e05a1d5e09511398d13f4a0619d19a8c Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:07:29 +0100 Subject: [PATCH 040/395] fix: no cached types (`sysTypes`) being generated for uints --- compiler/vm/cbackend2.nim | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/vm/cbackend2.nim b/compiler/vm/cbackend2.nim index 19759dc81ed..923fd6f35ee 100644 --- a/compiler/vm/cbackend2.nim +++ b/compiler/vm/cbackend2.nim @@ -292,7 +292,7 @@ proc newPassEnv(g: ModuleGraph, tgen: var DeferredTypeGen, syms: var SymbolEnv, # XXX: is this case even possible discard#echo "missing type for type-bound operation" - for t in { tyVoid, tyInt..tyFloat64, tyBool, tyChar, tyString, tyCstring, tyPointer }.items: + for t in { tyVoid, tyInt..tyFloat64, tyUInt..tyUInt64, tyBool, tyChar, tyString, tyCstring, tyPointer }.items: result.sysTypes[t] = tgen.requestType(g.getSysType(unknownLineInfo, t)) proc logError(conf: ConfigRef, ir: IrStore3, prc: ProcId, env: IrEnv, pos: (bool, int)) = From 28a74e03360a437a4932254b562c237633fa4bb2 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:08:09 +0100 Subject: [PATCH 041/395] irtypes: add `skipVarOrLent` --- compiler/vm/irtypes.nim | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compiler/vm/irtypes.nim b/compiler/vm/irtypes.nim index 670c86670a7..517fffa0b89 100644 --- a/compiler/vm/irtypes.nim +++ b/compiler/vm/irtypes.nim @@ -347,6 +347,14 @@ func numFields*(env: TypeEnv, t: TypeId): int = assert env[t].kind == tnkRecord env[env[t].record].a.int +func skipVarOrLent*(env: TypeEnv, t: TypeId): TypeId = + if env[t].kind in {tnkVar, tnkLent}: + # a ``var var T`` (same with ``lent``) is not possible so no need to use + # a while loop + env[t].base + else: + t + func combine(lo, hi: uint32): uint64 {.inline.} From bf8e8709f53d1e4469e50d2eca21e1478b84e315 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:09:23 +0100 Subject: [PATCH 042/395] vmir: implement a more efficient version of `update` It's only takes 1/50 of the time! --- compiler/vm/vmir.nim | 121 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 119 insertions(+), 2 deletions(-) diff --git a/compiler/vm/vmir.nim b/compiler/vm/vmir.nim index b2da44556dc..4e1d28ed0e6 100644 --- a/compiler/vm/vmir.nim +++ b/compiler/vm/vmir.nim @@ -2065,9 +2065,10 @@ func inline*(cr: var IrCursor, other: IrStore3, sEnv: SymbolEnv, args: varargs[I patch(cr.newNodes[i], patchTable) -func update*(ir: var IrStore3, cr: sink IrCursor) = +func updateV1(ir: var IrStore3, cr: sink IrCursor) = ## Integrates the changes collected by the cursor `cr` into `ir` # XXX: non-descriptive name + # XXX: superseded and now unused var patchTable: seq[IRIndex] let oldLen = ir.len patchTable.newSeq(cr.nextIdx) # old ir len + insert node count @@ -2133,4 +2134,120 @@ func update*(ir: var IrStore3, cr: sink IrCursor) = if p < oldLen: # patch the remaining nodes - process(ir, p, oldLen) \ No newline at end of file + process(ir, p, oldLen) + +func moveMem[T](dst: var openArray[T], dstP, srcP: int, len: int) = + assert srcP + len <= dst.len + assert dstP + len <= dst.len + moveMem(addr dst[dstP], addr dst[srcP], len * sizeof(T)) + +func copyMem[T](dst: var openArray[T], src: openArray[T], dstP, srcP: int, len: int) = + assert srcP + len <= src.len + assert dstP + len <= dst.len + copyMem(addr dst[dstP], unsafeAddr src[srcP], len * sizeof(T)) + +func zeroMem[T](dst: var openArray[T]) = + if dst.len > 0: + zeroMem(addr dst[0], sizeof(T) * dst.len) + +func update*(ir: var IrStore3, cr: sink IrCursor) = + ## Integrates the changes collected by the cursor `cr` into `ir` + # XXX: non-descriptive name + + if cr.newNodes.len == 0: + return + + var patchTable: seq[IRIndex] + let oldLen = ir.len + patchTable.newSeq(cr.nextIdx) # old ir len + insert node count + + #ir.syms.apply(cr.newSyms) + ir.locals.apply(cr.newLocals) + ir.literals.apply(cr.newLiterals) + + let start = cr.actions[0][1].a + + var numNew = 0 + for kind, slice in cr.actions.items: + if kind: + numNew += slice.len - 1 + else: + numNew += slice.len + + ir.nodes.setLen(oldLen + numNew) + when useNodeTraces: + ir.sources.setLen(oldLen + numNew) + + let L = oldLen - start + moveMem(ir.nodes, ir.nodes.len - L, start, L) + when useNodeTraces: + moveMem(ir.sources, ir.sources.len - L, start, L) + + var copySrc = ir.nodes.len - L # where to take + var p = start # the position in the old node buffer where we're at + var insert = start # where to insert the next nodes + var np = 0 # the read position in the newNodes buffer + + # fill the patchTable for the nodes that aren't moved + for i in 0..= insert + slice.len + + else: # insert + discard + + inc insert, slice.len + + let + next = (if i+1 < cr.actions.len: cr.actions[i+1][1].a else: oldLen) + num = next - p # number of elements to move + # regions can overlap -> use ``moveMem`` + assert num >= 0 + moveMem(ir.nodes, insert, copySrc, num) + when useNodeTraces: + moveMem(ir.sources, insert, copySrc, num) + + let start = p + while p < next: + patchTable[p] = insert + (p - start) + inc p + + copySrc += num + insert += num + + # we've effectively done a ``move`` for all elements so we have to also zero + # the memory or else the garbage collector would clean up the nodes' GC'ed + # fields when collecting `cr.newNodes` + zeroMem(cr.newNodes) + when useNodeTraces: + zeroMem(cr.traces) + + # patch the node indices + for i, n in ir.nodes.mpairs: + patch(n, patchTable) \ No newline at end of file From a5e4b84ce4e7febc90e2194f69bc6a864cc02189 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:10:28 +0100 Subject: [PATCH 043/395] cgen2: implenent emit for `cnkTernary` --- compiler/vm/cgen2.nim | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 9cc5e1de319..4d9760c5eee 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -978,6 +978,14 @@ proc emitCAst(f: File, c: GlobalGenCtx, ast: CAst, pos: var int) = emitCAst(f, c, ast, pos) f.write "}" + of cnkTernary: + f.write "(" + emitCAst(f, c, ast, pos) # condition + f.write ") ? " + emitAndEscapeIf(f, c, ast, pos, {cnkIdent}) # a + f.write " : " + emitAndEscapeIf(f, c, ast, pos, {cnkIdent}) # b + else: f.write "EMIT_ERROR(\"missing " & $n.kind & "\")" From 8a84ba9a49f00c07a642e0d11d4baaab357fb959 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:11:32 +0100 Subject: [PATCH 044/395] cgen2: wrap arrays in structs --- compiler/vm/cgen2.nim | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 4d9760c5eee..d07d6217c85 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -205,6 +205,8 @@ func mangledName(d: Declaration): string = const BaseName = "Sub" ## the name of the field for the base type +const ArrayInnerName = "arr" + func add(decl: var CDecl, k: CDeclAstNodeKind; a, b: uint32 = 0) = decl.add((k, a, 0'u32)) @@ -325,12 +327,17 @@ func genCTypeDecl(c: var TypeGenCtx, t: TypeId): CDecl = result[0].a += count.uint32 of tnkArray: - result.add cdnkBracket - # not a weak-dep, since the a complete type is required + # --> struct { T data[len] } + # Arrays are wrapped in a struct which allows for them to appear as return-types and in assignments + result.add cdnkStruct, 1.uint32 + result.add cdnkEmpty + + result.add cdnkField + # not a weak-dep, since the complete type is required result.add cdnkType, c.requestType(c.env.types.elemType(t)).uint32 - # TODO: pass a valid ConfigRef - {.cast(noSideEffect).}: - result.addIntLit c.env.types.length(t).uint64 + result.add cdnkBracket + result.add cdnkIdent, c.cache.getOrIncl(ArrayInnerName).uint32 + result.addIntLit c.env.types.length(t).uint64 of tnkProc: case c.env.types.callConv(t) @@ -799,7 +806,11 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = names[i] = ast.fin() of ntkPathArr: - names[i] = start().add(cnkBracket).add(names[n.srcLoc]).add(names[n.arrIdx]).fin() + case c.env.types[c.types[n.srcLoc]].kind + of tnkArray: + names[i] = start().add(cnkBracket).add(cnkDotExpr).add(names[n.srcLoc]).ident(c.gl.idents, ArrayInnerName).add(names[n.arrIdx]).fin() + else: + names[i] = start().add(cnkBracket).add(names[n.srcLoc]).add(names[n.arrIdx]).fin() of ntkLit: names[i] = genLit(c, irs.getLit(n)) of ntkUse: From 2aa7ec17032ae844552edeb467cdc60f2cd1289c Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:12:17 +0100 Subject: [PATCH 045/395] cgen2: correctly handle identifier generation for locals --- compiler/vm/cgen2.nim | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index d07d6217c85..63c89a892fa 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -483,6 +483,8 @@ type GenCtx = object types: seq[TypeId] config: ConfigRef + localNames: seq[CIdent] + env: #[lent]# ptr IrEnv gl: GlobalGenCtx # XXX: temporary @@ -698,6 +700,8 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = var numStmts = 0 result.add cnkStmtList + c.localNames.setLen(0) + var tmp = 0 for typ, sym in irs.locals: if sym != NoneSymbol: @@ -710,14 +714,22 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = continue ]# + let name = + if sym != NoneSymbol: + mangledName(c.env.syms[sym].decl) + else: + let i = tmp + inc tmp + fmt"_tmp{i}" + + let ident = c.gl.idents.getOrIncl(name) + + # TODO: use ``setLen`` + [] + c.localNames.add(ident) + result.add cnkDef result.add cnkType, mapTypeV2(c, typ).uint32 - if sym != NoneSymbol: # TODO: don't test for temps like this - result.add c.gl.ident mangledName(c.env.syms[sym].decl) - - else: - result.add c.gl.ident(fmt"_tmp{tmp}") - inc tmp + result.add cnkIdent, ident.uint32 inc numStmts @@ -750,12 +762,7 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = names[i] = start().ident(c.gl.funcs[toIndex(n.procId)].ident).fin() of ntkLocal: - let (kind, typ, sym) = irs.getLocal(i) - if sym == NoneSymbol: - names[i] = start().ident(c.gl.idents, "_tmp" & $c.tmp).fin() - inc c.tmp - else: - names[i] = start().ident(c.gl.idents, mangledName(c.env.syms[sym].decl)).fin() + names[i] = start().ident(c.localNames[irs.getLocalIdx(i)]).fin() of ntkCall: if n.isBuiltIn: From 79f2b906ea55dea7351a0d40ab35e278da4f8434 Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:13:25 +0100 Subject: [PATCH 046/395] cgen2: collect used constants --- compiler/vm/cgen2.nim | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/compiler/vm/cgen2.nim b/compiler/vm/cgen2.nim index 63c89a892fa..5ec519df70f 100644 --- a/compiler/vm/cgen2.nim +++ b/compiler/vm/cgen2.nim @@ -742,9 +742,15 @@ proc genCode(c: var GenCtx, irs: IrStore3): CAst = let sId = irs.sym(n) let sym = c.env.syms[sId] # TODO: refactor - if sym.kind in {skVar, skLet} and sfGlobal in sym.flags: - c.m.syms.incl sId + case sym.kind + of skVar, skLet: + if sfGlobal in sym.flags: + c.m.syms.incl sId #discard mapTypeV3(c.gl, sym.typ) # XXX: temporary + of skConst: + c.m.syms.incl sId + else: + discard if sym.typ != NoneType: useType(c.m, sym.typ) From e2241279c3dfc8f08c947e8b757003987b09fc0a Mon Sep 17 00:00:00 2001 From: zerbina <100542850+zerbina@users.noreply.github.com> Date: Wed, 17 Aug 2022 17:14:27 +0100 Subject: [PATCH 047/395] omit compile-time-only parameters when translating calls/procedures --- compiler/vm/irgen.nim | 33 ++++++++++++++++++++++++++++++++- compiler/vm/irtypes.nim | 13 ++++++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/compiler/vm/irgen.nim b/compiler/vm/irgen.nim index 4c4a2a468a2..2c65e63ea0e 100644 --- a/compiler/vm/irgen.nim +++ b/compiler/vm/irgen.nim @@ -61,6 +61,11 @@ type PProc* = object locals: Table[int, int] + paramRemap: seq[int] # maps the parameter position given by ``TSym.position`` + ## to the index used for the IR. This is required in the case that + ## parameters are removed during the translation step (e.g. ``static T`` + ## parameters). The list is empty if the positions can be used directly + type TCtx* = object irs*: IrStore3 @@ -934,7 +939,13 @@ proc genRdVar(c: var TCtx; n: PNode;): IRIndex = c.irGlobal(s) elif s.kind == skParam: if s.position < c.prc.sym.typ.len - 1: - c.irParam(s) + let p = + if c.prc.paramRemap.len == 0: + s.position + else: + c.prc.paramRemap[s.position] + + c.irs.irParam(p.uint32) else: assert tfCapturesEnv in c.prc.sym.typ.flags # the parameter is the hidden environment parameter @@ -1541,6 +1552,26 @@ proc genProcBody(c: var TCtx; s: PSym, body: PNode) = let oldPrc = c.prc c.prc = p + # TODO: move this elsewhere + block: + var needsRemap = false + + # figure out the number of parameters that we want to skip + for i in 1..