-
Notifications
You must be signed in to change notification settings - Fork 97
/
compile.ml
8243 lines (7104 loc) · 300 KB
/
compile.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
(*
This module is the backend of the Motoko compiler. It takes a program in
the intermediate representation (ir.ml), and produces a WebAssembly module,
with Internet Computer extensions (customModule.ml). An important helper module is
instrList.ml, which provides a more convenient way of assembling WebAssembly
instruction lists, as it takes care of (1) source locations and (2) labels.
This file is split up in a number of modules, purely for namespacing and
grouping. Every module has a high-level prose comment explaining the concept;
this keeps documentation close to the code (a lesson learned from Simon PJ).
*)
open Ir_def
open Mo_values
open Mo_types
open Mo_config
open Wasm_exts.Ast
open Wasm.Types
open Source
(* Re-shadow Source.(@@), to get Stdlib.(@@) *)
let (@@) = Stdlib.(@@)
module G = InstrList
let (^^) = G.(^^) (* is this how we import a single operator from a module that we otherwise use qualified? *)
(* WebAssembly pages are 64kb. *)
let page_size = Int32.of_int (64*1024)
(*
Pointers are skewed (translated) -1 relative to the actual offset.
See documentation of module BitTagged for more detail.
*)
let ptr_skew = -1l
let ptr_unskew = 1l
(* Generating function names for functions parametrized by prim types *)
let prim_fun_name p stem = Printf.sprintf "%s<%s>" stem (Type.string_of_prim p)
(* Helper functions to produce annotated terms (Wasm.AST) *)
let nr x = { Wasm.Source.it = x; Wasm.Source.at = Wasm.Source.no_region }
let todo fn se x = Printf.eprintf "%s: %s" fn (Wasm.Sexpr.to_string 80 se); x
exception CodegenError of string
let fatal fmt = Printf.ksprintf (fun s -> raise (CodegenError s)) fmt
module StaticBytes = struct
(* A very simple DSL to describe static memory *)
type t_ =
| I32 of int32
| I64 of int64
| Seq of t
| Bytes of string
and t = t_ list
let i32s is = Seq (List.map (fun i -> I32 i) is)
let rec add : Buffer.t -> t_ -> unit = fun buf -> function
| I32 i -> Buffer.add_int32_le buf i
| I64 i -> Buffer.add_int64_le buf i
| Seq xs -> List.iter (add buf) xs
| Bytes b -> Buffer.add_string buf b
let as_bytes : t -> string = fun xs ->
let buf = Buffer.create 16 in
List.iter (add buf) xs;
Buffer.contents buf
end (* StaticBytes *)
module Const = struct
(* Literals, as used in constant values. This is a projection of Ir.Lit,
combining cases whose details we no longer care about.
Should be still precise enough to map to the cases supported by SR.t.
In other words: It is the smallest type that allows these three functions:
(* projection of Ir.list. NB: pure, no access to env *)
const_lit_of_lit : Ir.lit -> Const.lit (* NB: pure, no access to env *)
(* creates vanilla representation (e.g. to put in static data structures *)
vanilla_lit : E.env -> Const.lit -> i32
(* creates efficient stack representation *)
compile_lit : E.env -> Const.lit -> (SR.t, code)
*)
type lit =
| Vanilla of int32 (* small words, no static data, already in vanilla format *)
| BigInt of Big_int.big_int
| Bool of bool
| Word32 of int32
| Word64 of int64
| Float64 of Numerics.Float.t
| Blob of string
(* Constant known values.
These are values that
* are completely known constantly
* do not require Wasm code to be be executed (e.g. in `start`)
* can be used directly (e.g. Call, not CallIndirect)
* can be turned into Vanilla heap data on demand
See ir_passes/const.ml for what precisely we can compile as const now.
*)
type v =
| Fun of (unit -> int32) (* function pointer calculated upon first use *)
| Message of int32 (* anonymous message, only temporary *)
| Obj of (string * t) list
| Unit
| Array of t list (* also tuples, but not nullary *)
| Lit of lit
(* A constant known value together with a vanilla pointer.
Typically a static memory location, could be an unboxed scalar.
Filled on demand.
*)
and t = (int32 Lib.Promise.t * v)
let t_of_v v = (Lib.Promise.make (), v)
end (* Const *)
module SR = struct
(* This goes with the StackRep module, but we need the types earlier *)
(* Value representation on the stack:
Compiling an expression means putting its value on the stack. But
there are various ways of putting a value onto the stack -- unboxed,
tupled etc.
*)
type t =
| Vanilla
| UnboxedBool (* 0 or 1 *)
| UnboxedTuple of int
| UnboxedWord64
| UnboxedWord32
| UnboxedFloat64
| Unreachable
| Const of Const.t
let unit = UnboxedTuple 0
let bool = UnboxedBool
(* Because t contains Const.t, and that contains Const.v, and that contains
Const.lit, and that contains Big_int, we cannot just use normal `=`. So we
have to write our own equality.
This equalty is, I believe, used when joining branches. So for Const, we
just compare the promises, and do not descend into the Const.v. This is
conservative; the only downside is that if a branch returns different
Const.t with (semantically) the same Const.v we do not propagate that as
Const, but materialize before the branch.
Which is not really expected or important.
*)
let eq (t1 : t) (t2 : t) = match t1, t2 with
| Const (p1, _), Const (p2, _) -> p1 == p2
| _ -> t1 = t2
end (* SR *)
(*
** The compiler environment.
Of course, as we go through the code we have to track a few things; these are
put in the compiler environment, type `E.t`. Some fields are valid globally, some
only make sense locally, i.e. within a single function (but we still put them
in one big record, for convenience).
The fields fall into the following categories:
1. Static global fields. Never change.
Example: whether we are compiling with -no-system-api
2. Mutable global fields. Change only monotonically.
These are used to register things like functions. This should be monotone
in the sense that entries are only added, and that the order should not
matter in a significant way. In some instances, the list contains futures
so that we can reserve and know the _position_ of the thing before we have
to actually fill it in.
3. Static local fields. Never change within a function.
Example: number of parameters and return values
4. Mutable local fields. See above
Example: Name and type of locals.
**)
(* Before we can define the environment, we need some auxillary types *)
module E = struct
(* Utilities, internal to E *)
let reg (ref : 'a list ref) (x : 'a) : int32 =
let i = Wasm.I32.of_int_u (List.length !ref) in
ref := !ref @ [ x ];
i
let reserve_promise (ref : 'a Lib.Promise.t list ref) _s : (int32 * ('a -> unit)) =
let p = Lib.Promise.make () in (* For debugging with named promises, use s here *)
let i = Wasm.I32.of_int_u (List.length !ref) in
ref := !ref @ [ p ];
(i, Lib.Promise.fulfill p)
(* The environment type *)
module NameEnv = Env.Make(String)
module StringEnv = Env.Make(String)
module LabSet = Set.Make(String)
module FunEnv = Env.Make(Int32)
type local_names = (int32 * string) list (* For the debug section: Names of locals *)
type func_with_names = func * local_names
type lazy_function = (int32, func_with_names) Lib.AllocOnUse.t
type t = {
(* Global fields *)
(* Static *)
mode : Flags.compile_mode;
rts : Wasm_exts.CustomModule.extended_module option; (* The rts. Re-used when compiling actors *)
trap_with : t -> string -> G.t;
(* Trap with message; in the env for dependency injection *)
(* Per module fields (only valid/used inside a module) *)
(* Immutable *)
(* Mutable *)
func_types : func_type list ref;
func_imports : import list ref;
other_imports : import list ref;
exports : export list ref;
funcs : (func * string * local_names) Lib.Promise.t list ref;
func_ptrs : int32 FunEnv.t ref;
end_of_table : int32 ref;
globals : (global Lib.Promise.t * string) list ref;
global_names : int32 NameEnv.t ref;
named_imports : int32 NameEnv.t ref;
built_in_funcs : lazy_function NameEnv.t ref;
static_strings : int32 StringEnv.t ref;
end_of_static_memory : int32 ref; (* End of statically allocated memory *)
static_memory : (int32 * string) list ref; (* Content of static memory *)
static_memory_frozen : bool ref;
(* Sanity check: Nothing should bump end_of_static_memory once it has been read *)
static_roots : int32 list ref;
(* GC roots in static memory. (Everything that may be mutable.) *)
labs : LabSet.t ref; (* Used labels (fields and variants),
collected for Motoko custom section 0 *)
(* Local fields (only valid/used inside a function) *)
(* Static *)
n_param : int32; (* Number of parameters (to calculate indices of locals) *)
return_arity : int; (* Number of return values (for type of Return) *)
(* Mutable *)
locals : value_type list ref; (* Types of locals *)
local_names : (int32 * string) list ref; (* Names of locals *)
}
(* The initial global environment *)
let mk_global mode rts trap_with dyn_mem : t = {
mode;
rts;
trap_with;
func_types = ref [];
func_imports = ref [];
other_imports = ref [];
exports = ref [];
funcs = ref [];
func_ptrs = ref FunEnv.empty;
end_of_table = ref 0l;
globals = ref [];
global_names = ref NameEnv.empty;
named_imports = ref NameEnv.empty;
built_in_funcs = ref NameEnv.empty;
static_strings = ref StringEnv.empty;
end_of_static_memory = ref dyn_mem;
static_memory = ref [];
static_memory_frozen = ref false;
static_roots = ref [];
labs = ref LabSet.empty;
(* Actually unused outside mk_fun_env: *)
n_param = 0l;
return_arity = 0;
locals = ref [];
local_names = ref [];
}
(* This wraps Mo_types.Hash.hash to also record which labels we have seen,
so that that data can be put in a custom section, useful for debugging.
Thus Mo_types.Hash.hash should not be called directly!
*)
let hash (env : t) lab =
env.labs := LabSet.add lab (!(env.labs));
Mo_types.Hash.hash lab
let get_labs env = LabSet.elements (!(env.labs))
let mk_fun_env env n_param return_arity =
{ env with
n_param;
return_arity;
locals = ref [];
local_names = ref [];
}
(* We avoid accessing the fields of t directly from outside of E, so here are a
bunch of accessors. *)
let mode (env : t) = env.mode
let add_anon_local (env : t) ty =
let i = reg env.locals ty in
Wasm.I32.add env.n_param i
let add_local_name (env : t) li name =
let _ = reg env.local_names (li, name) in ()
let get_locals (env : t) = !(env.locals)
let get_local_names (env : t) : (int32 * string) list = !(env.local_names)
let _add_other_import (env : t) m =
ignore (reg env.other_imports m)
let add_export (env : t) e =
ignore (reg env.exports e)
let add_global (env : t) name g =
assert (not (NameEnv.mem name !(env.global_names)));
let gi = reg env.globals (g, name) in
env.global_names := NameEnv.add name gi !(env.global_names)
let add_global32_delayed (env : t) name mut : int32 -> unit =
let p = Lib.Promise.make () in
add_global env name p;
(fun init ->
Lib.Promise.fulfill p (nr {
gtype = GlobalType (I32Type, mut);
value = nr (G.to_instr_list (G.i (Const (nr (Wasm.Values.I32 init)))))
})
)
let add_global32 (env : t) name mut init =
add_global32_delayed env name mut init
let get_global (env : t) name : int32 =
match NameEnv.find_opt name !(env.global_names) with
| Some gi -> gi
| None -> raise (Invalid_argument (Printf.sprintf "No global named %s declared" name))
let get_global32_lazy (env : t) name mut init : int32 =
match NameEnv.find_opt name !(env.global_names) with
| Some gi -> gi
| None -> add_global32 env name mut init; get_global env name
let export_global env name =
add_export env (nr {
name = Wasm.Utf8.decode name;
edesc = nr (GlobalExport (nr (get_global env name)))
})
let get_globals (env : t) = List.map (fun (g,n) -> Lib.Promise.value g) !(env.globals)
let reserve_fun (env : t) name =
let (j, fill) = reserve_promise env.funcs name in
let n = Int32.of_int (List.length !(env.func_imports)) in
let fi = Int32.add j n in
let fill_ (f, local_names) = fill (f, name, local_names) in
(fi, fill_)
let add_fun (env : t) name (f, local_names) =
let (fi, fill) = reserve_fun env name in
fill (f, local_names);
fi
let make_lazy_function env name : lazy_function =
Lib.AllocOnUse.make (fun () -> reserve_fun env name)
let lookup_built_in (env : t) name : lazy_function =
match NameEnv.find_opt name !(env.built_in_funcs) with
| None ->
let lf = make_lazy_function env name in
env.built_in_funcs := NameEnv.add name lf !(env.built_in_funcs);
lf
| Some lf -> lf
let built_in (env : t) name : int32 =
Lib.AllocOnUse.use (lookup_built_in env name)
let define_built_in (env : t) name mk_fun : unit =
Lib.AllocOnUse.def (lookup_built_in env name) mk_fun
let get_return_arity (env : t) = env.return_arity
let get_func_imports (env : t) = !(env.func_imports)
let get_other_imports (env : t) = !(env.other_imports)
let get_exports (env : t) = !(env.exports)
let get_funcs (env : t) = List.map Lib.Promise.value !(env.funcs)
let func_type (env : t) ty =
let rec go i = function
| [] -> env.func_types := !(env.func_types) @ [ ty ]; Int32.of_int i
| ty'::tys when ty = ty' -> Int32.of_int i
| _ :: tys -> go (i+1) tys
in
go 0 !(env.func_types)
let get_types (env : t) = !(env.func_types)
let add_func_import (env : t) modname funcname arg_tys ret_tys =
if !(env.funcs) <> [] then
raise (CodegenError "Add all imports before all functions!");
let i = {
module_name = Wasm.Utf8.decode modname;
item_name = Wasm.Utf8.decode funcname;
idesc = nr (FuncImport (nr (func_type env (FuncType (arg_tys, ret_tys)))))
} in
let fi = reg env.func_imports (nr i) in
let name = modname ^ "." ^ funcname in
assert (not (NameEnv.mem name !(env.named_imports)));
env.named_imports := NameEnv.add name fi !(env.named_imports)
let call_import (env : t) modname funcname =
let name = modname ^ "." ^ funcname in
match NameEnv.find_opt name !(env.named_imports) with
| Some fi -> G.i (Call (nr fi))
| _ ->
raise (Invalid_argument (Printf.sprintf "Function import not declared: %s\n" name))
let get_rts (env : t) = env.rts
let trap_with env msg = env.trap_with env msg
let then_trap_with env msg = G.if_ [] (trap_with env msg) G.nop
let else_trap_with env msg = G.if_ [] G.nop (trap_with env msg)
let reserve_static_memory (env : t) size : int32 =
if !(env.static_memory_frozen) then raise (Invalid_argument "Static memory frozen");
let ptr = !(env.end_of_static_memory) in
let aligned = Int32.logand (Int32.add size 3l) (Int32.lognot 3l) in
env.end_of_static_memory := Int32.add ptr aligned;
ptr
let add_mutable_static_bytes (env : t) data : int32 =
let ptr = reserve_static_memory env (Int32.of_int (String.length data)) in
env.static_memory := !(env.static_memory) @ [ (ptr, data) ];
Int32.(add ptr ptr_skew) (* Return a skewed pointer *)
let add_fun_ptr (env : t) fi : int32 =
match FunEnv.find_opt fi !(env.func_ptrs) with
| Some fp -> fp
| None ->
let fp = !(env.end_of_table) in
env.func_ptrs := FunEnv.add fi fp !(env.func_ptrs);
env.end_of_table := Int32.add !(env.end_of_table) 1l;
fp
let get_elems env =
FunEnv.bindings !(env.func_ptrs)
let get_end_of_table env : int32 =
!(env.end_of_table)
let add_static (env : t) (data : StaticBytes.t) : int32 =
let b = StaticBytes.as_bytes data in
match StringEnv.find_opt b !(env.static_strings) with
| Some ptr -> ptr
| None ->
let ptr = add_mutable_static_bytes env b in
env.static_strings := StringEnv.add b ptr !(env.static_strings);
ptr
let get_end_of_static_memory env : int32 =
env.static_memory_frozen := true;
!(env.end_of_static_memory)
let add_static_root (env : t) ptr =
env.static_roots := ptr :: !(env.static_roots)
let get_static_roots (env : t) =
!(env.static_roots)
let get_static_memory env =
!(env.static_memory)
let mem_size env =
Int32.(add (div (get_end_of_static_memory env) page_size) 1l)
let collect_garbage env =
let gc_fn = if !Flags.compacting_gc then "compacting_gc" else "copying_gc" in
let gc_fn = if !Flags.force_gc then gc_fn else "schedule_" ^ gc_fn in
call_import env "rts" gc_fn
end
(* General code generation functions:
Rule of thumb: Here goes stuff that independent of the Motoko AST.
*)
(* Function called compile_* return a list of instructions (and maybe other stuff) *)
let compile_unboxed_const i = G.i (Const (nr (Wasm.Values.I32 i)))
let compile_const_64 i = G.i (Const (nr (Wasm.Values.I64 i)))
let compile_unboxed_zero = compile_unboxed_const 0l
let compile_unboxed_one = compile_unboxed_const 1l
(* Some common arithmetic, used for pointer and index arithmetic *)
let compile_op_const op i =
compile_unboxed_const i ^^
G.i (Binary (Wasm.Values.I32 op))
let compile_add_const = compile_op_const I32Op.Add
let compile_sub_const = compile_op_const I32Op.Sub
let compile_mul_const = compile_op_const I32Op.Mul
let compile_divU_const = compile_op_const I32Op.DivU
let compile_shrU_const = compile_op_const I32Op.ShrU
let compile_shrS_const = compile_op_const I32Op.ShrS
let compile_shl_const = compile_op_const I32Op.Shl
let compile_rotl_const = compile_op_const I32Op.Rotl
let compile_rotr_const = compile_op_const I32Op.Rotr
let compile_bitand_const = compile_op_const I32Op.And
let compile_bitor_const = function
| 0l -> G.nop | n -> compile_op_const I32Op.Or n
let compile_rel_const rel i =
compile_unboxed_const i ^^
G.i (Compare (Wasm.Values.I32 rel))
let compile_eq_const = compile_rel_const I32Op.Eq
let compile_op64_const op i =
compile_const_64 i ^^
G.i (Binary (Wasm.Values.I64 op))
let _compile_add64_const = compile_op64_const I64Op.Add
let compile_sub64_const = compile_op64_const I64Op.Sub
let _compile_mul64_const = compile_op64_const I64Op.Mul
let _compile_divU64_const = compile_op64_const I64Op.DivU
let compile_shrU64_const = function
| 0L -> G.nop | n -> compile_op64_const I64Op.ShrU n
let compile_shrS64_const = function
| 0L -> G.nop | n -> compile_op64_const I64Op.ShrS n
let compile_shl64_const = function
| 0L -> G.nop | n -> compile_op64_const I64Op.Shl n
let compile_bitand64_const = compile_op64_const I64Op.And
let _compile_bitor64_const = function
| 0L -> G.nop | n -> compile_op64_const I64Op.Or n
let compile_eq64_const i =
compile_const_64 i ^^
G.i (Compare (Wasm.Values.I64 I64Op.Eq))
(* more random utilities *)
let bytes_of_int32 (i : int32) : string =
let b = Buffer.create 4 in
let i = Int32.to_int i in
Buffer.add_char b (Char.chr (i land 0xff));
Buffer.add_char b (Char.chr ((i lsr 8) land 0xff));
Buffer.add_char b (Char.chr ((i lsr 16) land 0xff));
Buffer.add_char b (Char.chr ((i lsr 24) land 0xff));
Buffer.contents b
(* A common variant of todo *)
let todo_trap env fn se = todo fn se (E.trap_with env ("TODO: " ^ fn))
let _todo_trap_SR env fn se = todo fn se (SR.Unreachable, E.trap_with env ("TODO: " ^ fn))
(* Locals *)
let new_local_ env t name =
let i = E.add_anon_local env t in
E.add_local_name env i name;
( G.i (LocalSet (nr i))
, G.i (LocalGet (nr i))
, i
)
let new_local env name =
let (set_i, get_i, _) = new_local_ env I32Type name
in (set_i, get_i)
let new_local64 env name =
let (set_i, get_i, _) = new_local_ env I64Type name
in (set_i, get_i)
let new_float_local env name =
let (set_i, get_i, _) = new_local_ env F64Type name
in (set_i, get_i)
(* Some common code macros *)
(* Iterates while cond is true. *)
let compile_while cond body =
G.loop_ [] (
cond ^^ G.if_ [] (body ^^ G.i (Br (nr 1l))) G.nop
)
(* Expects a number n on the stack. Iterates from m to below that number. *)
let from_m_to_n env m mk_body =
let (set_n, get_n) = new_local env "n" in
let (set_i, get_i) = new_local env "i" in
set_n ^^
compile_unboxed_const m ^^
set_i ^^
compile_while
( get_i ^^
get_n ^^
G.i (Compare (Wasm.Values.I32 I32Op.LtU))
) (
mk_body get_i ^^
get_i ^^
compile_add_const 1l ^^
set_i
)
(* Expects a number on the stack. Iterates from zero to below that number. *)
let from_0_to_n env mk_body = from_m_to_n env 0l mk_body
(* Pointer reference and dereference *)
let load_unskewed_ptr : G.t =
G.i (Load {ty = I32Type; align = 2; offset = 0l; sz = None})
let store_unskewed_ptr : G.t =
G.i (Store {ty = I32Type; align = 2; offset = 0l; sz = None})
let load_ptr : G.t =
G.i (Load {ty = I32Type; align = 2; offset = ptr_unskew; sz = None})
let store_ptr : G.t =
G.i (Store {ty = I32Type; align = 2; offset = ptr_unskew; sz = None})
module FakeMultiVal = struct
(* For some use-cases (e.g. processing the compiler output with analysis
tools) it is useful to avoid the multi-value extension.
This module provides mostly transparent wrappers that put multiple values
in statically allocated globals and pull them off again.
So far only does I32Type (but that could be changed).
If the multi_value flag is on, these do not do anything.
*)
let ty tys =
if !Flags.multi_value || List.length tys <= 1
then tys
else []
let global env i =
E.get_global32_lazy env (Printf.sprintf "multi_val_%d" i) Mutable 0l
let store env tys =
if !Flags.multi_value || List.length tys <= 1 then G.nop else
G.concat_mapi (fun i _ ->
G.i (GlobalSet (nr (global env i)))
) tys
let load env tys =
if !Flags.multi_value || List.length tys <= 1 then G.nop else
let n = List.length tys - 1 in
G.concat_mapi (fun i _ ->
G.i (GlobalGet (nr (global env (n - i))))
) tys
end (* FakeMultiVal *)
module Func = struct
(* This module contains basic bookkeeping functionality to define functions,
in particular creating the environment, and finally adding it to the environment.
*)
let of_body env params retty mk_body =
let env1 = E.mk_fun_env env (Int32.of_int (List.length params)) (List.length retty) in
List.iteri (fun i (n,_t) -> E.add_local_name env1 (Int32.of_int i) n) params;
let ty = FuncType (List.map snd params, FakeMultiVal.ty retty) in
let body = G.to_instr_list (
mk_body env1 ^^ FakeMultiVal.store env1 retty
) in
(nr { ftype = nr (E.func_type env ty);
locals = E.get_locals env1;
body }
, E.get_local_names env1)
let define_built_in env name params retty mk_body =
E.define_built_in env name (lazy (of_body env params retty mk_body))
(* (Almost) transparently lift code into a function and call this function. *)
(* Also add a hack to support multiple return values *)
let share_code env name params retty mk_body =
define_built_in env name params retty mk_body;
G.i (Call (nr (E.built_in env name))) ^^
FakeMultiVal.load env retty
(* Shorthands for various arities *)
let share_code0 env name retty mk_body =
share_code env name [] retty (fun env -> mk_body env)
let share_code1 env name p1 retty mk_body =
share_code env name [p1] retty (fun env -> mk_body env
(G.i (LocalGet (nr 0l)))
)
let share_code2 env name (p1,p2) retty mk_body =
share_code env name [p1; p2] retty (fun env -> mk_body env
(G.i (LocalGet (nr 0l)))
(G.i (LocalGet (nr 1l)))
)
let share_code3 env name (p1, p2, p3) retty mk_body =
share_code env name [p1; p2; p3] retty (fun env -> mk_body env
(G.i (LocalGet (nr 0l)))
(G.i (LocalGet (nr 1l)))
(G.i (LocalGet (nr 2l)))
)
let _share_code4 env name (p1, p2, p3, p4) retty mk_body =
share_code env name [p1; p2; p3; p4] retty (fun env -> mk_body env
(G.i (LocalGet (nr 0l)))
(G.i (LocalGet (nr 1l)))
(G.i (LocalGet (nr 2l)))
(G.i (LocalGet (nr 3l)))
)
let share_code7 env name (p1, p2, p3, p4, p5, p6, p7) retty mk_body =
share_code env name [p1; p2; p3; p4; p5; p6; p7] retty (fun env -> mk_body env
(G.i (LocalGet (nr 0l)))
(G.i (LocalGet (nr 1l)))
(G.i (LocalGet (nr 2l)))
(G.i (LocalGet (nr 3l)))
(G.i (LocalGet (nr 4l)))
(G.i (LocalGet (nr 5l)))
(G.i (LocalGet (nr 6l)))
)
end (* Func *)
module RTS = struct
(* The connection to the C and Rust parts of the RTS *)
let system_imports env =
E.add_func_import env "rts" "memcpy" [I32Type; I32Type; I32Type] [I32Type]; (* standard libc memcpy *)
E.add_func_import env "rts" "memcmp" [I32Type; I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "version" [] [I32Type];
E.add_func_import env "rts" "parse_idl_header" [I32Type; I32Type; I32Type; I32Type; I32Type] [];
E.add_func_import env "rts" "leb128_decode" [I32Type] [I32Type];
E.add_func_import env "rts" "sleb128_decode" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_of_word32" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_of_int32" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_to_word32_wrap" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_to_word32_trap" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_to_word32_trap_with" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_of_word64" [I64Type] [I32Type];
E.add_func_import env "rts" "bigint_of_int64" [I64Type] [I32Type];
E.add_func_import env "rts" "bigint_of_float64" [F64Type] [I32Type];
E.add_func_import env "rts" "bigint_to_float64" [I32Type] [F64Type];
E.add_func_import env "rts" "bigint_to_word64_wrap" [I32Type] [I64Type];
E.add_func_import env "rts" "bigint_to_word64_trap" [I32Type] [I64Type];
E.add_func_import env "rts" "bigint_eq" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_isneg" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_count_bits" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_2complement_bits" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_lt" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_gt" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_le" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_ge" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_add" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_sub" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_mul" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_rem" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_div" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_pow" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_neg" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_lsh" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "bigint_abs" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_leb128_size" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_leb128_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "bigint_leb128_decode" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_sleb128_size" [I32Type] [I32Type];
E.add_func_import env "rts" "bigint_sleb128_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "bigint_sleb128_decode" [I32Type] [I32Type];
E.add_func_import env "rts" "leb128_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "sleb128_encode" [I32Type; I32Type] [];
E.add_func_import env "rts" "utf8_valid" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "utf8_validate" [I32Type; I32Type] [];
E.add_func_import env "rts" "skip_leb128" [I32Type] [];
E.add_func_import env "rts" "skip_any" [I32Type; I32Type; I32Type; I32Type] [];
E.add_func_import env "rts" "find_field" [I32Type; I32Type; I32Type; I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "skip_fields" [I32Type; I32Type; I32Type; I32Type] [];
E.add_func_import env "rts" "remember_continuation" [I32Type] [I32Type];
E.add_func_import env "rts" "recall_continuation" [I32Type] [I32Type];
E.add_func_import env "rts" "peek_future_continuation" [I32Type] [I32Type];
E.add_func_import env "rts" "continuation_count" [] [I32Type];
E.add_func_import env "rts" "continuation_table_size" [] [I32Type];
E.add_func_import env "rts" "blob_of_text" [I32Type] [I32Type];
E.add_func_import env "rts" "text_compare" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "text_concat" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "text_iter_done" [I32Type] [I32Type];
E.add_func_import env "rts" "text_iter" [I32Type] [I32Type];
E.add_func_import env "rts" "text_iter_next" [I32Type] [I32Type];
E.add_func_import env "rts" "text_len" [I32Type] [I32Type];
E.add_func_import env "rts" "text_of_ptr_size" [I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "text_singleton" [I32Type] [I32Type];
E.add_func_import env "rts" "text_size" [I32Type] [I32Type];
E.add_func_import env "rts" "text_to_buf" [I32Type; I32Type] [];
E.add_func_import env "rts" "blob_of_principal" [I32Type] [I32Type];
E.add_func_import env "rts" "principal_of_blob" [I32Type] [I32Type];
E.add_func_import env "rts" "compute_crc32" [I32Type] [I32Type];
E.add_func_import env "rts" "blob_iter_done" [I32Type] [I32Type];
E.add_func_import env "rts" "blob_iter" [I32Type] [I32Type];
E.add_func_import env "rts" "blob_iter_next" [I32Type] [I32Type];
E.add_func_import env "rts" "pow" [F64Type; F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "sin" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "cos" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "tan" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "asin" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "acos" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "atan" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "atan2" [F64Type; F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "exp" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "log" [F64Type] [F64Type]; (* musl *)
E.add_func_import env "rts" "fmod" [F64Type; F64Type] [F64Type]; (* remainder, musl *)
E.add_func_import env "rts" "float_fmt" [F64Type; I32Type; I32Type] [I32Type];
E.add_func_import env "rts" "char_to_upper" [I32Type] [I32Type];
E.add_func_import env "rts" "char_to_lower" [I32Type] [I32Type];
E.add_func_import env "rts" "char_is_whitespace" [I32Type] [I32Type];
E.add_func_import env "rts" "char_is_lowercase" [I32Type] [I32Type];
E.add_func_import env "rts" "char_is_uppercase" [I32Type] [I32Type];
E.add_func_import env "rts" "char_is_alphabetic" [I32Type] [I32Type];
E.add_func_import env "rts" "get_max_live_size" [] [I32Type];
E.add_func_import env "rts" "get_reclaimed" [] [I64Type];
E.add_func_import env "rts" "copying_gc" [] [];
E.add_func_import env "rts" "compacting_gc" [] [];
E.add_func_import env "rts" "schedule_copying_gc" [] [];
E.add_func_import env "rts" "schedule_compacting_gc" [] [];
E.add_func_import env "rts" "alloc_words" [I32Type] [I32Type];
E.add_func_import env "rts" "get_total_allocations" [] [I64Type];
E.add_func_import env "rts" "get_heap_size" [] [I32Type];
E.add_func_import env "rts" "init" [] [];
E.add_func_import env "rts" "alloc_blob" [I32Type] [I32Type];
E.add_func_import env "rts" "alloc_array" [I32Type] [I32Type];
()
end (* RTS *)
module Heap = struct
(* General heap object functionality (allocation, setting fields, reading fields) *)
(* Memory addresses are 32 bit (I32Type). *)
let word_size = 4l
(* The heap base global can only be used late, see conclude_module
and GHC.register *)
let get_heap_base env =
G.i (GlobalGet (nr (E.get_global env "__heap_base")))
let get_total_allocation env =
E.call_import env "rts" "get_total_allocations"
let get_reclaimed env =
E.call_import env "rts" "get_reclaimed"
let get_memory_size =
G.i MemorySize ^^
compile_mul_const page_size
let get_max_live_size env =
E.call_import env "rts" "get_max_live_size"
let dyn_alloc_words env =
E.call_import env "rts" "alloc_words"
(* Static allocation (always words)
(uses dynamic allocation for smaller and more readable code) *)
let alloc env (n : int32) : G.t =
compile_unboxed_const n ^^
dyn_alloc_words env
(* Heap objects *)
(* At this level of abstraction, heap objects are just flat arrays of words *)
let load_field (i : int32) : G.t =
let offset = Int32.(add (mul word_size i) ptr_unskew) in
G.i (Load {ty = I32Type; align = 2; offset; sz = None})
let store_field (i : int32) : G.t =
let offset = Int32.(add (mul word_size i) ptr_unskew) in
G.i (Store {ty = I32Type; align = 2; offset; sz = None})
(* Although we occasionally want to treat two 32 bit fields as one 64 bit number *)
let load_field64 (i : int32) : G.t =
let offset = Int32.(add (mul word_size i) ptr_unskew) in
G.i (Load {ty = I64Type; align = 2; offset; sz = None})
let store_field64 (i : int32) : G.t =
let offset = Int32.(add (mul word_size i) ptr_unskew) in
G.i (Store {ty = I64Type; align = 2; offset; sz = None})
(* Or even as a single 64 bit float *)
let load_field_float64 (i : int32) : G.t =
let offset = Int32.(add (mul word_size i) ptr_unskew) in
G.i (Load {ty = F64Type; align = 2; offset; sz = None})
let store_field_float64 (i : int32) : G.t =
let offset = Int32.(add (mul word_size i) ptr_unskew) in
G.i (Store {ty = F64Type; align = 2; offset; sz = None})
(* Create a heap object with instructions that fill in each word *)
let obj env element_instructions : G.t =
let (set_heap_obj, get_heap_obj) = new_local env "heap_object" in
let n = List.length element_instructions in
alloc env (Wasm.I32.of_int_u n) ^^
set_heap_obj ^^
let init_elem idx instrs : G.t =
get_heap_obj ^^
instrs ^^
store_field (Wasm.I32.of_int_u idx)
in
G.concat_mapi init_elem element_instructions ^^
get_heap_obj
(* Convenience functions related to memory *)
(* Copying bytes (works on unskewed memory addresses) *)
let memcpy env = E.call_import env "rts" "memcpy" ^^ G.i Drop
(* Comparing bytes (works on unskewed memory addresses) *)
let memcmp env = E.call_import env "rts" "memcmp"
let register env =
let get_heap_base_fn = E.add_fun env "get_heap_base" (Func.of_body env [] [I32Type] (fun env ->
get_heap_base env
)) in
E.add_export env (nr {
name = Wasm.Utf8.decode "get_heap_base";
edesc = nr (FuncExport (nr get_heap_base_fn))
})
let get_heap_size env =
E.call_import env "rts" "get_heap_size"
end (* Heap *)
module Stack = struct
(* The RTS includes C code which requires a shadow stack in linear memory.
We reserve some space for it at the beginning of memory space (just like
wasm-l would), this way stack overflow would cause out-of-memory, and not
just overwrite static data.
We sometimes use the stack space if we need small amounts of scratch space.
All pointers here are unskewed.
*)
let end_ = page_size (* 64k of stack *)
let register_globals env =
(* stack pointer *)
E.add_global32 env "__stack_pointer" Mutable end_;
E.export_global env "__stack_pointer"
let get_stack_ptr env =
G.i (GlobalGet (nr (E.get_global env "__stack_pointer")))
let set_stack_ptr env =
G.i (GlobalSet (nr (E.get_global env "__stack_pointer")))
let alloc_words env n =
get_stack_ptr env ^^
compile_unboxed_const (Int32.mul n Heap.word_size) ^^
G.i (Binary (Wasm.Values.I32 I32Op.Sub)) ^^
set_stack_ptr env ^^
get_stack_ptr env
let free_words env n =
get_stack_ptr env ^^
compile_unboxed_const (Int32.mul n Heap.word_size) ^^
G.i (Binary (Wasm.Values.I32 I32Op.Add)) ^^
set_stack_ptr env
let with_words env name n f =
let (set_x, get_x) = new_local env name in
alloc_words env n ^^ set_x ^^
f get_x ^^
free_words env n
end (* Stack *)
module ContinuationTable = struct
(* See rts/motoko-rts/src/closure_table.rs *)
let remember env : G.t = E.call_import env "rts" "remember_continuation"
let recall env : G.t = E.call_import env "rts" "recall_continuation"