Skip to content

Commit

Permalink
perf: optimise from_word128_ptr by speculation (#3893)
Browse files Browse the repository at this point in the history
...that the upper 64 bits of the cycle count are all zeros. Since 1T cycles is about one dollar, a value of 1 in the upper word (i.e. 2^64 cycles) is worth more than 18M dollars. I guess that sending such amounts is rare.
However, doing the bignum arithmetic to construct, shift and add the upper word is costly; see https://dfinity.github.io/canister-profiling/heartbeat/Motoko_heartbeat.svg

Of course the slow paths should be tested well :-) — Done by `test/run-drun/basic-cycles.mo`!

---------------
TODO
- [ ] We could explore an ultra-fast path too, when the whole amount is 0.
  • Loading branch information
ggreif authored Mar 23, 2023
1 parent 87f9371 commit 6319d2d
Showing 1 changed file with 20 additions and 11 deletions.
31 changes: 20 additions & 11 deletions src/codegen/compile.ml
Original file line number Diff line number Diff line change
Expand Up @@ -4439,17 +4439,26 @@ module Cycles = struct

(* Shared code routine from_word128_ptr: takes one I32Type argument named ptr
   and yields one I32Type result. It reads two 64-bit words, at ptr+0 and at
   ptr+8, and combines them into a BigNum as
   (word at ptr+8) shifted left by 64 bits, plus (word at ptr+0).

   NOTE(review): this listing is a diff view whose +/- markers were lost.
   The first body below (up to the first closing paren) appears to be the
   removed version, and the second body (starting at let set_lower) the added
   one; this matches the stated 20 additions and 11 deletions. Confirm against
   the repository before relying on this split. *)
let from_word128_ptr env = Func.share_code1 env "from_word128_ptr" ("ptr", I32Type) [I32Type]
(fun env get_ptr ->
(* --- presumably the removed body: unconditionally converts, shifts and adds
   the upper word --- *)
(* lower word: load the i64 at ptr+0 and turn it into a BigNum *)
get_ptr ^^
(G.i (Load {ty = I64Type; align = 0; offset = 0l; sz = None })) ^^
BigNum.from_word64 env ^^
(* upper word: advance the pointer by 8, load the i64 there, turn it into a BigNum *)
get_ptr ^^
compile_add_const 8l ^^
(G.i (Load {ty = I64Type; align = 0; offset = 0l; sz = None })) ^^
BigNum.from_word64 env ^^
(* shift left 64 bits *)
compile_unboxed_const 64l ^^
BigNum.compile_lsh env ^^
(* sum of lower word and shifted upper word *)
BigNum.compile_add env)
(* --- presumably the added body: skips the shift and add when the upper word
   is zero --- *)
(* local that holds the lower word after conversion to a BigNum *)
let set_lower, get_lower = new_local env "lower" in
(* lower word: load the i64 at ptr+0, convert, and stash it in the local *)
get_ptr ^^
G.i (Load {ty = I64Type; align = 0; offset = 0l; sz = None }) ^^
BigNum.from_word64 env ^^
set_lower ^^
(* upper word: load the i64 at ptr+8 (offset folded into the load) and test
   whether it is zero *)
get_ptr ^^
G.i (Load {ty = I64Type; align = 0; offset = 8l; sz = None }) ^^
G.i (Test (Wasm.Values.I64 I64Op.Eqz)) ^^
(* both branches leave one I32Type value *)
G.if1 I32Type
(* fast path, upper word is zero: the converted lower word is the result *)
get_lower
begin
(* slow path, upper word is non-zero: load the upper word a second time,
   convert it, shift it and add it to the lower word *)
get_lower ^^
get_ptr ^^
G.i (Load {ty = I64Type; align = 0; offset = 8l; sz = None }) ^^
BigNum.from_word64 env ^^
(* shift left 64 bits *)
compile_unboxed_const 64l ^^
BigNum.compile_lsh env ^^
BigNum.compile_add env
end)

(* takes a bignum from the stack, traps if ≥2^128, and leaves two 64bit words on the stack *)
(* only used twice, so ok to not use share_code1; that would require I64Type support in FakeMultiVal *)
Expand Down

0 comments on commit 6319d2d

Please sign in to comment.