From e0a3e9ef9573e899320f599f8f9ec66c836c8b60 Mon Sep 17 00:00:00 2001 From: Felix Lange Date: Mon, 28 Oct 2024 16:54:32 +0100 Subject: [PATCH 1/4] withdrawals: interleave storage reads with output formatting This is an improvement because it simplifies the stack usage. The contract now loads one storage item at a time and immediately consumes it. --- src/withdrawals/main.eas | 112 ++++++++++++++++++--------------------- 1 file changed, 52 insertions(+), 60 deletions(-) diff --git a/src/withdrawals/main.eas b/src/withdrawals/main.eas index 89aa8f2..51afe4d 100644 --- a/src/withdrawals/main.eas +++ b/src/withdrawals/main.eas @@ -236,33 +236,13 @@ accum_loop: add ;; [i+head_idx, i, ..] push 3 ;; [3, i+head_idx, i, ..] mul ;; [3*(i+head_idx), i, ..] - push QUEUE_OFFSET ;; [offset, 3*(i+head_idx), i, ..] - add ;; [addr_offset, i, ..] - - ;; Read address. - dup1 ;; [addr_offset, addr_offset, i, ..] - sload ;; [addr, addr_offset, i, ..] - - ;; Compute pk1 offset and read it. - swap1 ;; [addr_offset, addr, i, ..] - push 1 ;; [1, addr_offset, addr, i, ..] - add ;; [pk1_offset, addr, i, ..] - dup1 ;; [pk1_offset, pk1_offset, addr, i, ..] - sload ;; [pk1, pk1_offset, addr, i, ..] - - ;; Compute pk2_am offset and read it. - swap1 ;; [pk1_offset, pk1, addr, i, ..] - push 1 ;; [1, pk1_offset, pk1, addr, i, ..] - add ;; [pk2_am_offset, pk1, addr, i, ..] - sload ;; [pk2_am, pk1, addr, i, ..] - - ;; Reorder values. - swap2 ;; [addr, pk1, pk2_am, i, ..] + push QUEUE_OFFSET ;; [queue_offset, 3*(i+head_idx), i, ..] + add ;; [slotbase, i, ..] ;; Write values to memory flat and contiguously. This require combining the ;; three storage elements (addr, pk1, pk2_am) so there is no padding. 
;; - ;; Each stack element has the following layout: + ;; The slots have the following layout: ;; ;; A: addr ;; 0x00 | 00 00 00 00 00 00 00 00 00 00 00 00 aa aa aa aa @@ -275,56 +255,68 @@ accum_loop: ;; C: pk[32:48] ++ am[0:8] -> pk2_am ;; 0x00 | cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc ;; 0x10 | dd dd dd dd dd dd dd dd 00 00 00 00 00 00 00 00 - ;; - ;; To get these three stack elements into the correct contiguous format, it is - ;; neccessary to combine them in the follow form: - ;; - ;; (A[12:32] ++ B[0:12], B[12:32] ++ C[0:12], C[12:24]) - ;; Compute offset = i*RECORD_SIZE. - dup4 ;; [i, addr, pk1, pk2_am, i, ..] - push RECORD_SIZE ;; [size, i, addr, pk1, pk2_am, i, ..] - mul ;; [offset, addr, pk1, pk2_am, i, ..] + ;; Compute the output offset = i*RECORD_SIZE. + dup2 ;; [i, slotbase, i, ..] + push RECORD_SIZE ;; [size, i, slotbase, i, ..] + mul ;; [offset=size*i, slotbase, i, ..] + + ;; Read slot 'addr' from storage. + dup2 ;; [slotbase, offset, slotbase, ..] + sload ;; [addr, offset, slotbase, ..] ;; Shift addr bytes. - swap1 ;; [addr, offset, pk1, pk2_am, i, ..] - push 12*8 ;; [96, addr, offset, pk1, pk2_am, i, ..] - shl ;; [addr<<96, offset, pk1, pk2_am, i, ..] - - ;; Store addr at current offset. - dup2 ;; [offset, addr<<96, offset, pk1, pk2_am, i, ..] - mstore ;; [offset, pk1, pk2_am, i, ..] - push 20 ;; [20, offset, pk1, pk2_am, i, ..] - add ;; [offset, pk1, pk2_am, i, ..] - - ;; Store pk1 at offset = i*RECORD_SIZE + 20. - swap1 ;; [pk1, offset, pk2_am, i, ..] - dup2 ;; [offset, pk1, offset, pk2_am, i, ..] - mstore ;; [offset, pk2_am, i, ..] - push 32 ;; [32, offset, pk2_am, i, ..] - add ;; [offset, pk2_am, i, ..] + push 12*8 ;; [96, addr, offset, slotbase, ..] + shl ;; [addr<<96, offset, slotbase, ..] + + ;; Store addr at output offset = i*RECORD_SIZE. + dup2 ;; [offset, addr<<96, offset, slotbase, ..] + mstore ;; [offset, slotbase, ..] + push 20 ;; [20, offset, slotbase, ..] + add ;; [offset=offset+20, slotbase, ..] 
+ + ;; Read slot 'pk1' from storage. + dup2 ;; [slotbase, offset, slotbase, ..] + push 1 ;; [1, slotbase, offset, slotbase, ..] + add ;; [slot, offset, slotbase, ..] + sload ;; [pk1, offset, slotbase, ..] + + ;; Store pk1 at output offset = i*RECORD_SIZE + 20. + dup2 ;; [offset, pk1, offset, slotbase, ..] + mstore ;; [offset, slotbase, ..] + push 32 ;; [32, offset, slotbase, ..] + add ;; [offset=offset+32, slotbase, ..] + + ;; Read slot 'pk2_am' from storage. + dup2 ;; [slotbase, offset, slotbase, ..] + push 2 ;; [2, slotbase, offset, slotbase, ..] + add ;; [slot, offset, slotbase, ..] + sload ;; [pk2_am, offset, slotbase, ..] ;; Extract pk2 from pk2_am. - dup2 ;; [pk2_am, offset, pk2_am, i, ..] - push pk2_mask ;; [mask, pk2_am, offset, pk2_am, i, ..] - and ;; [pk2, offset, pk2_am, i, ..] + dup1 ;; [pk2_am, pk2_am, offset, slotbase, ..] + push pk2_mask ;; [mask, pk2_am, offset, slotbase, ..] + and ;; [pk2, pk2_am, offset, slotbase, ..] ;; Store pk2 at offset = i*RECORD_SIZE + 52. - dup2 ;; [offset, pk2, offset, pk2_am, i, ..] - mstore ;; [offset, pk2_am, i, ..] - push 16 ;; [16, offset, pk2_am, i, ..] - add ;; [offset, pk2_am, i, ..] + dup3 ;; [offset, pk2, pk2_am, offset, slotbase, ..] + mstore ;; [pk2_am, offset, slotbase, ..] + swap1 ;; [offset, pk2_am, slotbase, ..] + push 16 ;; [16, offset, pk2_am, slotbase, ..] + add ;; [offset=offset+16, pk2_am, slotbase, ..] ;; Extract am from pk2_am. - swap1 ;; [pk2_am, offset, i, ..] - push 8*8 ;; [shft, pk2_am, offset, i, ..] - shr ;; [am, offset, i, ..] + swap1 ;; [pk2_am, offset, slotbase, ..] + push 8*8 ;; [shft, pk2_am, offset, slotbase, ..] + shr ;; [am, offset, slotbase, ..] ;; Store am at offset = i*RECORD_SIZE + 68. - swap1 ;; [offset, am, i, ..] - %mstore_uint64_le() ;; [i, ..] + ;; Note we convert to little-endian. + swap1 ;; [offset, am, slotbase, ..] + %mstore_uint64_le() ;; [slotbase, i, ..] ;; Increment i. + pop ;; [i, ..] push 1 ;; [1, i, ..] add ;; [i+1, ..] 
From 2525addabf906d6c3934f16b069cb7db21750c6a Mon Sep 17 00:00:00 2001 From: lightclient Date: Mon, 18 Nov 2024 12:14:51 +0800 Subject: [PATCH 2/4] withdrawals: rm slotbase from last iteration, improve comments --- src/withdrawals/main.eas | 43 ++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/src/withdrawals/main.eas b/src/withdrawals/main.eas index 51afe4d..7bf679e 100644 --- a/src/withdrawals/main.eas +++ b/src/withdrawals/main.eas @@ -244,17 +244,17 @@ accum_loop: ;; ;; The slots have the following layout: ;; - ;; A: addr + ;; 0: addr ;; 0x00 | 00 00 00 00 00 00 00 00 00 00 00 00 aa aa aa aa ;; 0x10 | aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa ;; - ;; B: pk[0:32] -> pk1 + ;; 1: pk[0:32] -> pk1 ;; 0x00 | bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ;; 0x10 | bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb ;; - ;; C: pk[32:48] ++ am[0:8] -> pk2_am - ;; 0x00 | cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc - ;; 0x10 | dd dd dd dd dd dd dd dd 00 00 00 00 00 00 00 00 + ;; 2: pk[32:48] ++ am[0:8] -> pk2_am + ;; 0x00 | bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb + ;; 0x10 | cc cc cc cc cc cc cc cc 00 00 00 00 00 00 00 00 ;; Compute the output offset = i*RECORD_SIZE. dup2 ;; [i, slotbase, i, ..] @@ -288,35 +288,34 @@ accum_loop: add ;; [offset=offset+32, slotbase, ..] ;; Read slot 'pk2_am' from storage. - dup2 ;; [slotbase, offset, slotbase, ..] - push 2 ;; [2, slotbase, offset, slotbase, ..] - add ;; [slot, offset, slotbase, ..] - sload ;; [pk2_am, offset, slotbase, ..] + swap1 ;; [slotbase, offset, ..] + push 2 ;; [2, slotbase, offset, ..] + add ;; [slot, offset, ..] + sload ;; [pk2_am, offset, ..] ;; Extract pk2 from pk2_am. - dup1 ;; [pk2_am, pk2_am, offset, slotbase, ..] - push pk2_mask ;; [mask, pk2_am, offset, slotbase, ..] - and ;; [pk2, pk2_am, offset, slotbase, ..] + dup1 ;; [pk2_am, pk2_am, offset, ..] + push pk2_mask ;; [mask, pk2_am, pk2_am, offset, ..] + and ;; [pk2, pk2_am, offset, ..] 
;; Store pk2 at offset = i*RECORD_SIZE + 52. - dup3 ;; [offset, pk2, pk2_am, offset, slotbase, ..] - mstore ;; [pk2_am, offset, slotbase, ..] - swap1 ;; [offset, pk2_am, slotbase, ..] - push 16 ;; [16, offset, pk2_am, slotbase, ..] - add ;; [offset=offset+16, pk2_am, slotbase, ..] + dup3 ;; [offset, pk2, pk2_am, offset, ..] + mstore ;; [pk2_am, offset, ..] + swap1 ;; [offset, pk2_am, ..] + push 16 ;; [16, offset, pk2_am, ..] + add ;; [offset=offset+16, pk2_am, ..] ;; Extract am from pk2_am. swap1 ;; [pk2_am, offset, slotbase, ..] - push 8*8 ;; [shft, pk2_am, offset, slotbase, ..] - shr ;; [am, offset, slotbase, ..] + push 8*8 ;; [shft, pk2_am, offset, ..] + shr ;; [am, offset, ..] ;; Store am at offset = i*RECORD_SIZE + 68. ;; Note we convert to little-endian. - swap1 ;; [offset, am, slotbase, ..] - %mstore_uint64_le() ;; [slotbase, i, ..] + swap1 ;; [offset, am, ..] + %mstore_uint64_le() ;; [i, ..] ;; Increment i. - pop ;; [i, ..] push 1 ;; [1, i, ..] add ;; [i+1, ..] From 67a242a9a5e362a6ad62f780ecfcefa6b4dea7e8 Mon Sep 17 00:00:00 2001 From: lightclient Date: Mon, 18 Nov 2024 12:15:17 +0800 Subject: [PATCH 3/4] consolidations: interleave storage reads with output formatting --- src/consolidations/main.eas | 132 ++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 60 deletions(-) diff --git a/src/consolidations/main.eas b/src/consolidations/main.eas index 2e9127a..3c3da7f 100644 --- a/src/consolidations/main.eas +++ b/src/consolidations/main.eas @@ -234,77 +234,89 @@ accum_loop: jumpi @update_head ;; [i, count, head_idx, tail_idx] ;; Determine the storage slot of the address for this iteration. This value is - ;; also the base for the other storage slots containing the source and the target - ;; public keys. The base slot will be (queue_offset + (queue_head + i)*SLOTS_PER_ITEM). + ;; also the base for the other storage slots containing the source and target + ;; public keys. The base slot will be (queue_offset + (queue_head + i)*4). 
dup3 ;; [head_idx, i, ..] dup2 ;; [i, head_idx, i, ..] add ;; [i+head_idx, i, ..] - push SLOTS_PER_ITEM ;; [SLOTS_PER_ITEM, i+head_idx, i, ..] - mul ;; [SLOTS_PER_ITEM*(i+head_idx), i, ..] - push QUEUE_OFFSET ;; [offset, SLOTS_PER_ITEM*(i+head_idx), i, ..] - add ;; [addr_offset, i, ..] - - ;; Read address from slot 0. - dup1 ;; [addr_offset, addr_offset, i, ..] - sload ;; [addr, addr_offset, i, ..] - - ;; Read source[0:32] from slot 1. - swap1 ;; [addr_offset, addr, i, ..] - push 1 ;; [1, addr_offset, addr, i, ..] - add ;; [slot1_offset, addr, i, ..] - dup1 ;; [slot1_offset, slot1_offset, addr, i, ..] - sload ;; [source[0:32], slot1_offset, addr, i, ..] - - ;; Read source[32:48] and target[0:16] from slot 2. - swap1 ;; [slot1_offset, source[0:32], addr, i, ..] - push 1 ;; [1, slot1_offset, source[0:32], addr, i, ..] - add ;; [slot2_offset, source[0:32], addr, i, ..] - dup1 ;; [slot2_offset, slot2_offset, source[0:32], addr, i, ..] - sload ;; [src[32:48] ++ tgt[0:16], slot2_offset, source[0:32], addr, i, ..] - - ;; Read target[16:48] from slot 3. - swap1 ;; [slot2_offset, src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] - push 1 ;; [1, slot2_offset, src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] - add ;; [slot3_offset, src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] - sload ;; [target[16:32], src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] + push SLOTS_PER_ITEM ;; [4, i+head_idx, i, ..] + mul ;; [4*(i+head_idx), i, ..] + push QUEUE_OFFSET ;; [offset, 4*(i+head_idx), i, ..] + add ;; [slotbase, i, ..] ;; Write values to memory flat and contiguously. This require combining the - ;; four storage elements so there is no padding: - ;; (addr, source[0:32], source[32:48] ++ target[0:16], target[16:48]) + ;; four storage elements (addr, spk1, spk2_tpk1, tpk2) so there is no padding. 
+ ;; + ;; The slots have the following layout: + ;; + ;; 0: addr + ;; 0x00 | 00 00 00 00 00 00 00 00 00 00 00 00 aa aa aa aa + ;; 0x10 | aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa + ;; + ;; 1: source[0:32] -> spk1 + ;; 0x00 | bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb + ;; 0x10 | bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb + ;; + ;; 2: source[32:48] ++ target[0:16] -> spk2_tpk1 + ;; 0x00 | bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb + ;; 0x10 | cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc + ;; + ;; 3: target[16:48] -> tpk2 + ;; 0x20 | cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc + ;; 0x30 | cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc - ;; Compute offset = i*RECORD_SIZE. + ;; Compute the output offset = i*RECORD_SIZE. + dup2 ;; [i, slotbase, i, ..] + push RECORD_SIZE ;; [size, i, slotbase, i, ..] + mul ;; [offset=size*i, slotbase, i, ..] - dup5 ;; [i, target[16:32], src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] - push RECORD_SIZE ;; [size, i, target[16:32], src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] - mul ;; [offset, target[16:32], src[32:48] ++ tgt[0:16], source[0:32], addr, i, ..] + ;; Read slot 'addr' from storage. + dup2 ;; [slotbase, offset, slotbase, ..] + sload ;; [addr, offset, slotbase, ..] ;; Shift addr bytes. - swap4 ;; [addr, src[32:48] ++ tgt[0:16], source[0:32], target[16:32], offset, i, ..] - push 12*8 ;; [96, addr, src[32:48] ++ tgt[0:16], source[0:32], target[16:32], offset, i, ..] - shl ;; [addr<<96, src[32:48] ++ tgt[0:16], source[0:32], target[16:32], offset, i, ..] + push 12*8 ;; [96, addr, offset, slotbase, ..] + shl ;; [addr<<96, offset, slotbase, ..] ;; Store addr at offset = i*RECORD_SIZE. - dup5 ;; [offset, addr<<96, offset, src[32:48] ++ tgt[0:16], source[0:32], target[16:32], i, ..] - mstore ;; [offset, src[32:48] ++ tgt[0:16], source[0:32], target[16:32], i, ..] - - ;; Store source[0:32] at offset = i*RECORD_SIZE + 20. 
- swap2 ;; [source[0:32], src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - dup4 ;; [offset, source[0:32], src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - push 20 ;; [20, offset, source[0:32], src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - add ;; [offset+20, source[0:32], src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - mstore ;; [src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - - ;; Store src[32:48] ++ tgt[0:16] at offset = i*RECORD_SIZE + 52. - dup3 ;; [offset, src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - push 52 ;; [52, offset, src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - add ;; [offset+52, src[32:48] ++ tgt[0:16], target[16:32], offset, i, ..] - mstore ;; [target[16:32], offset, i, ..] - - ;; Store target[16:48] at offset = i*RECORD_SIZE + 84. - swap1 ;; [offset, target[16:32], i, ..] - push 84 ;; [84, offset, target[16:32], i, ..] - add ;; [offset+84, target[16:32], i, ..] - mstore ;; [i, ..] + dup2 ;; [offset, addr<<96, offset, slotbase, ..] + mstore ;; [offset, slotbase, ..] + push 20 ;; [20, offset, slotbase, ..] + add ;; [offset=offset+20, slotbase, ..] + + ;; Read slot 'spk1' from storage. + dup2 ;; [slotbase, offset, slotbase, ..] + push 1 ;; [1, slotbase, offset, slotbase, ..] + add ;; [slot, offset, slotbase, ..] + sload ;; [spk1, offset, slotbase, ..] + + ;; Store spk1 at output offset = i*RECORD_SIZE+20. + dup2 ;; [offset, spk1, offset, slotbase, ..] + mstore ;; [offset, slotbase, ..] + push 32 ;; [32, offset, slotbase, ..] + add ;; [offset=offset+32, slotbase, ..] + + ;; Read slot 'spk2_tpk1' from storage. + dup2 ;; [slotbase, offset, slotbase, ..] + push 2 ;; [2, slotbase, offset, slotbase, ..] + add ;; [slot, offset, slotbase, ..] + sload ;; [spk2_tpk1, offset, slotbase, ..] + + ;; Store spk2_tpk1 at output offset = i*RECORD_SIZE+52. + dup2 ;; [offset, spk2_tpk1, offset, slotbase, ..] + mstore ;; [offset, slotbase, ..] + push 32 ;; [32, offset, slotbase, ..] 
+ add ;; [offset=offset+32, slotbase, ..] + + ;; Read slot 'tpk2' from storage. + swap1 ;; [slotbase, offset, ..] + push 3 ;; [3, slotbase, offset, ..] + add ;; [slot, offset, ..] + sload ;; [tpk2, offset, ..] + + ;; Store tpk2 at output offset = i*RECORD_SIZE+84. + swap1 ;; [offset, tpk2, ..] + mstore ;; [i, ..] ;; Increment i. push 1 ;; [1, i, ..] From 4637ffaf3c122aded4b1c2a3cda43a412da38e99 Mon Sep 17 00:00:00 2001 From: lightclient Date: Mon, 18 Nov 2024 12:26:33 +0800 Subject: [PATCH 4/4] typo fix --- src/consolidations/main.eas | 2 +- src/withdrawals/main.eas | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/consolidations/main.eas b/src/consolidations/main.eas index 3c3da7f..81e867f 100644 --- a/src/consolidations/main.eas +++ b/src/consolidations/main.eas @@ -244,7 +244,7 @@ accum_loop: push QUEUE_OFFSET ;; [offset, 4*(i+head_idx), i, ..] add ;; [slotbase, i, ..] - ;; Write values to memory flat and contiguously. This require combining the + ;; Write values to memory flat and contiguously. This requires combining the ;; four storage elements (addr, spk1, spk2_tpk1, tpk2) so there is no padding. ;; ;; The slots have the following layout: diff --git a/src/withdrawals/main.eas b/src/withdrawals/main.eas index 7bf679e..38128de 100644 --- a/src/withdrawals/main.eas +++ b/src/withdrawals/main.eas @@ -239,7 +239,7 @@ accum_loop: push QUEUE_OFFSET ;; [queue_offset, 3*(i+head_idx), i, ..] add ;; [slotbase, i, ..] - ;; Write values to memory flat and contiguously. This require combining the + ;; Write values to memory flat and contiguously. This requires combining the ;; three storage elements (addr, pk1, pk2_am) so there is no padding. ;; ;; The slots have the following layout: