From b5449cfe539cb8c28760584415fa0d0c3f43ae55 Mon Sep 17 00:00:00 2001
From: Noel Kwan
Date: Sat, 10 Jul 2021 02:42:28 +0800
Subject: [PATCH] Implement instructions for known-size bytes

See: https://github.com/fare/gerbil-ethereum/issues/49
---
 evm-instructions.ss | 115 ++++++++++++++++++++++
 t/100-evm-instructions-integrationtest.ss | 110 +++++++++++++++++++++
 t/80-evm-eval-integrationtest.ss | 14 ++-
 t/evm-instructions-test.ss | 39 ++++++++
 4 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 evm-instructions.ss
 create mode 100644 t/100-evm-instructions-integrationtest.ss
 create mode 100644 t/evm-instructions-test.ss

diff --git a/evm-instructions.ss b/evm-instructions.ss
new file mode 100644
index 0000000..1d07980
--- /dev/null
+++ b/evm-instructions.ss
@@ -0,0 +1,115 @@
+(export #t)
+(import
+  :gerbil/gambit/bits :gerbil/gambit/bytes :gerbil/gambit/exact
+  :std/misc/number :std/sugar :std/misc/list :std/format
+  :clan/base :clan/number :clan/with-id :std/srfi/1
+  :clan/poo/object (only-in :clan/poo/mop Type)
+  :clan/crypto/secp256k1
+  ./assembly ./ethereum ./types ./evm-runtime)
+
+;; --------------------------------
+;; General purpose EVM instructions
+;; --------------------------------
+;; Extends instruction set.
+;; Examples include:
+;; - Storing bytes larger than an EVM word
+;; - Storing to memory with reference
+;; - Loading from memory with reference
+;; NOTE: Reference is start-offset, length on stack.
+;;
+;; -------------
+;; Byte encoding
+;; -------------
+;; Instructions encode bytes as big-endian.
+;;
+;; For example, given the value: 0x00ff,
+;; After PUSH: Stack will have "0x00" on top, followed by "0xff".
+;; After STORE: Memory location will have "0x00" at offset, "0xff" at offset+1.
+;;
+;; For larger bytes (larger than an EVM word),
+;; we partition them into smaller chunks,
+;; less than or equal to an EVM word.
+;; We use the notation: "part[n]", to denote the partition index.
+;; E.g. 
65-bytes -> | 32-bytes | 32-bytes | 1-byte |
+;;                  | part0    | part1    | part2  |
+
+
+;; -----------------------
+;; Instruction definitions
+;; -----------------------
+
+;; stack input: -
+;; stack output: part0 part1 ... partn
+;; `len` is used as the exclusive end index into `bytes` (see `end` below).
+(def (&push/any-size bytes (start 0) (len (u8vector-length bytes)))
+  (assert! (<= 1 len) (format "size: ~d not in range: [1,∞]" len)) ; TODO: provide a concrete upper bound
+  (def end (min len (+ start 32)))
+  (when (< start end)
+    (let ((bytes<=32 (subu8vector bytes start end)))
+      ;; Remaining chunks are emitted first; `len` is forwarded so an explicit bound also limits them.
+      ;; NOTE(review): [&push-bytes bytes<=32] is a list of the procedure and the chunk, not a call - confirm.
+      (&begin (&push/any-size bytes end len) [&push-bytes bytes<=32]))))
+
+;; Helper function - Make list of word-sizes
+;; E.g. (sizes/word-size<-size 65 32) -> [32 32 1]
+(def (sizes/word-size<-size size (word-size 32))
+  (def n-words (quotient size word-size))
+  (def words (make-list n-words word-size))
+  (def last-size (modulo size word-size))
+  ;; Only append a trailing partial word when there is one.
+  (if (zero? last-size) words [words ... last-size]))
+
+;; NOTE: Uses brk@ for offset via &brk-cons, dependent on EVM memory layout.
+;; stack input: part0 part1 ... partn
+;; stack output: -
+(def (&mstore/free/any-size size)
+  (assert! (<= 1 size) (format "size: ~d not in range: [1,∞]" size)) ; TODO: provide a concrete upper bound
+  (def sizes/base-32 (sizes/word-size<-size size))
+  ;; One &brk-cons per partition, in part0 ... partn order.
+  (&begin (map &brk-cons sizes/base-32) ...))
+
+;; Helper function - Make list of relative offsets and sizes for partitions.
+;; E.g. 
(offsets-and-sizes<-size 65) -> [[0 32] [32 32] [64 1]]
+;; List [RelativeOffset Size] <- Nat
+(def (offsets-and-sizes<-size size)
+  (def sizes (sizes/word-size<-size size))
+  (def relative-offsets (iota (length sizes) 0 32))
+  (zip relative-offsets sizes))
+
+;; Given relative-offset and size,
+;; generates EVM code to:
+;; - load specified bytes
+;; - maintain start-offset for loading next segment
+;;
+;; stack input:  start-offset
+;; stack output: start-offset bytes[offset:end]
+;; where offset = start-offset+relative-offset
+;;       end = offset+size
+(def &mload-1/any-size
+  (match <>
+    ([relative-offset size]
+     (&begin                       ; start-offset
+      DUP1 relative-offset ADD     ; offset start-offset
+      (&mload size) SWAP1))))      ; start-offset bytes[offset:end]
+
+;; Generates the load sequence for the partitions given by offsets-and-sizes<-size.
+;; stack input: start-offset
+;; stack output: part0 part1 ... partn
+(def (&mload/any-size size)
+  ;; size = 0 is accepted (first clause below), so the reported range starts at 0.
+  (assert! (<= 0 size) (format "size: ~d not in range: [0,∞]" size)) ; TODO: provide a concrete upper bound
+  (match (offsets-and-sizes<-size size)
+    ;; = 0 bytes: drop start-offset and leave 0 in its place
+    ([] (&begin POP 0))
+    ;; > 0 bytes
+    ([[_ start-size] . rest]
+     (&begin
+      ;; Last partition first; each step leaves start-offset on top for the next load.
+      (map &mload-1/any-size (reverse rest)) ...
+      (&mload start-size)))))
+
+;; stack in: -
+;; mem in: part0 part1 ... partn
+;; stack out: part0 part1 ... 
partn
+(def (&mloadat/any-size offset length-size)
+  (&begin offset (&mload/any-size length-size))) ; push offset, then run the &mload/any-size sequence on it
diff --git a/t/100-evm-instructions-integrationtest.ss b/t/100-evm-instructions-integrationtest.ss
new file mode 100644
index 0000000..6d12040
--- /dev/null
+++ b/t/100-evm-instructions-integrationtest.ss
@@ -0,0 +1,110 @@
+(export #t)
+
+(import
+  :std/test :clan/number :clan/poo/object
+  ../types ../assembly ../evm-runtime
+  ../testing ../evm-instructions.ss ; NOTE(review): t/evm-instructions-test.ss imports ../evm-instructions without the .ss suffix - confirm which form is intended
+  ./10-json-rpc-integrationtest)
+
+;; Stores free memory pointer at free memory location for returning
+(def &store-brk
+  (&begin (&mloadat brk@) DUP1 MSTORE))
+
+(def 100-evm-instructions-integrationtest
+  (test-suite "integration tests for evm instructions"
+    (test-case "EVM-type: &mstore/free/any-size, size = 1"
+      (evm-test [] (&begin
+                    #x20 (&mstoreat brk@ 32) ; init freememptr
+                    (&push/any-size #u8(1)) ; stack: 1
+                    (&mstore/free/any-size 1)
+                    &store-brk
+                    )
+                [[Bytes1 . #u8(1)]
+                 [UInt256 . 33]] ; 32 (free mem ptr) + 1 = 33
+                result-in-memory?: #t
+                result-start: #x20))
+
+    (test-case "EVM-type: &mstore/free/any-size, size = 65"
+      (def 65-bytes (list->u8vector (make-list 65 65)))
+      (u8vector-set! 65-bytes 0 66) ; distinct first byte for each of the three partitions
+      (u8vector-set! 65-bytes 32 67)
+      (u8vector-set! 65-bytes 64 68)
+
+      (def 65-bytes/0-32 (subu8vector 65-bytes 0 32))
+      (def 65-bytes/32-64 (subu8vector 65-bytes 32 64))
+      (def 65-bytes/64-65 (subu8vector 65-bytes 64 65))
+
+      (evm-test [] (&begin
+                    #x20 (&mstoreat brk@ 32) ; init freememptr
+                    (&push/any-size 65-bytes)
+                    (&mstore/free/any-size 65)
+                    &store-brk
+                    )
+                [[Bytes32 . 65-bytes/0-32]
+                 [Bytes32 . 65-bytes/32-64]
+                 [Bytes1 . 65-bytes/64-65]
+                 [UInt256 . 97] ; 32 (free mem ptr) + 65 (str65) = 97
+                 ]
+                result-in-memory?: #t
+                result-start: #x20)
+      )
+
+    (test-case "EVM-type &mload/free/any-size, size = 65" ; NOTE(review): title says &mload/free/any-size, but the body exercises &mloadat/any-size
+      (def 65-bytes (list->u8vector (make-list 65 65)))
+      (u8vector-set! 65-bytes 0 66) ; distinct first byte for each of the three partitions
+      (u8vector-set! 65-bytes 32 67)
+      (u8vector-set! 65-bytes 64 68)
+
+      (def 65-bytes/0-32 (subu8vector 65-bytes 0 32))
+      (def 65-bytes/32-64 (subu8vector 65-bytes 32 64))
+      (def 65-bytes/64-65 (subu8vector 65-bytes 64 65))
+
+      (evm-test [] (&begin
+                    ;; init freememptr
+                    #x20 (&mstoreat brk@ 32) ; -
+
+                    ;; Store str65 in mem, load it back from #x20, then store the loaded copy
+                    (&push/any-size 65-bytes) ; bytes[0-32] bytes[32-64] bytes[64-65]
+                    (&mstore/free/any-size 65) ; -
+                    (&mloadat/any-size #x20 65) ; bytes[0-32] bytes[32-64] bytes[64-65]
+                    (&mstore/free/any-size 65) ; -
+
+                    &store-brk
+                    )
+
+                ;; Original 65-bytes
+                [[Bytes32 . 65-bytes/0-32]
+                 [Bytes32 . 65-bytes/32-64]
+                 [Bytes1 . 65-bytes/64-65]
+
+                 ;; Duplicate 65-bytes
+                 [Bytes32 . 65-bytes/0-32]
+                 [Bytes32 . 65-bytes/32-64]
+                 [Bytes1 . 65-bytes/64-65]
+
+                 ;; freememptr: 32 + 65 + 65 = 162
+                 [UInt256 . 162 ]
+                 ]
+                result-in-memory?: #t
+                result-start: #x20)
+      )
+
+    ;; Dependent on network config initialized during integration tests
+    (test-case "&mload/any-size assembled, size = 65"
+      (def &load65/actual (assemble/bytes (&mload/any-size 65)))
+      (def &load65/expected
+        (assemble/bytes (&begin ; -- offset
+                         ;; Load last string segment
+                         DUP1 64 ADD ; -- offset+64 offset
+                         (&mload 1) SWAP1 ; -- offset bytes[64-65]
+
+                         ;; Load second string segment
+                         DUP1 32 ADD ; -- offset+32 offset bytes[64-65]
+                         (&mload 32) SWAP1 ; -- offset bytes[32-64] bytes[64-65]
+
+                         ;; Load first string segment
+                         (&mload 32) ; -- bytes[0-32] bytes[32-64] bytes[64-65]
+                         )))
+      (check-equal? &load65/actual &load65/expected)
+      )
+    ))
diff --git a/t/80-evm-eval-integrationtest.ss b/t/80-evm-eval-integrationtest.ss
index eaf63bb..893ad8a 100644
--- a/t/80-evm-eval-integrationtest.ss
+++ b/t/80-evm-eval-integrationtest.ss
@@ -263,4 +263,16 @@
 
     (test-case "&marshal UInt8"
       (evm-test [] (&begin brk DUP1 DUP1 (&marshal UInt16 7))
-                [[UInt16 . 2]]))))
+                [[UInt16 . 2]]))
+
+    (test-case "&mstore 1 byte"
+      (evm-test [] (&begin 42 0 (&mstore 1))
+                [[UInt8 . 42]]
+                result-in-memory?: #t))
+
+    (test-case "&mstore 32 bytes"
+      (def maxUInt256 (- (expt 2 256) 1)) ; 2^256 - 1
+      (evm-test [] (&begin maxUInt256 0 (&mstore 32))
+                [[UInt256 . maxUInt256]]
+                result-in-memory?: #t))
+    ))
diff --git a/t/evm-instructions-test.ss b/t/evm-instructions-test.ss
new file mode 100644
index 0000000..a155c7f
--- /dev/null
+++ b/t/evm-instructions-test.ss
@@ -0,0 +1,39 @@
+(export #t)
+
+(import
+  :std/test :clan/poo/object
+  ../assembly ../evm-instructions ../types)
+
+;; Verify assembled bytecode of instruction(s)
+(def (check-inst? i/actual i/expected)
+  (check-equal? (assemble/bytes i/actual) (assemble/bytes i/expected)))
+
+;; NOTE: Boxed/Unboxed stack<-mem (load) methods are dependent
+;; on EVM network (eip145).
+;; Hence they are tested in integration tests.
+(def evm-instructions-test
+  (test-suite "test suite for evm-instructions"
+    (test-case "&push/any-size <= 32"
+      (def 1-byte #u8(1))
+      (check-inst? (&push/any-size 1-byte) [1-byte])
+
+      (def 5-bytes #u8(104 101 108 108 111))
+      (check-inst? (&push/any-size 5-bytes) [5-bytes])
+
+      (def 32-bytes (list->u8vector (make-list 32 65)))
+      (check-inst? (&push/any-size 32-bytes) [32-bytes])
+      )
+
+    (test-case "&push/any-size > 32"
+      (def 65-bytes (list->u8vector (make-list 65 65)))
+      (u8vector-set! 65-bytes 0 66) ; bytes/0-32: 66 65 65 ... 65
+      (u8vector-set! 65-bytes 32 67) ; bytes/32-64: 67 65 65 ... 65
+      (u8vector-set! 65-bytes 64 68) ; bytes/64-65: 68
+
+      (check-inst? (&push/any-size 65-bytes)
+                   [(subu8vector 65-bytes 64 65) ; bytes/64-65: 68
+                    (subu8vector 65-bytes 32 64) ; bytes/32-64: 67 65 65 ... 65
+                    (subu8vector 65-bytes 0 32) ; bytes/0-32: 66 65 65 ... 65
+                    ])
+      )
+    ))