From 04a48c5d758add6cb3364175a873b092b686dca0 Mon Sep 17 00:00:00 2001 From: Wolfgang Grieskamp Date: Sun, 1 Sep 2024 23:51:15 -0700 Subject: [PATCH] [compiler-v2] Ast generator from stackless bytecode Converter from stackless bytecode into Model AST. We already have one from binary format to stackless. This will be useful in many places in the stack, not least for debugging, but also as a potentially different approach to decompilation. For the latter, we still need to create an AST -> Move translator; today we have only a debug dump. Most of the logic is contained in the single new file `astifier.rs`. It has a longer module description, and I refer to that for what is provided. This is still a prototype, but I'd like to land it for iteration. There are some tests in a new test crate `ast-generator-tests`. These call the v2 compiler to compile up to file format, then decompile from there into stackless bytecode, and into AST. However, the best way to test this functionality is to actually integrate it into a `compile-execute == compile-decompile-compile-execute` test on many of our existing tests, and even the framework.
--- Cargo.lock | 20 + Cargo.toml | 2 + .../src/bytecode_generator.rs | 5 +- .../src/env_pipeline/inliner.rs | 4 +- .../move/move-model/bytecode/Cargo.toml | 1 + .../bytecode/ast-generator-tests/Cargo.toml | 23 + .../bytecode/ast-generator-tests/src/lib.rs | 5 + .../tests/conditionals.exp | 438 ++++ .../tests/conditionals.move | 38 + .../ast-generator-tests/tests/loops.exp | 465 ++++ .../ast-generator-tests/tests/loops.move | 34 + .../ast-generator-tests/tests/match.exp | 299 +++ .../ast-generator-tests/tests/match.move | 21 + .../ast-generator-tests/tests/testsuite.rs | 115 + .../move/move-model/bytecode/src/astifier.rs | 1998 +++++++++++++++++ .../bytecode/src/dataflow_domains.rs | 9 +- .../move/move-model/bytecode/src/lib.rs | 1 + .../bytecode/src/stackless_bytecode.rs | 31 +- .../src/stackless_control_flow_graph.rs | 47 +- third_party/move/move-model/src/ast.rs | 162 +- .../move-model/src/builder/exp_builder.rs | 6 +- .../move/move-model/src/exp_builder.rs | 178 ++ .../move/move-model/src/exp_rewriter.rs | 6 +- third_party/move/move-model/src/lib.rs | 1 + third_party/move/move-model/src/model.rs | 10 +- 25 files changed, 3884 insertions(+), 35 deletions(-) create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/Cargo.toml create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/src/lib.rs create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.exp create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.move create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.exp create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.move create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/tests/match.exp create mode 100644 third_party/move/move-model/bytecode/ast-generator-tests/tests/match.move create mode 100644 
third_party/move/move-model/bytecode/ast-generator-tests/tests/testsuite.rs create mode 100644 third_party/move/move-model/bytecode/src/astifier.rs create mode 100644 third_party/move/move-model/src/exp_builder.rs diff --git a/Cargo.lock b/Cargo.lock index 5f619517cfcdb..1608bcc5bfe97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4868,6 +4868,19 @@ dependencies = [ "ansi_term", ] +[[package]] +name = "ast-generator-tests" +version = "0.1.0" +dependencies = [ + "anyhow", + "codespan-reporting", + "datatest-stable", + "move-compiler-v2", + "move-model", + "move-prover-test-utils", + "move-stackless-bytecode", +] + [[package]] name = "async-channel" version = "1.9.0" @@ -11398,6 +11411,7 @@ dependencies = [ "num 0.4.1", "paste", "petgraph 0.6.5", + "topological-sort", ] [[package]] @@ -16678,6 +16692,12 @@ dependencies = [ "tonic 0.11.0", ] +[[package]] +name = "topological-sort" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" + [[package]] name = "tower" version = "0.4.13" diff --git a/Cargo.toml b/Cargo.toml index f58a1e238a281..d47d1cc3f8670 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -220,6 +220,7 @@ members = [ "third_party/move/move-model", "third_party/move/move-model/bytecode", "third_party/move/move-model/bytecode-test-utils", + "third_party/move/move-model/bytecode/ast-generator-tests", "third_party/move/move-prover", "third_party/move/move-prover/boogie-backend", "third_party/move/move-prover/bytecode-pipeline", @@ -786,6 +787,7 @@ tonic = { version = "0.11.0", features = [ "zstd", ] } tonic-reflection = "0.11.0" +topological-sort = "0.2.2" triomphe = "0.1.9" tui = "0.19.0" typed-arena = "2.0.2" diff --git a/third_party/move/move-compiler-v2/src/bytecode_generator.rs b/third_party/move/move-compiler-v2/src/bytecode_generator.rs index e20f3b09bf19e..16248e056d369 100644 --- a/third_party/move/move-compiler-v2/src/bytecode_generator.rs +++ 
b/third_party/move/move-compiler-v2/src/bytecode_generator.rs @@ -447,7 +447,7 @@ impl<'env> Generator<'env> { self.emit_with(*id, |attr| Bytecode::Jump(attr, continue_label)); self.emit_with(*id, |attr| Bytecode::Label(attr, break_label)); }, - ExpData::LoopCont(id, do_continue) => { + ExpData::LoopCont(id, 0, do_continue) => { if let Some(LoopContext { continue_label, break_label, @@ -463,6 +463,9 @@ impl<'env> Generator<'env> { self.error(*id, "missing enclosing loop statement") } }, + ExpData::LoopCont(_, _, _) => { + unimplemented!("continue/break with nesting") + }, ExpData::SpecBlock(id, spec) => { // Map locals in spec to assigned temporaries. let mut replacer = |id, target| { diff --git a/third_party/move/move-compiler-v2/src/env_pipeline/inliner.rs b/third_party/move/move-compiler-v2/src/env_pipeline/inliner.rs index 6047eae1a82ab..77a4ce7eefede 100644 --- a/third_party/move/move-compiler-v2/src/env_pipeline/inliner.rs +++ b/third_party/move/move-compiler-v2/src/env_pipeline/inliner.rs @@ -800,7 +800,7 @@ impl<'env, 'rewriter> InlinedRewriter<'env, 'rewriter> { (lambda expressions)", ) }, - ExpData::LoopCont(node_id, is_continue) if !post && in_loop == 0 => { + ExpData::LoopCont(node_id, _, is_continue) if !post && in_loop == 0 => { let node_loc = env.get_node_loc(*node_id); env.error( &node_loc, @@ -1046,7 +1046,7 @@ impl<'env, 'rewriter> ExpRewriterFunctions for InlinedRewriter<'env, 'rewriter> self.in_loop += 1; true }, - ExpData::LoopCont(node_id, is_continue) if self.in_loop == 0 => { + ExpData::LoopCont(node_id, _, is_continue) if self.in_loop == 0 => { let node_loc = self.env.get_node_loc(*node_id); self.env.error( &node_loc, diff --git a/third_party/move/move-model/bytecode/Cargo.toml b/third_party/move/move-model/bytecode/Cargo.toml index 43e0ed1ef7a07..ae85ec999da93 100644 --- a/third_party/move/move-model/bytecode/Cargo.toml +++ b/third_party/move/move-model/bytecode/Cargo.toml @@ -24,6 +24,7 @@ log = { workspace = true } num = { workspace = 
true } paste = { workspace = true } petgraph = { workspace = true } +topological-sort = { workspace = true } [dev-dependencies] anyhow = { workspace = true } diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/Cargo.toml b/third_party/move/move-model/bytecode/ast-generator-tests/Cargo.toml new file mode 100644 index 0000000000000..cb0a9218559ac --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "ast-generator-tests" +version = "0.1.0" +edition = "2021" +license = { workspace = true } +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dev-dependencies] +anyhow = { workspace = true } +codespan-reporting = { workspace = true, features = ["serde", "serialization"] } +datatest-stable = { workspace = true } +move-compiler-v2 = { workspace = true } +move-model = { workspace = true } +move-prover-test-utils = { workspace = true } + +move-stackless-bytecode = { path = ".." } + +[[test]] +name = "testsuite" +harness = false + +[lib] +doctest = false diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/src/lib.rs b/third_party/move/move-model/bytecode/ast-generator-tests/src/lib.rs new file mode 100644 index 0000000000000..dd271f8c0ff7b --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/src/lib.rs @@ -0,0 +1,5 @@ +// Copyright © Aptos Foundation +// Parts of the project are originally copyright © Meta Platforms, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! 
Intentionally empty diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.exp b/third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.exp new file mode 100644 index 0000000000000..ea46a4810faa1 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.exp @@ -0,0 +1,438 @@ + +=== Processing m::if_1 ===================================================== +--- Source +fun if_1(c: bool): u8 { + let result = 0; + if (c) { + result = 1; + }; + result + } + +--- Stackless Bytecode +fun m::if_1($t0|c: bool): u8 { + var $t1|result: u8 + var $t2: u8 + var $t3: bool + var $t4: u8 + var $t5: u8 + 0: $t2 := 0 + 1: $t1 := $t2 + 2: $t3 := move($t0) + 3: if ($t3) goto 4 else goto 8 + 4: label L1 + 5: $t4 := 1 + 6: $t1 := $t4 + 7: goto 8 + 8: label L0 + 9: $t5 := move($t1) + 10: return $t5 +} + +--- Raw Generated AST +_t2: u8 = 0; +_t1: u8 = _t2; +_t3: bool = c; +loop { + if (Not(_t3)) break; + _t4: u8 = 1; + _t1: u8 = _t4; + break +}; +_t5: u8 = _t1; +return _t5 + +--- If-Transformed Generated AST +_t2: u8 = 0; +_t1: u8 = _t2; +_t3: bool = c; +if _t3 { + _t4: u8 = 1; + _t1: u8 = _t4 +}; +_t5: u8 = _t1; +return _t5 + +--- Assign-Transformed Generated AST +{ + let _t1: u8 = 0; + if c { + _t1: u8 = 1 + }; + return _t1 +} + + +=== Processing m::if_else_1 ===================================================== +--- Source +fun if_else_1(c: bool): u8 { + if (c) 1 else 2 + } + +--- Stackless Bytecode +fun m::if_else_1($t0|c: bool): u8 { + var $t1|return: u8 + var $t2: bool + var $t3: u8 + var $t4: u8 + var $t5: u8 + 0: $t2 := move($t0) + 1: if ($t2) goto 2 else goto 6 + 2: label L1 + 3: $t3 := 1 + 4: $t1 := $t3 + 5: goto 10 + 6: label L0 + 7: $t4 := 2 + 8: $t1 := $t4 + 9: goto 10 + 10: label L2 + 11: $t5 := move($t1) + 12: return $t5 +} + +--- Raw Generated AST +_t2: bool = c; +loop { + loop { + if (Not(_t2)) break; + _t3: u8 = 1; + _t1: u8 = _t3; + break[1] + }; + _t4: u8 = 2; + _t1: u8 = _t4; + 
break +}; +_t5: u8 = _t1; +return _t5 + +--- If-Transformed Generated AST +_t2: bool = c; +if _t2 { + _t3: u8 = 1; + _t1: u8 = _t3 +} else { + _t4: u8 = 2; + _t1: u8 = _t4 +}; +_t5: u8 = _t1; +return _t5 + +--- Assign-Transformed Generated AST +{ + let _t1: u8; + if c { + _t1: u8 = 1 + } else { + _t1: u8 = 2 + }; + return _t1 +} + + +=== Processing m::if_else_2 ===================================================== +--- Source +fun if_else_2(c: bool, d: bool): u8 { + if (c) { + if (d) { + 1 + } else { + 2 + } + } else { + 3 + } + } + +--- Stackless Bytecode +fun m::if_else_2($t0|c: bool, $t1|d: bool): u8 { + var $t2|return: u8 + var $t3: bool + var $t4: bool + var $t5: u8 + var $t6: u8 + var $t7: u8 + var $t8: u8 + 0: $t3 := move($t0) + 1: if ($t3) goto 2 else goto 15 + 2: label L1 + 3: $t4 := move($t1) + 4: if ($t4) goto 5 else goto 9 + 5: label L3 + 6: $t5 := 1 + 7: $t2 := $t5 + 8: goto 13 + 9: label L2 + 10: $t6 := 2 + 11: $t2 := $t6 + 12: goto 13 + 13: label L4 + 14: goto 19 + 15: label L0 + 16: $t7 := 3 + 17: $t2 := $t7 + 18: goto 19 + 19: label L5 + 20: $t8 := move($t2) + 21: return $t8 +} + +--- Raw Generated AST +_t3: bool = c; +loop { + loop { + if (_t3) break; + _t7: u8 = 3; + _t2: u8 = _t7; + break[1] + }; + _t4: bool = d; + loop { + loop { + if (Not(_t4)) break; + _t5: u8 = 1; + _t2: u8 = _t5; + break[1] + }; + _t6: u8 = 2; + _t2: u8 = _t6; + break + }; + break +}; +_t8: u8 = _t2; +return _t8 + +--- If-Transformed Generated AST +_t3: bool = c; +if _t3 { + _t4: bool = d; + if _t4 { + _t5: u8 = 1; + _t2: u8 = _t5 + } else { + _t6: u8 = 2; + _t2: u8 = _t6 + } +} else { + _t7: u8 = 3; + _t2: u8 = _t7 +}; +_t8: u8 = _t2; +return _t8 + +--- Assign-Transformed Generated AST +{ + let _t2: u8; + if c { + if d { + _t2: u8 = 1 + } else { + _t2: u8 = 2 + } + } else { + _t2: u8 = 3 + }; + return _t2 +} + + +=== Processing m::if_else_3 ===================================================== +--- Source +fun if_else_3(c: bool): u64 { + let r = if (c) 1 else 2; + r + } + 
+--- Stackless Bytecode +fun m::if_else_3($t0|c: bool): u64 { + var $t1|r: u64 + var $t2: bool + var $t3: u64 + var $t4: u64 + var $t5: u64 + 0: $t2 := move($t0) + 1: if ($t2) goto 2 else goto 6 + 2: label L1 + 3: $t3 := 1 + 4: $t1 := $t3 + 5: goto 10 + 6: label L0 + 7: $t4 := 2 + 8: $t1 := $t4 + 9: goto 10 + 10: label L2 + 11: $t5 := move($t1) + 12: return $t5 +} + +--- Raw Generated AST +_t2: bool = c; +loop { + loop { + if (Not(_t2)) break; + _t3: u64 = 1; + _t1: u64 = _t3; + break[1] + }; + _t4: u64 = 2; + _t1: u64 = _t4; + break +}; +_t5: u64 = _t1; +return _t5 + +--- If-Transformed Generated AST +_t2: bool = c; +if _t2 { + _t3: u64 = 1; + _t1: u64 = _t3 +} else { + _t4: u64 = 2; + _t1: u64 = _t4 +}; +_t5: u64 = _t1; +return _t5 + +--- Assign-Transformed Generated AST +{ + let _t1: u64; + if c { + _t1: u64 = 1 + } else { + _t1: u64 = 2 + }; + return _t1 +} + + +=== Processing m::if_else_with_shard_exp ===================================================== +--- Source +fun if_else_with_shard_exp(x: u64): u64 { + let y = x + x; + let z = y * y; + if (z > 0) z + 1 else z - 1 + } + +--- Stackless Bytecode +fun m::if_else_with_shard_exp($t0|x: u64): u64 { + var $t1|$t5: u64 + var $t2|$t7: u64 [unused] + var $t3: u64 + var $t4: u64 + var $t5: u64 + var $t6: u64 + var $t7: u64 + var $t8: u64 + var $t9: u64 + var $t10: u64 + var $t11: bool + var $t12: u64 + var $t13: u64 + var $t14: u64 + var $t15: u64 + var $t16: u64 + var $t17: u64 + var $t18: u64 + 0: $t3 := copy($t0) + 1: $t4 := move($t0) + 2: $t5 := +($t3, $t4) + 3: $t0 := $t5 + 4: $t6 := copy($t0) + 5: $t7 := move($t0) + 6: $t8 := *($t6, $t7) + 7: $t0 := $t8 + 8: $t9 := copy($t0) + 9: $t10 := 0 + 10: $t11 := >($t9, $t10) + 11: if ($t11) goto 12 else goto 18 + 12: label L1 + 13: $t12 := move($t0) + 14: $t13 := 1 + 15: $t14 := +($t12, $t13) + 16: $t1 := $t14 + 17: goto 24 + 18: label L0 + 19: $t15 := move($t0) + 20: $t16 := 1 + 21: $t17 := -($t15, $t16) + 22: $t1 := $t17 + 23: goto 24 + 24: label L2 + 25: $t18 := 
move($t1) + 26: return $t18 +} + +--- Raw Generated AST +_t3: u64 = x; +_t4: u64 = x; +_t5: u64 = Add(_t3, _t4); +x: u64 = _t5; +_t6: u64 = x; +_t7: u64 = x; +_t8: u64 = Mul(_t6, _t7); +x: u64 = _t8; +_t9: u64 = x; +_t10: u64 = 0; +_t11: bool = Gt(_t9, _t10); +loop { + loop { + if (Not(_t11)) break; + _t12: u64 = x; + _t13: u64 = 1; + _t14: u64 = Add(_t12, _t13); + _t1: u64 = _t14; + break[1] + }; + _t15: u64 = x; + _t16: u64 = 1; + _t17: u64 = Sub(_t15, _t16); + _t1: u64 = _t17; + break +}; +_t18: u64 = _t1; +return _t18 + +--- If-Transformed Generated AST +_t3: u64 = x; +_t4: u64 = x; +_t5: u64 = Add(_t3, _t4); +x: u64 = _t5; +_t6: u64 = x; +_t7: u64 = x; +_t8: u64 = Mul(_t6, _t7); +x: u64 = _t8; +_t9: u64 = x; +_t10: u64 = 0; +_t11: bool = Gt(_t9, _t10); +if _t11 { + _t12: u64 = x; + _t13: u64 = 1; + _t14: u64 = Add(_t12, _t13); + _t1: u64 = _t14 +} else { + _t15: u64 = x; + _t16: u64 = 1; + _t17: u64 = Sub(_t15, _t16); + _t1: u64 = _t17 +}; +_t18: u64 = _t1; +return _t18 + +--- Assign-Transformed Generated AST +{ + let _t1: u64; + { + let x: u64 = Add(x, x); + { + let x: u64 = Mul(x, x); + if Gt(x, 0) { + _t1: u64 = Add(x, 1) + } else { + _t1: u64 = Sub(x, 1) + }; + return _t1 + } + } +} diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.move b/third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.move new file mode 100644 index 0000000000000..f7b1261d66cb7 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/conditionals.move @@ -0,0 +1,38 @@ +module 0x815::m { + + fun if_else_1(c: bool): u8 { + if (c) 1 else 2 + } + + fun if_else_2(c: bool, d: bool): u8 { + if (c) { + if (d) { + 1 + } else { + 2 + } + } else { + 3 + } + } + + + fun if_1(c: bool): u8 { + let result = 0; + if (c) { + result = 1; + }; + result + } + + fun if_else_3(c: bool): u64 { + let r = if (c) 1 else 2; + r + } + + fun if_else_with_shard_exp(x: u64): u64 { + let y = x + x; + let z = y * y; + if (z > 0) 
z + 1 else z - 1 + } +} diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.exp b/third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.exp new file mode 100644 index 0000000000000..7437f0f349ad5 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.exp @@ -0,0 +1,465 @@ + +=== Processing m::loop_1 ===================================================== +--- Source +fun loop_1(c: u64): u64 { + loop { + c = c + 1; + if (c % 2 == 0) continue; + c = c + 3; + if (c % 2 == 1) break; + }; + c + } + +--- Stackless Bytecode +fun m::loop_1($t0|c: u64): u64 { + var $t1|$t3: u64 [unused] + var $t2: u64 + var $t3: u64 + var $t4: u64 + var $t5: u64 + var $t6: u64 + var $t7: u64 + var $t8: u64 + var $t9: bool + var $t10: u64 + var $t11: u64 + var $t12: u64 + var $t13: u64 + var $t14: u64 + var $t15: u64 + var $t16: u64 + var $t17: bool + var $t18: u64 + 0: label L2 + 1: $t2 := move($t0) + 2: $t3 := 1 + 3: $t4 := +($t2, $t3) + 4: $t0 := $t4 + 5: $t5 := copy($t0) + 6: $t6 := 2 + 7: $t7 := %($t5, $t6) + 8: $t8 := 0 + 9: $t9 := ==($t7, $t8) + 10: if ($t9) goto 11 else goto 13 + 11: label L1 + 12: goto 0 + 13: label L0 + 14: $t10 := move($t0) + 15: $t11 := 3 + 16: $t12 := +($t10, $t11) + 17: $t0 := $t12 + 18: $t13 := copy($t0) + 19: $t14 := 2 + 20: $t15 := %($t13, $t14) + 21: $t16 := 1 + 22: $t17 := ==($t15, $t16) + 23: if ($t17) goto 24 else goto 26 + 24: label L4 + 25: goto 28 + 26: label L3 + 27: goto 0 + 28: label L5 + 29: $t18 := move($t0) + 30: return $t18 +} + +--- Raw Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 1; + _t4: u64 = Add(_t2, _t3); + c: u64 = _t4; + _t5: u64 = c; + _t6: u64 = 2; + _t7: u64 = Mod(_t5, _t6); + _t8: u64 = 0; + _t9: bool = Eq(_t7, _t8); + if (_t9) continue; + _t10: u64 = c; + _t11: u64 = 3; + _t12: u64 = Add(_t10, _t11); + c: u64 = _t12; + _t13: u64 = c; + _t14: u64 = 2; + _t15: u64 = Mod(_t13, _t14); + _t16: u64 = 1; + _t17: bool = Eq(_t15, _t16); + if (Not(_t17)) 
continue; + break +}; +_t18: u64 = c; +return _t18 + +--- If-Transformed Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 1; + _t4: u64 = Add(_t2, _t3); + c: u64 = _t4; + _t5: u64 = c; + _t6: u64 = 2; + _t7: u64 = Mod(_t5, _t6); + _t8: u64 = 0; + _t9: bool = Eq(_t7, _t8); + if (_t9) continue; + _t10: u64 = c; + _t11: u64 = 3; + _t12: u64 = Add(_t10, _t11); + c: u64 = _t12; + _t13: u64 = c; + _t14: u64 = 2; + _t15: u64 = Mod(_t13, _t14); + _t16: u64 = 1; + _t17: bool = Eq(_t15, _t16); + if (Not(_t17)) continue; + break +}; +_t18: u64 = c; +return _t18 + +--- Assign-Transformed Generated AST +loop { + c: u64 = Add(c, 1); + if (Eq(Mod(c, 2), 0)) continue; + c: u64 = Add(c, 3); + if (Not(Eq(Mod(c, 2), 1))) continue; + break +}; +return c + + +=== Processing m::while_1 ===================================================== +--- Source +fun while_1(c: u64) { + while (c > 0) c = c - 1 + } + +--- Stackless Bytecode +fun m::while_1($t0|c: u64) { + var $t1|$t2: u64 [unused] + var $t2: u64 + var $t3: u64 + var $t4: bool + var $t5: u64 + var $t6: u64 + var $t7: u64 + 0: label L4 + 1: $t2 := copy($t0) + 2: $t3 := 0 + 3: $t4 := >($t2, $t3) + 4: if ($t4) goto 5 else goto 11 + 5: label L1 + 6: $t5 := move($t0) + 7: $t6 := 1 + 8: $t7 := -($t5, $t6) + 9: $t0 := $t7 + 10: goto 13 + 11: label L0 + 12: goto 15 + 13: label L2 + 14: goto 0 + 15: label L3 + 16: return () +} + +--- Raw Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 0; + _t4: bool = Gt(_t2, _t3); + loop { + if (Not(_t4)) break; + _t5: u64 = c; + _t6: u64 = 1; + _t7: u64 = Sub(_t5, _t6); + c: u64 = _t7; + continue[1] + }; + break +}; +return Tuple() + +--- If-Transformed Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 0; + _t4: bool = Gt(_t2, _t3); + if _t4 { + _t5: u64 = c; + _t6: u64 = 1; + _t7: u64 = Sub(_t5, _t6); + c: u64 = _t7; + continue + }; + break +}; +return Tuple() + +--- Assign-Transformed Generated AST +loop { + if Gt(c, 0) { + { + let c: u64 = Sub(c, 1); + continue + } + }; + break +}; +return Tuple() 
+ + +=== Processing m::while_2 ===================================================== +--- Source +fun while_2(c: u64): u64 { + while (c > 0) { + if (c >= 10) { + c = c - 10 + } + }; + c = c + 1; + c + } + +--- Stackless Bytecode +fun m::while_2($t0|c: u64): u64 { + var $t1|$t3: u64 [unused] + var $t2: u64 + var $t3: u64 + var $t4: bool + var $t5: u64 + var $t6: u64 + var $t7: bool + var $t8: u64 + var $t9: u64 + var $t10: u64 + var $t11: u64 + var $t12: u64 + var $t13: u64 + 0: label L6 + 1: $t2 := copy($t0) + 2: $t3 := 0 + 3: $t4 := >($t2, $t3) + 4: if ($t4) goto 5 else goto 18 + 5: label L1 + 6: $t5 := copy($t0) + 7: $t6 := 10 + 8: $t7 := >=($t5, $t6) + 9: if ($t7) goto 10 else goto 16 + 10: label L3 + 11: $t8 := move($t0) + 12: $t9 := 10 + 13: $t10 := -($t8, $t9) + 14: $t0 := $t10 + 15: goto 16 + 16: label L2 + 17: goto 20 + 18: label L0 + 19: goto 22 + 20: label L4 + 21: goto 0 + 22: label L5 + 23: $t11 := move($t0) + 24: $t12 := 1 + 25: $t13 := +($t11, $t12) + 26: return $t13 +} + +--- Raw Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 0; + _t4: bool = Gt(_t2, _t3); + loop { + if (Not(_t4)) break; + _t5: u64 = c; + _t6: u64 = 10; + _t7: bool = Ge(_t5, _t6); + if (Not(_t7)) continue[1]; + _t8: u64 = c; + _t9: u64 = 10; + _t10: u64 = Sub(_t8, _t9); + c: u64 = _t10; + continue[1] + }; + break +}; +_t11: u64 = c; +_t12: u64 = 1; +_t13: u64 = Add(_t11, _t12); +return _t13 + +--- If-Transformed Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 0; + _t4: bool = Gt(_t2, _t3); + if _t4 { + _t5: u64 = c; + _t6: u64 = 10; + _t7: bool = Ge(_t5, _t6); + if (Not(_t7)) continue; + _t8: u64 = c; + _t9: u64 = 10; + _t10: u64 = Sub(_t8, _t9); + c: u64 = _t10; + continue + }; + break +}; +_t11: u64 = c; +_t12: u64 = 1; +_t13: u64 = Add(_t11, _t12); +return _t13 + +--- Assign-Transformed Generated AST +loop { + if Gt(c, 0) { + if (Not(Ge(c, 10))) continue; + c: u64 = Sub(c, 10); + continue + }; + break +}; +return Add(c, 1) + + +=== Processing m::while_3 
===================================================== +--- Source +fun while_3(c: u64): u64 { + while (c > 0) { + while (c > 10) c = c - 10; + c = c - 1; + }; + c + } + +--- Stackless Bytecode +fun m::while_3($t0|c: u64): u64 { + var $t1|$t3: u64 [unused] + var $t2: u64 + var $t3: u64 + var $t4: bool + var $t5: u64 + var $t6: u64 + var $t7: bool + var $t8: u64 + var $t9: u64 + var $t10: u64 + var $t11: u64 + var $t12: u64 + var $t13: u64 + var $t14: u64 + 0: label L8 + 1: $t2 := copy($t0) + 2: $t3 := 0 + 3: $t4 := >($t2, $t3) + 4: if ($t4) goto 5 else goto 26 + 5: label L1 + 6: $t5 := copy($t0) + 7: $t6 := 10 + 8: $t7 := >($t5, $t6) + 9: if ($t7) goto 10 else goto 16 + 10: label L3 + 11: $t8 := move($t0) + 12: $t9 := 10 + 13: $t10 := -($t8, $t9) + 14: $t0 := $t10 + 15: goto 18 + 16: label L2 + 17: goto 20 + 18: label L4 + 19: goto 5 + 20: label L5 + 21: $t11 := move($t0) + 22: $t12 := 1 + 23: $t13 := -($t11, $t12) + 24: $t0 := $t13 + 25: goto 28 + 26: label L0 + 27: goto 30 + 28: label L6 + 29: goto 0 + 30: label L7 + 31: $t14 := move($t0) + 32: return $t14 +} + +--- Raw Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 0; + _t4: bool = Gt(_t2, _t3); + loop { + if (Not(_t4)) break; + loop { + _t5: u64 = c; + _t6: u64 = 10; + _t7: bool = Gt(_t5, _t6); + loop { + if (Not(_t7)) break; + _t8: u64 = c; + _t9: u64 = 10; + _t10: u64 = Sub(_t8, _t9); + c: u64 = _t10; + continue[1] + }; + break + }; + _t11: u64 = c; + _t12: u64 = 1; + _t13: u64 = Sub(_t11, _t12); + c: u64 = _t13; + continue[1] + }; + break +}; +_t14: u64 = c; +return _t14 + +--- If-Transformed Generated AST +loop { + _t2: u64 = c; + _t3: u64 = 0; + _t4: bool = Gt(_t2, _t3); + if _t4 { + loop { + _t5: u64 = c; + _t6: u64 = 10; + _t7: bool = Gt(_t5, _t6); + if _t7 { + _t8: u64 = c; + _t9: u64 = 10; + _t10: u64 = Sub(_t8, _t9); + c: u64 = _t10; + continue + }; + break + }; + _t11: u64 = c; + _t12: u64 = 1; + _t13: u64 = Sub(_t11, _t12); + c: u64 = _t13; + continue + }; + break +}; +_t14: u64 = c; +return _t14 
+ +--- Assign-Transformed Generated AST +loop { + if Gt(c, 0) { + loop { + if Gt(c, 10) { + c: u64 = Sub(c, 10); + continue + }; + break + }; + c: u64 = Sub(c, 1); + continue + }; + break +}; +return c diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.move b/third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.move new file mode 100644 index 0000000000000..95a671341c962 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/loops.move @@ -0,0 +1,34 @@ +module 0x815::m { + + fun while_1(c: u64) { + while (c > 0) c = c - 1 + } + + fun while_2(c: u64): u64 { + while (c > 0) { + if (c >= 10) { + c = c - 10 + } + }; + c = c + 1; + c + } + + fun while_3(c: u64): u64 { + while (c > 0) { + while (c > 10) c = c - 10; + c = c - 1; + }; + c + } + + fun loop_1(c: u64): u64 { + loop { + c = c + 1; + if (c % 2 == 0) continue; + c = c + 3; + if (c % 2 == 1) break; + }; + c + } +} diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/match.exp b/third_party/move/move-model/bytecode/ast-generator-tests/tests/match.exp new file mode 100644 index 0000000000000..e8a60a0487de5 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/match.exp @@ -0,0 +1,299 @@ + +=== Processing m::id ===================================================== +--- Source +fun id(self: &Entity): u64 { + match (self) { + Person{id} => *id, + Institution{id, ..} => *id + } + } + +--- Stackless Bytecode +fun m::id($t0|self: &m::Entity): u64 { + var $t1|return: u64 + var $t2: &m::Entity + var $t3: bool + var $t4: &m::Entity + var $t5: &u64 + var $t6: u64 + var $t7: &m::Entity + var $t8: bool + var $t9: &m::Entity + var $t10: &u64 + var $t11: u64 + var $t12: &m::Entity + var $t13: u64 + var $t14: u64 + 0: $t2 := copy($t0) + 1: $t3 := test_variant m::Entity::Person($t2) + 2: if ($t3) goto 3 else goto 9 + 3: label L1 + 4: $t4 := move($t0) + 5: $t5 := borrow_variant_field.id($t4) + 6: $t6 := 
read_ref($t5) + 7: $t1 := $t6 + 8: goto 24 + 9: label L0 + 10: $t7 := copy($t0) + 11: $t8 := test_variant m::Entity::Institution($t7) + 12: if ($t8) goto 13 else goto 19 + 13: label L4 + 14: $t9 := move($t0) + 15: $t10 := borrow_variant_field.id($t9) + 16: $t11 := read_ref($t10) + 17: $t1 := $t11 + 18: goto 24 + 19: label L3 + 20: $t12 := move($t0) + 21: drop($t12) + 22: $t13 := 14566554180833181697 + 23: abort($t13) + 24: label L2 + 25: $t14 := move($t1) + 26: return $t14 +} + +--- Raw Generated AST +_t2: &m::Entity = self; +_t3: bool = test_variants m::Entity::Person(_t2); +loop { + loop { + if (Not(_t3)) break; + _t4: &m::Entity = self; + _t5: &u64 = select_variants m::Entity.id(_t4); + _t6: u64 = Deref(_t5); + _t1: u64 = _t6; + break[1] + }; + _t7: &m::Entity = self; + _t8: bool = test_variants m::Entity::Institution(_t7); + loop { + loop { + if (Not(_t8)) break; + _t9: &m::Entity = self; + _t10: &u64 = select_variants m::Entity.id(_t9); + _t11: u64 = Deref(_t10); + _t1: u64 = _t11; + break[1] + }; + _t12: &m::Entity = self; + _t13: u64 = 14566554180833181697; + Abort(_t13) + }; + break +}; +_t14: u64 = _t1; +return _t14 + +--- If-Transformed Generated AST +_t2: &m::Entity = self; +_t3: bool = test_variants m::Entity::Person(_t2); +if _t3 { + _t4: &m::Entity = self; + _t5: &u64 = select_variants m::Entity.id(_t4); + _t6: u64 = Deref(_t5); + _t1: u64 = _t6 +} else { + _t7: &m::Entity = self; + _t8: bool = test_variants m::Entity::Institution(_t7); + if _t8 { + _t9: &m::Entity = self; + _t10: &u64 = select_variants m::Entity.id(_t9); + _t11: u64 = Deref(_t10); + _t1: u64 = _t11 + } else { + _t12: &m::Entity = self; + _t13: u64 = 14566554180833181697; + Abort(_t13) + } +}; +_t14: u64 = _t1; +return _t14 + +--- Assign-Transformed Generated AST +{ + let _t1: u64; + if test_variants m::Entity::Person(self) { + _t1: u64 = Deref(select_variants m::Entity.id(self)) + } else { + if test_variants m::Entity::Institution(self) { + _t1: u64 = Deref(select_variants 
m::Entity.id(self)) + } else { + Abort(14566554180833181697) + } + }; + return _t1 +} + + +=== Processing m::id2 ===================================================== +--- Source +fun id2(self: Entity): u64 { + match (self) { + Person{id} if id > 0 => id, + Institution{id, ..} => id, + _ => 0 + } + } + +--- Stackless Bytecode +fun m::id2($t0|self: m::Entity): u64 { + var $t1|$t2: &m::Entity + var $t2|$t5: u64 + var $t3: &m::Entity + var $t4: &m::Entity + var $t5: bool + var $t6: &m::Entity + var $t7: &u64 + var $t8: u64 + var $t9: u64 + var $t10: bool + var $t11: &m::Entity + var $t12: m::Entity + var $t13: u64 + var $t14: &m::Entity + var $t15: bool + var $t16: m::Entity + var $t17: u64 + var $t18: u64 + var $t19: u64 + var $t20: u64 + 0: $t3 := borrow_local($t0) + 1: $t1 := $t3 + 2: $t4 := copy($t1) + 3: $t5 := test_variant m::Entity::Person($t4) + 4: if ($t5) goto 5 else goto 19 + 5: label L1 + 6: $t6 := copy($t1) + 7: $t7 := borrow_variant_field.id($t6) + 8: $t8 := read_ref($t7) + 9: $t9 := 0 + 10: $t10 := >($t8, $t9) + 11: if ($t10) goto 12 else goto 19 + 12: label L2 + 13: $t11 := move($t1) + 14: drop($t11) + 15: $t12 := move($t0) + 16: $t13 := unpack_variant m::Entity::Person($t12) + 17: $t2 := $t13 + 18: goto 33 + 19: label L0 + 20: $t14 := move($t1) + 21: $t15 := test_variant m::Entity::Institution($t14) + 22: if ($t15) goto 23 else goto 29 + 23: label L5 + 24: $t16 := move($t0) + 25: ($t17, $t18) := unpack_variant m::Entity::Institution($t16) + 26: drop($t18) + 27: $t2 := $t17 + 28: goto 33 + 29: label L4 + 30: $t19 := 0 + 31: $t2 := $t19 + 32: goto 33 + 33: label L3 + 34: $t20 := move($t2) + 35: return $t20 +} + +--- Raw Generated AST +_t3: &m::Entity = Borrow(Immutable)(self); +_t1: &m::Entity = _t3; +_t4: &m::Entity = _t1; +_t5: bool = test_variants m::Entity::Person(_t4); +loop { + loop { + if (Not(_t5)) break; + _t6: &m::Entity = _t1; + _t7: &u64 = select_variants m::Entity.id(_t6); + _t8: u64 = Deref(_t7); + _t9: u64 = 0; + _t10: bool = Gt(_t8, 
_t9); + if (Not(_t10)) break; + _t11: &m::Entity = _t1; + _t12: m::Entity = self; + m::Entity::Person{ id: _t13 } = _t12; + _t2: u64 = _t13; + break[1] + }; + _t14: &m::Entity = _t1; + _t15: bool = test_variants m::Entity::Institution(_t14); + loop { + loop { + if (Not(_t15)) break; + _t16: m::Entity = self; + m::Entity::Institution{ id: _t17, admin: _t18 } = _t16; + _t2: u64 = _t17; + break[1] + }; + _t19: u64 = 0; + _t2: u64 = _t19; + break + }; + break +}; +_t20: u64 = _t2; +return _t20 + +--- If-Transformed Generated AST +_t3: &m::Entity = Borrow(Immutable)(self); +_t1: &m::Entity = _t3; +_t4: &m::Entity = _t1; +_t5: bool = test_variants m::Entity::Person(_t4); +loop { + loop { + if (Not(_t5)) break; + _t6: &m::Entity = _t1; + _t7: &u64 = select_variants m::Entity.id(_t6); + _t8: u64 = Deref(_t7); + _t9: u64 = 0; + _t10: bool = Gt(_t8, _t9); + if (Not(_t10)) break; + _t11: &m::Entity = _t1; + _t12: m::Entity = self; + m::Entity::Person{ id: _t13 } = _t12; + _t2: u64 = _t13; + break[1] + }; + _t14: &m::Entity = _t1; + _t15: bool = test_variants m::Entity::Institution(_t14); + if _t15 { + _t16: m::Entity = self; + m::Entity::Institution{ id: _t17, admin: _t18 } = _t16; + _t2: u64 = _t17 + } else { + _t19: u64 = 0; + _t2: u64 = _t19 + }; + break +}; +_t20: u64 = _t2; +return _t20 + +--- Assign-Transformed Generated AST +{ + let _t2: u64; + { + let _t1: &m::Entity = Borrow(Immutable)(self); + loop { + loop { + if (Not(test_variants m::Entity::Person(_t1))) break; + if (Not(Gt(Deref(select_variants m::Entity.id(_t1)), 0))) break; + { + let m::Entity::Person{ id: _t13 } = self; + break[1] + } + }; + if test_variants m::Entity::Institution(_t1) { + { + let m::Entity::Institution{ id: _t17, admin: _t18 } = self; + _t2: u64 = _t17 + } + } else { + _t2: u64 = 0 + }; + break + }; + return _t2 + } +} diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/match.move b/third_party/move/move-model/bytecode/ast-generator-tests/tests/match.move new file 
mode 100644 index 0000000000000..451429e02f126 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/match.move @@ -0,0 +1,21 @@ +module 0x815::m { + enum Entity has drop { + Person { id: u64 }, + Institution { id: u64, admin: u64 } + } + + fun id(self: &Entity): u64 { + match (self) { + Person{id} => *id, + Institution{id, ..} => *id + } + } + + fun id2(self: Entity): u64 { + match (self) { + Person{id} if id > 0 => id, + Institution{id, ..} => id, + _ => 0 + } + } +} diff --git a/third_party/move/move-model/bytecode/ast-generator-tests/tests/testsuite.rs b/third_party/move/move-model/bytecode/ast-generator-tests/tests/testsuite.rs new file mode 100644 index 0000000000000..a4b78b63a7759 --- /dev/null +++ b/third_party/move/move-model/bytecode/ast-generator-tests/tests/testsuite.rs @@ -0,0 +1,115 @@ +// Copyright © Aptos Foundation +// Parts of the project are originally copyright © Meta Platforms, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use codespan_reporting::term::termcolor::Buffer; +use move_compiler_v2::{logging, run_move_compiler_for_analysis, Options}; +use move_model::{metadata::LanguageVersion, model::GlobalEnv}; +use move_prover_test_utils::{baseline_test, extract_test_directives}; +use move_stackless_bytecode::{ + astifier, + function_target::FunctionTarget, + function_target_pipeline::{FunctionTargetsHolder, FunctionVariant}, +}; +use std::path::{Path, PathBuf}; + +/// Extension for expected output files +pub const EXP_EXT: &str = "exp"; + +datatest_stable::harness!(test_runner, "tests", r".*\.move$"); + +fn test_runner(path: &Path) -> datatest_stable::Result<()> { + logging::setup_logging_for_testing(); + let path_str = path.display().to_string(); + let mut options = Options { + sources_deps: extract_test_directives(path, "// dep:")?, + sources: vec![path_str.clone()], + dependencies: if extract_test_directives(path, "// no-stdlib")?.is_empty() { + vec![path_from_crate_root("../../../move-stdlib/sources")] + } else 
{ + vec![] + }, + named_address_mapping: vec!["std=0x1".to_string()], + ..Options::default() + }; + options = options.set_language_version(LanguageVersion::V2_1); + let mut test_output = String::new(); + let mut error_writer = Buffer::no_color(); + match run_move_compiler_for_analysis(&mut error_writer, options) { + Err(_) => { + test_output.push_str(&format!( + "--- Aborting with compilation errors:\n{}\n", + String::from_utf8_lossy(&error_writer.into_inner()) + )); + }, + Ok(env) => { + let targets = create_targets(&env); + for fun_id in targets.get_funs() { + generate_output( + &targets.get_target(&env.get_function(fun_id), &FunctionVariant::Baseline), + &mut test_output, + ) + } + }, + } + // Generate/check baseline. + let baseline_path = path.with_extension(EXP_EXT); + baseline_test::verify_or_update_baseline(baseline_path.as_path(), &test_output)?; + Ok(()) +} + +fn generate_output(target: &FunctionTarget, test_output: &mut String) { + *test_output += &format!( + "\n=== Processing {} ===================================================== \n", + target.func_env.get_full_name_str() + ); + *test_output += &format!( + "--- Source\n{}\n", + target + .global_env() + .get_source(&target.get_loc()) + .unwrap_or("UNKNOWN") + ); + + *test_output += &format!("\n--- Stackless Bytecode\n{}\n", target); + + let Some(exp) = astifier::generate_ast_raw(target) else { + *test_output += "--- Raw Generated AST\nFAILED\n"; + return; + }; + *test_output += &format!( + "--- Raw Generated AST\n{}\n\n", + exp.display_for_fun(target.func_env.clone()) + ); + let exp = astifier::transform_conditionals(target, exp); + *test_output += &format!( + "--- If-Transformed Generated AST\n{}\n\n", + exp.display_for_fun(target.func_env.clone()) + ); + let exp = astifier::transform_assigns(target, exp); + *test_output += &format!( + "--- Assign-Transformed Generated AST\n{}\n\n", + exp.display_for_fun(target.func_env.clone()) + ); +} + +/// Create function targets with stackless bytecode for modules 
which are target. +/// This decompiles Move binary format into stackless bytecode. +fn create_targets(env: &GlobalEnv) -> FunctionTargetsHolder { + let mut targets = FunctionTargetsHolder::default(); + for module_env in env.get_modules() { + if module_env.is_primary_target() { + for func_env in module_env.get_functions() { + targets.add_target(&func_env) + } + } + } + targets +} + +/// Returns a path relative to the crate root. +fn path_from_crate_root(path: &str) -> String { + let mut buf = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + buf.push(path); + buf.to_string_lossy().to_string() +} diff --git a/third_party/move/move-model/bytecode/src/astifier.rs b/third_party/move/move-model/bytecode/src/astifier.rs new file mode 100644 index 0000000000000..f3b0e2e6002ec --- /dev/null +++ b/third_party/move/move-model/bytecode/src/astifier.rs @@ -0,0 +1,1998 @@ +// Copyright © Aptos Foundation +// Parts of the project are originally copyright © Meta Platforms, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! Converts stackless bytecode into Model AST. +//! +//! See [this article](https://medium.com/leaningtech/solving-the-structured-control-flow-problem-once-and-for-all-5123117b1ee2) +//! for an introduction into how this code works. This is an excellent high-level overview of +//! the decompilation problem and the solution which is adapted here. The article is a relatively +//! light read and hence leaves many details open, which an implementation has to determine. +//! +//! In a nutshell, the decompilation from stackless bytecode into the Model AST here +//! works in the steps outlined below. +//! +//! # 1. Cleanup +//! +//! The code is cleaned up such that there are no jump proxies of the form `label L; goto L1`. +//! Also, adjacent sequential blocks are merged, and fallthroughs extended by explicit jumps. +//! It is important for the algorithm to work that all those intermediate blocks are removed +//! and all remaining blocks represent a true branching structure.
+//! +//! # 2. Loop Analysis +//! +//! Compute loop information using the `fat_loop` module. This allows distinguishing +//! backward jumps from forward jumps. The `fat_loop` analysis bails out if the control graph is +//! not reducible. +//! +//! # 3. Topological Sorting +//! +//! Topologically sort the blocks using forward edges only. For those blocks which are not related +//! in the partial order, prioritize that for a branch `if c goto L1 else goto L2`, target +//! blocks follow after the branch. Moreover, for any blocks belonging to a loop, ensure that +//! they all appear before any subsequent blocks which are not part of the loop. +//! +//! # 4. Raw AST Generation +//! +//! The article linked above describes well how blocks can be used to synthesize structured +//! code. However, it leaves open when to open blocks and when to close them. +//! +//! First, in Move, we express blocks (whether they have back jumps and are proper loops or not) +//! by `loop { ..; break }`. Now, in the presence of nested `break[n]` and `continue[n]`, any +//! forward (via break) and backward (via continue) jump can be modelled. +//! +//! ## 4.1 Opening Loops +//! +//! Loops are opened when we either encounter a loop header or a branch. We know which blocks +//! are loop headers and what the back edges are from (2) above. In the case of +//! branches we need two loops. Consider bytecode for an if-then-else: +//! +//! ```ignore +//! if c goto L1 else goto L2 +//! label L1 +//! .. then .. +//! goto L3 +//! label L2 +//! .. else .. +//! goto L3 +//! label L3 +//! .. end .. +//! ``` +//! +//! This is translated to: +//! +//! ```ignore +//! loop { +//! loop { +//! if !c break; +//! .. then .. +//! break[1] +//! } // L2 jumps here +//! .. else .. +//! break +//! } // L3 jumps here +//! ``` +//! +//! Notice that at the point when those two loops are opened (on encountering +//! `if c goto L1 else goto L2`), the label `L3` is not known. It appears +//!
somewhere later in the sequence of blocks, with an arbitrary number +//! of other blocks in between resulting from nested control flows. +//! The algorithm continues to process the blocks in topological +//! order, opening sub-blocks as needed, until it encounters a jump +//! to a label which is not yet associated with any loop, which is then +//! associated with the unbound loop label. In the above bytecode example, this +//! would be the first `goto L3` in sequence. This is sound +//! because of the way the blocks were sorted in (3): there are no +//! "interleaving" control flows (as also mentioned in the linked article), +//! and at the moment `L3` is encountered, no other sub-graphs of the control flow +//! have still open blocks. +//! +//! ## 4.2 Closing Loops +//! +//! Loops are closed when their exit label is reached. For the above +//! example, the inner loop is closed on bytecode `label L2` and the +//! outer on `label L3`. Some loops which have been opened may never +//! be associated with a label (for example, in an +//! `if c goto L1 else goto L2` where one of the targets is a back jump +//! in a loop). Those are closed when an enclosing loop's label +//! is reached. +//! +//! # 5. AST Transformations +//! +//! Once the raw AST has been generated as described above, it is +//! run through a few transformation steps: +//! +//! 1. *Conditional Transformation* discovers if-then-else +//! expressions from the nested loops as generated by the core +//! algorithm. This is based on pattern matching against the AST. +//! It is possible that the rules used here need to be extended +//! over time, as it is kind of heuristic. +//! 2. *Assignment Transformation* removes obsolete assignments +//! by propagating reaching definitions. It also introduces +//! `let` declarations for temporaries scoped to a block. +//! +//! A number of tools are defined in this module to perform AST +//! transformations, including a data flow analysis +//!
framework for ASTs in the form as generated by the core +//! algorithms. They are kept local here but can be pulled +//! out of this module if more general use cases arise. + +use crate::{ + dataflow_domains::{AbstractDomain, JoinResult, MapDomain, SetDomain}, + fat_loop::{build_loop_info, FatLoopFunctionInfo}, + function_target::FunctionTarget, + stackless_bytecode::{AttrId, Bytecode, Label, Operation as BytecodeOperation}, + stackless_control_flow_graph::{BlockId, StacklessControlFlowGraph}, +}; +use abstract_domain_derive::AbstractDomain; +use itertools::Itertools; +use log::{debug, log_enabled, Level}; +use move_binary_format::file_format::CodeOffset; +use move_model::{ + ast::{Exp, ExpData, Operation, Pattern, TempIndex}, + exp_builder::ExpBuilder, + exp_rewriter::ExpRewriterFunctions, + model::{GlobalEnv, Loc, NodeId, QualifiedInstId, StructId}, + symbol::Symbol, + ty::{ReferenceKind, Type}, +}; +use std::collections::{BTreeMap, BTreeSet}; +use topological_sort::TopologicalSort; + +const DEBUG: bool = false; + +// =========================================================================================== +// Ast Generator + +/// Main entry point for generating AST for a given function target with associated stackless +/// bytecode. +pub fn generate_ast(target: &FunctionTarget) -> Option { + let exp = generate_ast_raw(target)?; + Some(transform_assigns( + target, + transform_conditionals(target, exp), + )) +} + +/// Entry point for raw generation, without prettifying the AST. +pub fn generate_ast_raw(target: &FunctionTarget) -> Option { + // First cleanup the code. In order to make the algorithm work, unnecessary blocks need to + // be eliminated. + let cleanup_context = Context::new(target); + let cleaned_code = cleanup_context.clean_bytecode(); + let mut new_data = target.data.clone(); + new_data.code = cleaned_code; + let target = FunctionTarget::new(target.func_env, &new_data); + + // Now create a new context for working with the cleaned code. 
+ let mut ctx = Context::new(&target); + + let unreached_labels = ctx + // All labels with corresponding blocks in code. + .forward_cfg + .blocks() + .iter() + .filter_map(|blk_id| ctx.label_of_block(*blk_id)) + .collect(); + + let fat_loop_info = match build_loop_info(&target) { + // Compute the fat loops of this code. A fat loop is a loop with multiple back-edges all + // sharing the same loop header. + Ok(loop_info) => loop_info, + Err(err) => { + // This happens if the cfg is not reducible. + // TODO: we may want to have a fallback strategy for this case. We can generate + // a big outer loop with a case for each block and a variable holding the current + // block number. + target.global_env().error( + &target.get_loc(), + &format!("cannot decompile function: {}", err), + ); + return None; + }, + }; + ctx.compute_loop_info(&fat_loop_info); + + // Create the generator and run it. + let mut gen = Generator { + block_stack: vec![], + unreached_labels, + used_labels: BTreeSet::new(), + current_attr: None, + }; + Some(gen.gen(&ctx)) +} + +// ------------------------------------------------------------------------------------------- +// Data Types + +/// Immutable context used for generation. +struct Context<'a> { + /// The function target. + target: &'a FunctionTarget<'a>, + /// Forward control flow graph of the given code. + forward_cfg: StacklessControlFlowGraph, + /// Expression builder. + builder: ExpBuilder<'a>, + /// Mapping from labels to the code offset they are associated with. + label_offsets: BTreeMap, + /// Loop headers. These are the labels of the blocks which dominate all other blocks + /// in a loop. + loop_headers: BTreeSet