From 29e2d8d6642e1dbeb65ab2ed79bcbc63afa41506 Mon Sep 17 00:00:00 2001 From: Wolfgang Grieskamp Date: Sun, 1 Sep 2024 23:51:15 -0700 Subject: [PATCH] [WIP][compiler-v2] Ast generator from stackless bytecode Converter from stackless bytecode into Model AST. We already have one from binary format to stackless. This will be useful in many places in the stack, not least for debugging, but also as a potential different approach to decompilation. For the latter, we still need to create an AST -> Move translator; today we only have a debug dump. --- .../ast-generator/ast_gen_tests.exp | 99 +++ .../ast-generator/ast_gen_tests.move | 29 + .../move/move-compiler-v2/tests/testsuite.rs | 18 +- .../move-model/bytecode/src/ast_generator.rs | 669 ++++++++++++++++++ .../move/move-model/bytecode/src/lib.rs | 1 + .../bytecode/src/stackless_bytecode.rs | 30 +- .../src/stackless_control_flow_graph.rs | 14 + 7 files changed, 858 insertions(+), 2 deletions(-) create mode 100644 third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.exp create mode 100644 third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.move create mode 100644 third_party/move/move-model/bytecode/src/ast_generator.rs diff --git a/third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.exp b/third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.exp new file mode 100644 index 0000000000000..13466a82f4c8a --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.exp @@ -0,0 +1,99 @@ +// -- Model dump before bytecode pipeline +module 0x815::m { + private fun some_f(x: u64): u64 { + x + } + private fun t1(c: bool): bool { + c + } + private fun t2(c: bool): bool { + if c { + false + } else { + true + } + } + private fun t3(c: u64) { + loop { + if Gt(c, 0) { + c: u64 = m::some_f(c) + } else { + break + } + } + } +} // end 0x815::m + +============ 
initial bytecode ================ + +[variant baseline] +fun m::some_f($t0: u64): u64 { + var $t1: u64 + 0: $t1 := infer($t0) + 1: return $t1 +} + + +[variant baseline] +fun m::t1($t0: bool): bool { + var $t1: bool + 0: $t1 := infer($t0) + 1: return $t1 +} + + +[variant baseline] +fun m::t2($t0: bool): bool { + var $t1: bool + 0: if ($t0) goto 1 else goto 4 + 1: label L0 + 2: $t1 := false + 3: goto 6 + 4: label L1 + 5: $t1 := true + 6: label L2 + 7: return $t1 +} + + +[variant baseline] +fun m::t3($t0: u64) { + var $t1: bool + var $t2: u64 + var $t3: u64 + 0: label L0 + 1: $t2 := 0 + 2: $t1 := >($t0, $t2) + 3: if ($t1) goto 4 else goto 8 + 4: label L2 + 5: $t3 := m::some_f($t0) + 6: $t0 := infer($t3) + 7: goto 10 + 8: label L3 + 9: goto 12 + 10: label L4 + 11: goto 0 + 12: label L1 + 13: return () +} + +generated AST for some_f: +return x +generated AST for t1: +return c +generated AST for t2: +if c { + +} else { + NoOp() +} +generated AST for t3: +$t2: u64 = m::some_f(c); +$t2: u64 = 0; +if $t1 { + +} else { + NoOp() +} + +============ bytecode verification succeeded ======== diff --git a/third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.move b/third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.move new file mode 100644 index 0000000000000..1e5ed46681fee --- /dev/null +++ b/third_party/move/move-compiler-v2/tests/bytecode-generator/ast-generator/ast_gen_tests.move @@ -0,0 +1,29 @@ +module 0x815::m { + + fun some_f(x: u64): u64 { + x + } + + fun t1(c: bool): bool { + c + } + + fun t2(c: bool): bool { + if (c) false else true + } + + fun t3(c: u64) { + while (c > 0) c = some_f(c) + } + + /* + fun t4(c: u64) { + while (c > 0) { + if (c > 10) { + c = some_f(c) + } + } + } + */ + +} diff --git a/third_party/move/move-compiler-v2/tests/testsuite.rs b/third_party/move/move-compiler-v2/tests/testsuite.rs index 3318472c84329..3c1f7dd68b69c 100644 --- 
a/third_party/move/move-compiler-v2/tests/testsuite.rs +++ b/third_party/move/move-compiler-v2/tests/testsuite.rs @@ -13,7 +13,7 @@ use move_compiler_v2::{ }; use move_model::{metadata::LanguageVersion, model::GlobalEnv}; use move_prover_test_utils::{baseline_test, extract_test_directives}; -use move_stackless_bytecode::function_target_pipeline::FunctionTargetPipeline; +use move_stackless_bytecode::{ast_generator, function_target_pipeline::FunctionTargetPipeline}; use once_cell::unsync::Lazy; use std::{ cell::{RefCell, RefMut}, @@ -690,6 +690,7 @@ fn run_test(path: &Path, config: TestConfig) -> datatest_stable::Result<()> { logging::setup_logging_for_testing(); let path_str = path.display().to_string(); let mut options = config.options.clone(); + let run_ast_generator = path_str.contains("/ast-generator/"); options.warn_unused = path_str.contains("/unused/"); options.warn_deprecated = path_str.contains("/deprecated/"); options.compile_verify_code = path_str.contains("/verification/verify/"); @@ -812,6 +813,21 @@ fn run_test(path: &Path, config: TestConfig) -> datatest_stable::Result<()> { } }, ); + if run_ast_generator { + let out = &mut test_output.borrow_mut(); + for (fun_id, variant) in targets.get_funs_and_variants() { + let fun_env = env.get_function(fun_id); + let target = targets.get_target(&fun_env, &variant); + if let Some(exp) = ast_generator::generate_ast(&target) { + out.push_str(&format!( + "generated AST for {}:\n{}\n", + target.func_env.get_name_str(), + exp.display(&env) + )); + } + } + } + if *ok.borrow() && config.stop_after == StopAfter::FileFormat { let units = run_file_format_gen(&mut env, &targets); let out = &mut test_output.borrow_mut(); diff --git a/third_party/move/move-model/bytecode/src/ast_generator.rs b/third_party/move/move-model/bytecode/src/ast_generator.rs new file mode 100644 index 0000000000000..c4634ca67f8e5 --- /dev/null +++ b/third_party/move/move-model/bytecode/src/ast_generator.rs @@ -0,0 +1,669 @@ +// Copyright © Aptos 
Foundation +// Parts of the project are originally copyright © Meta Platforms, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! Converts stackless bytecode into Model AST. +//! +//! See [this article](https://medium.com/leaningtech/solving-the-structured-control-flow-problem-once-and-for-all-5123117b1ee2) +//! for an inspiration how this code works. + +use crate::{ + fat_loop::{build_loop_info, FatLoopFunctionInfo}, + function_target::FunctionTarget, + stackless_bytecode::{AttrId, Bytecode, Label, Operation as BytecodeOperation}, + stackless_control_flow_graph::{BlockContent, BlockId, StacklessControlFlowGraph}, +}; +use itertools::Itertools; +use move_binary_format::file_format::CodeOffset; +use move_model::{ + ast::{Exp, ExpData, Operation, Pattern, TempIndex}, + model::{GlobalEnv, NodeId}, + symbol::Symbol, + ty::Type, +}; +use std::{ + cmp::Ordering, + collections::{BTreeMap, BTreeSet}, +}; + +struct Context<'a> { + target: &'a FunctionTarget<'a>, + loop_info: BTreeMap, + back_edges: BTreeSet, + forward_cfg: StacklessControlFlowGraph, + backward_cfg: StacklessControlFlowGraph, + label_offsets: BTreeMap, + if_true_blocks: BTreeSet, +} + +struct Generator<'a> { + temps: BTreeMap, + block_stack: Vec>, +} + +#[derive(Debug)] +struct BlockInfo<'a> { + stms: Vec, + kind: BlockKind<'a>, +} + +#[derive(Debug)] +enum BlockKind<'a> { + Root, + Loop { + info: &'a LoopInfo, + }, + IfStart { + cond: Exp, + if_true_block: BlockId, + if_false_block: BlockId, + }, + IfThen { + cond: Exp, + if_false_block: BlockId, + opt_end_block: Option, + }, + IfElse { + cond: Exp, + then: Exp, + end_block: BlockId, + }, + Jump { + target: BlockId, + }, +} + +#[derive(Debug)] +struct LoopInfo { + header_label: Label, + back_edges: BTreeSet, + back_edge_blocks: BTreeSet, + all_blocks: BTreeSet, +} + +pub fn generate_ast(target: &FunctionTarget) -> Option { + let fat_loop_info = match build_loop_info(target) { + Ok(loop_info) => loop_info, + Err(err) => { + target.global_env().error( + 
&target.get_loc(), + &format!("cannot decompile function: {}", err), + ); + return None; + }, + }; + let back_edges = fat_loop_info.back_edges_locations(); + let code = target.get_bytecode(); + let forward_cfg = StacklessControlFlowGraph::new_forward(code); + let backward_cfg = StacklessControlFlowGraph::new_backward(code, false); + let label_offsets = Bytecode::label_offsets(code); + let loop_info = compute_loop_info(&backward_cfg, &label_offsets, &fat_loop_info); + let if_true_blocks = compute_if_true_blocks(code, &label_offsets); + let ctx = Context { + target, + loop_info, + back_edges, + forward_cfg, + backward_cfg, + label_offsets, + if_true_blocks, + }; + let mut gen = Generator { + temps: BTreeMap::new(), + block_stack: vec![], + }; + Some(gen.gen(&ctx)) +} + +fn compute_loop_info( + backward_cfg: &StacklessControlFlowGraph, + label_offsets: &BTreeMap, + loop_info: &FatLoopFunctionInfo, +) -> BTreeMap { + let mut loop_infos = BTreeMap::new(); + for (label, fat_loop) in &loop_info.fat_loops { + let header_block_id = label_offsets + .get(label) + .expect("loop header label has code offset"); + let back_edges = fat_loop.back_edges.clone(); + let back_edge_blocks = back_edges + .iter() + .map(|code_offset| backward_cfg.enclosing_block(*code_offset)) + .collect(); + let mut all_blocks = BTreeSet::new(); + all_blocks.insert(*header_block_id); + let mut todo = back_edges.iter().cloned().collect_vec(); + while let Some(blk_id) = todo.pop() { + if !all_blocks.insert(blk_id) { + // Already processed, or header + continue; + } + if let Some(succs) = backward_cfg.get_successors_map().get(&blk_id) { + todo.extend(succs.iter().cloned()) + } + } + loop_infos.insert(*label, LoopInfo { + header_label: *label, + back_edges, + back_edge_blocks, + all_blocks, + }); + } + loop_infos +} + +fn compute_if_true_blocks( + code: &[Bytecode], + label_offsets: &BTreeMap, +) -> BTreeSet { + let mut result = BTreeSet::new(); + for bc in code { + if let Bytecode::Branch(_, if_true, ..) 
= bc { + result.insert( + label_offsets + .get(if_true) + .expect("label offset for if_true block target") + .clone(), + ); + } + } + result +} + +impl<'a> Context<'a> { + fn code(&self) -> &[Bytecode] { + self.target.get_bytecode() + } + + fn block_of_label(&self, label: Label) -> BlockId { + *self + .label_offsets + .get(&label) + .expect("label has code offset") + } + + fn label_of_block(&self, ctx: &Context, block_id: BlockId) -> Option