From 8a6e7c52129c75e0d1a1b60c3e820de552fb0978 Mon Sep 17 00:00:00 2001 From: "Brian R. Murphy" <132495859+brmataptos@users.noreply.github.com> Date: Sat, 23 Nov 2024 10:52:41 -0800 Subject: [PATCH] Rename and rephrase Wolfgang's closure operations to match ongoing parser work: - `LdFunction`, `LdFunctionGeneric` = generate a closure from a defined function - `EarlyBind` = bind more arguments to any closure - `Invoke` = call a closure with final arguments Add some description/semantics to `file_format.rs` to better describe the implementation which will be needed. Get rid of complex Mask calculations in favor of simpler early bninding of `k` initial arguments. Hopefully keep all bytecode verifier operations working the same. --- api/types/src/bytecode.rs | 2 +- aptos-move/script-composer/src/decompiler.rs | 3 +- .../move-binary-format/src/check_bounds.rs | 11 +- .../src/check_complexity.rs | 11 +- .../move/move-binary-format/src/constant.rs | 2 +- .../move-binary-format/src/deserializer.rs | 15 + .../move-binary-format/src/file_format.rs | 285 ++++++++---------- .../src/file_format_common.rs | 12 +- .../move/move-binary-format/src/serializer.rs | 20 +- .../invalid-mutations/src/bounds/code_unit.rs | 27 +- .../src/acquires_list_verifier.rs | 36 ++- .../src/instruction_consistency.rs | 73 +++-- .../src/locals_safety/mod.rs | 7 +- .../src/reference_safety/abstract_state.rs | 17 +- .../src/reference_safety/mod.rs | 64 ++-- .../move-bytecode-verifier/src/signature.rs | 2 +- .../src/signature_v2.rs | 69 ++++- .../src/stack_usage_verifier.rs | 59 ++-- .../move-bytecode-verifier/src/type_safety.rs | 133 +++++--- .../move/move-core/types/src/vm_status.rs | 25 +- .../bytecode/src/function_target_pipeline.rs | 4 +- .../src/stackless_bytecode_generator.rs | 7 +- .../move/move-vm/runtime/src/interpreter.rs | 21 +- .../move-vm/test-utils/src/gas_schedule.rs | 25 +- .../move-disassembler/src/disassembler.rs | 98 +++++- 25 files changed, 689 insertions(+), 339 deletions(-) diff --git a/api/types/src/bytecode.rs b/api/types/src/bytecode.rs index f6ed8405471a4..ffdea8f6186c6 100644 --- a/api/types/src/bytecode.rs +++ b/api/types/src/bytecode.rs @@ -106,7 +106,7 @@ pub trait Bytecode { to: Box::new(self.new_move_type(t.borrow())), }, SignatureToken::Function(..) => { - // TODO + // TODO(LAMBDA) unimplemented!("signature token function to API MoveType") }, } diff --git a/aptos-move/script-composer/src/decompiler.rs b/aptos-move/script-composer/src/decompiler.rs index 56806f589ab08..147d976b90ccf 100644 --- a/aptos-move/script-composer/src/decompiler.rs +++ b/aptos-move/script-composer/src/decompiler.rs @@ -209,7 +209,8 @@ impl LocalState { TypeTag::Vector(Box::new(Self::type_tag_from_sig_token(script, s)?)) }, SignatureToken::Function(..) => { - bail!("function types NYI for script composer") + // TODO(LAMBDA) + bail!("function types not yet implemented for script composer") }, SignatureToken::Struct(s) => { let module_handle = script.module_handle_at(script.struct_handle_at(*s).module); diff --git a/third_party/move/move-binary-format/src/check_bounds.rs b/third_party/move/move-binary-format/src/check_bounds.rs index dca378e0957c3..2e9f40388d969 100644 --- a/third_party/move/move-binary-format/src/check_bounds.rs +++ b/third_party/move/move-binary-format/src/check_bounds.rs @@ -546,12 +546,12 @@ impl<'a> BoundsChecker<'a> { )?; } }, - Call(idx) | ClosPack(idx, _) => self.check_code_unit_bounds_impl( + Call(idx) | LdFunction(idx) => self.check_code_unit_bounds_impl( self.view.function_handles(), *idx, bytecode_offset, )?, - CallGeneric(idx) | ClosPackGeneric(idx, _) => { + CallGeneric(idx) | LdFunctionGeneric(idx) => { self.check_code_unit_bounds_impl( self.view.function_instantiations(), *idx, @@ -650,15 +650,16 @@ impl<'a> BoundsChecker<'a> { }, // Instructions that refer to a signature - ClosEval(idx) - | VecPack(idx, _) + VecPack(idx, _) | VecLen(idx) | VecImmBorrow(idx) | VecMutBorrow(idx) | VecPushBack(idx) | VecPopBack(idx) | VecUnpack(idx, _) - | VecSwap(idx) => { + | VecSwap(idx) + | Invoke(idx) + | EarlyBind(idx, _) => { self.check_code_unit_bounds_impl( self.view.signatures(), *idx, diff --git a/third_party/move/move-binary-format/src/check_complexity.rs b/third_party/move/move-binary-format/src/check_complexity.rs index dbc8890d4cf3e..4aff6b1bfd7e9 100644 --- a/third_party/move/move-binary-format/src/check_complexity.rs +++ b/third_party/move/move-binary-format/src/check_complexity.rs @@ -262,7 +262,7 @@ impl<'a> BinaryComplexityMeter<'a> { for instr in &code.code { match instr { - CallGeneric(idx) | ClosPackGeneric(idx, ..) => { + CallGeneric(idx) | LdFunctionGeneric(idx, ..) => { self.meter_function_instantiation(*idx)?; }, PackGeneric(idx) | UnpackGeneric(idx) => { @@ -284,15 +284,16 @@ impl<'a> BinaryComplexityMeter<'a> { ImmBorrowVariantFieldGeneric(idx) | MutBorrowVariantFieldGeneric(idx) => { self.meter_variant_field_instantiation(*idx)?; }, - ClosEval(idx) - | VecPack(idx, _) + VecPack(idx, _) | VecLen(idx) | VecImmBorrow(idx) | VecMutBorrow(idx) | VecPushBack(idx) | VecPopBack(idx) | VecUnpack(idx, _) - | VecSwap(idx) => { + | VecSwap(idx) + | Invoke(idx) + | EarlyBind(idx, _) => { self.meter_signature(*idx)?; }, @@ -324,7 +325,7 @@ impl<'a> BinaryComplexityMeter<'a> { | PackVariant(_) | UnpackVariant(_) | TestVariant(_) - | ClosPack(..) + | LdFunction(_) | ReadRef | WriteRef | FreezeRef diff --git a/third_party/move/move-binary-format/src/constant.rs b/third_party/move/move-binary-format/src/constant.rs index 6fa8b21fa11fb..3b8d200787a4e 100644 --- a/third_party/move/move-binary-format/src/constant.rs +++ b/third_party/move/move-binary-format/src/constant.rs @@ -18,7 +18,7 @@ fn sig_to_ty(sig: &SignatureToken) -> Option { SignatureToken::U256 => Some(MoveTypeLayout::U256), SignatureToken::Vector(v) => Some(MoveTypeLayout::Vector(Box::new(sig_to_ty(v.as_ref())?))), SignatureToken::Function(..) => { - // TODO: do we need representation in MoveTypeLayout? + // TODO(LAMBDA): do we need representation in MoveTypeLayout? None }, SignatureToken::Reference(_) diff --git a/third_party/move/move-binary-format/src/deserializer.rs b/third_party/move/move-binary-format/src/deserializer.rs index 89a78a2562c6e..24f096ef482c9 100644 --- a/third_party/move/move-binary-format/src/deserializer.rs +++ b/third_party/move/move-binary-format/src/deserializer.rs @@ -114,6 +114,12 @@ impl Table { } } +fn read_u8_internal(cursor: &mut VersionedCursor) -> BinaryLoaderResult { + cursor.read_u8().map_err(|_| { + PartialVMError::new(StatusCode::MALFORMED).with_message("Unexpected EOF".to_string()) + }) +} + fn read_u16_internal(cursor: &mut VersionedCursor) -> BinaryLoaderResult { let mut u16_bytes = [0; 2]; cursor @@ -1814,6 +1820,15 @@ fn load_code(cursor: &mut VersionedCursor, code: &mut Vec) -> BinaryLo Opcodes::CAST_U16 => Bytecode::CastU16, Opcodes::CAST_U32 => Bytecode::CastU32, Opcodes::CAST_U256 => Bytecode::CastU256, + + Opcodes::LD_FUNCTION => Bytecode::LdFunction(load_function_handle_index(cursor)?), + Opcodes::LD_FUNCTION_GENERIC => { + Bytecode::LdFunctionGeneric(load_function_inst_index(cursor)?) + }, + Opcodes::INVOKE => Bytecode::Invoke(load_signature_index(cursor)?), + Opcodes::EARLY_BIND => { + Bytecode::EarlyBind(load_signature_index(cursor)?, read_u8_internal(cursor)?) + }, }; code.push(bytecode); } diff --git a/third_party/move/move-binary-format/src/file_format.rs b/third_party/move/move-binary-format/src/file_format.rs index a2058b333dc28..4d431f5c319e2 100644 --- a/third_party/move/move-binary-format/src/file_format.rs +++ b/third_party/move/move-binary-format/src/file_format.rs @@ -49,7 +49,6 @@ use proptest::{collection::vec, prelude::*, strategy::BoxedStrategy}; use ref_cast::RefCast; use serde::{Deserialize, Serialize}; use std::{ - collections::BTreeMap, fmt::{self, Formatter}, ops::BitOr, }; @@ -1011,6 +1010,13 @@ impl AbilitySet { pub fn into_u8(self) -> u8 { self.0 } + + pub fn to_string_concise(self) -> String { + self.iter() + .map(|a| a.to_string()) + .collect::>() + .join("+") + } } impl fmt::Display for AbilitySet { @@ -1019,8 +1025,8 @@ impl fmt::Display for AbilitySet { &self .iter() .map(|a| a.to_string()) - .reduce(|l, r| format!("{} + {}", l, r)) - .unwrap_or_default(), + .collect::>() + .join(" + "), ) } } @@ -1476,6 +1482,13 @@ impl SignatureToken { } } + /// Returns true if the `SignatureToken` is a function. + pub fn is_function(&self) -> bool { + use SignatureToken::*; + + matches!(self, Function(..)) + } + /// Set the index to this one. Useful for random testing. /// /// Panics if this token doesn't contain a struct handle. @@ -1538,101 +1551,6 @@ impl SignatureToken { } } -/// A `ClosureMask` is a value which determines how to distinguish those function arguments -/// which are captured and which are not when a closure is constructed. For instance, -/// with `_` representing an omitted argument, the mask for `f(a,_,b,_)` would have the argument -/// at index 0 and at index 2 captured. The mask can be used to transform lists of types. -#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] -#[cfg_attr(any(test, feature = "fuzzing"), derive(proptest_derive::Arbitrary))] -#[cfg_attr(any(test, feature = "fuzzing"), proptest(no_params))] -#[cfg_attr(feature = "fuzzing", derive(arbitrary::Arbitrary))] -pub struct ClosureMask { - pub mask: u64, -} - -impl fmt::Display for ClosureMask { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{:b}", self.mask) - } -} - -impl ClosureMask { - pub fn new(mask: u64) -> Self { - Self { mask } - } - - /// Apply a closure mask to a list of elements, returning only those - /// where position `i` is set in the mask (if `collect_captured` is true) or not - /// set (otherwise). - pub fn extract(&self, tys: &[T], collect_captured: bool) -> Vec { - tys.iter() - .enumerate() - .filter_map(|(pos, x)| { - let set = (1 << pos) & self.mask != 0; - if set && collect_captured || !set && !collect_captured { - Some(x.clone()) - } else { - None - } - }) - .collect() - } - - /// Compose two lists of elements into one based on the given mask such that the - /// following holds: - /// ```ignore - /// mask.compose(mask.extract(v, true), mask.extract(v, false)) == v - /// ``` - /// This returns `None` if the provided lists are inconsistent w.r.t the mask - /// and cannot be composed. This should not happen in verified code, but - /// a caller should decide whether to crash or to error. - pub fn compose(&self, captured: &[T], provided: &[T]) -> Option> { - let mut result = BTreeMap::new(); // expect ordered enumeration - let mut cap_idx = 0; - let mut pro_idx = 0; - for i in 0..64 { - if cap_idx >= captured.len() && pro_idx >= provided.len() { - // all covered - break; - } - if (1 << i) & self.mask != 0 { - if cap_idx >= captured.len() { - // Inconsistency - return None; - } - result.insert(i, captured[cap_idx].clone()); - cap_idx += 1 - } else { - if pro_idx >= provided.len() { - // Inconsistency - return None; - } - result.insert(i, provided[pro_idx].clone()); - pro_idx += 1 - } - } - let map_len = result.len(); - let vec = result.into_values().collect::>(); - if vec.len() != map_len { - // Inconsistency: all indices must be contiguously covered - None - } else { - Some(vec) - } - } - - /// Return the max index of captured arguments - pub fn max_captured(&self) -> usize { - let mut i = 0; - let mut mask = self.mask; - while mask != 0 { - mask >>= 1; - i += 1 - } - i - } -} - /// A `Constant` is a serialized value along with its type. That type will be deserialized by the /// loader/evaluator #[derive(Clone, Debug, Eq, PartialEq, Hash)] @@ -1786,7 +1704,7 @@ pub enum Bytecode { arithmetic error else: stack << int_val as u8 - "#] + "#] #[runtime_check_epilogue = r#" ty_stack >> _ ty_stack << u8 @@ -1974,6 +1892,7 @@ pub enum Bytecode { ty_stack << struct_ty "#] Pack(StructDefinitionIndex), + #[group = "struct"] #[static_operands = "[struct_inst_idx]"] #[description = "Generic version of `Pack`."] @@ -3051,81 +2970,132 @@ pub enum Bytecode { VecSwap(SignatureIndex), #[group = "closure"] - #[description = r#" - `ClosPack(fun, mask)` creates a closure for a given function handle as controlled by - the given `mask`. `mask` is a u64 bitset which describes which of the arguments - of `fun` are captured by the closure. - - If the function `fun` has type `|t1..tn|r`, then the following holds: - - - If `m` are the number of bits set in the mask, then `m <= n`, and the stack is - `[vm..v1] + stack`, and if `i` is the `j`th bit set in the mask, - then `vj` has type `ti`. - - type ti is not a reference. - - Thus the values on the stack must match the types in the function - signature which have the bit to be captured set in the mask. - - The type of the resulting value on the stack is derived from the types `|t1..tn|` - for which the bit is not set, which build the arguments of a function type - with `fun`'s result types. - - The `abilities` of this function type are derived from the inputs as follows. - First, take the intersection of the abilities of all captured arguments - with type `t1..tn`. Then intersect this with the abilities derived from the - function: a function handle has `drop` and `copy`, never has `key`, and only - `store` if the underlying function is public, and therefore cannot change - its signature. - - Notice that an implementation can derive the types of the captured arguments - at runtime from a closure value as long as the closure value stores the function - handle (or a derived form of it) and the mask, and the handle allows to lookup the - function's type at runtime. Then the same procedure as outlined above can be used. - "#] - #[static_operands = "[fun, mask]"] - #[semantics = ""] - #[runtime_check_epilogue = ""] - #[gas_type_creation_tier_0 = "closure_ty"] - ClosPack(FunctionHandleIndex, ClosureMask), + #[description = "Load a function value onto the stack."] + #[static_operands = "[func_handle_idx]"] + #[semantics = "stack << functions[function_handle_idx]"] + #[runtime_check_epilogue = "ty_stack << func_ty"] + #[gas_type_creation_tier_1 = "func_ty"] + LdFunction(FunctionHandleIndex), + + #[group = "closure"] + #[description = "Generic version of `LdFunction`."] + #[static_operands = "[func_inst_idx]"] + #[semantics = "See `LdFunction`."] + #[runtime_check_epilogue = "See `LdFunction`."] + #[gas_type_creation_tier_0 = "ty_args"] + #[gas_type_creation_tier_1 = "local_tys"] + LdFunctionGeneric(FunctionInstantiationIndex), #[group = "closure"] - #[static_operands = "[fun, mask]"] - #[semantics = ""] - #[runtime_check_epilogue = ""] #[description = r#" - Same as `ClosPack` but for the instantiation of a generic function. + `EarlyBind(|t1..tn|r with a, count)` creates new function value based + on the function value at top of stack by adding `count` arguments + popped from the stack to the closure found on top of stack. + + If the function value's type has at least `count` parameters with types + that match the `count` arguments on the stack, then a function closure + capturing those values with the provided function is pushed on top of + stack. - Notice that an uninstantiated generic function cannot be used to create a closure. + Notice that the type as part of this instruction is redundant for + execution semantics. Since the closure is expected to be on top of the stack, + it can decode the arguments underneath without type information. + However, the type is required for the current implementation of + static bytecode verification. + "#] + #[static_operands = "[u8_value]"] + #[semantics = r#" + stack >> function_handle + + let [func, k, [arg_0, .., arg_{k-1}]] = function_handle + // Information like the function signature are loaded from the file format + i = u8_value + n = func.num_params + if i + k > n then abort + + stack >> arg'_{i-1} + .. + stack >> arg'_0 + new_function_handle = [func, k + i, [arg_0, .., arg_{k-1}, arg'_0, .., arg'_{i-1}]] + stack << new_function_handle + "#] + #[runtime_check_epilogue = r#" + // NOT: assert func visibility rules + func param types match provided parameter types "#] #[gas_type_creation_tier_0 = "closure_ty"] - ClosPackGeneric(FunctionInstantiationIndex, ClosureMask), + EarlyBind(SignatureIndex, u8), #[group = "closure"] #[description = r#" - `ClosEval(|t1..tn|r has a)` evalutes a closure of the given function type, taking - the captured arguments and mixing in the provided ones on the stack. + `Invoke(|t1..tn|r with a)` calls a function value of the specified type, + with `n` argument values from the stack. On top of the stack is the closure being evaluated, underneath the arguments: `[c,vn,..,v1] + stack`. The type of the closure must match the type specified in the instruction, with abilities `a` a subset of the abilities of the closure value. A value `vi` on the stack must have type `ti`. - Notice that the type as part of the closure instruction is redundant for + Notice that the type as part of this instruction is redundant for execution semantics. Since the closure is expected to be on top of the stack, it can decode the arguments underneath without type information. - However, the type is required to do static bytecode verification. + However, the type is required for the current implementation of + static bytecode verification. - The semantics of this instruction can be characterized by the following equation: + The arguments are consumed and pushed to the locals of the function. + + Return values are pushed onto the stack from the first to the last and + available to the caller after returning from the callee. - ``` - CloseEval(ClosPack(f, mask, c1..cn), a1..am) = f(mask.compose(c1..cn, a1..am)) - ``` + During execution of the function conservative_invocation_mode is enabled; + afterwards, it is restored to its original state. "#] - #[static_operands = "[]"] - #[semantics = ""] - #[runtime_check_epilogue = ""] - #[gas_type_creation_tier_0 = "closure_ty"] - ClosEval(SignatureIndex), + #[semantics = r#" + stack >> function_handle + let [func, k, [arg_0, .., arg_{k-1}]] = function_handle + // Information like the function signature are loaded from the file format + // and compared with the signature parameter. + n = func.num_params + ty_args = if func.is_generic then func.ty_args else [] + + n = func.num_params + stack >> arg_{n-1} + .. + stack >> arg_k + + old_conservative_invocation_mode = conservative_invocation_mode + conservative_invocation_mode = true + + let module = func.module + if module is on invocation_stack then + abort + else + invocation_stack << module + + if func.is_native() + call_native(func.name, ty_args, args = [arg_0, .., arg_{n-1}]) + current_frame.pc += 1 + else + call_stack << current_frame + + current_frame = new_frame_from_func( + func, + ty_args, + locals = [arg_0, .., arg_n-1, invalid, ..] + // ^ other locals + ) + + invocation_stack >> _ + conservative_invocation_mode = old_conservative_invocation_mode + "#] + #[runtime_check_epilogue = r#" + // NOT: assert func visibility rules + for i in 0..#args: + ty_stack >> ty + assert ty == locals[#args - i - 1] + "#] + #[gas_type_creation_tier_1 = "closure_ty"] + Invoke(SignatureIndex), #[group = "stack_and_local"] #[description = "Push a u16 constant onto the stack."] @@ -3237,9 +3207,10 @@ impl ::std::fmt::Debug for Bytecode { Bytecode::UnpackGeneric(a) => write!(f, "UnpackGeneric({})", a), Bytecode::UnpackVariant(a) => write!(f, "UnpackVariant({})", a), Bytecode::UnpackVariantGeneric(a) => write!(f, "UnpackVariantGeneric({})", a), - Bytecode::ClosPackGeneric(a, mask) => write!(f, "ClosPackGeneric({}, {})", a, mask), - Bytecode::ClosPack(a, mask) => write!(f, "ClosPack({}, {})", a, mask), - Bytecode::ClosEval(a) => write!(f, "ClosEval({})", a), + Bytecode::LdFunction(a) => write!(f, "LdFunction({})", a), + Bytecode::LdFunctionGeneric(a) => write!(f, "LdFunctionGeneric({})", a), + Bytecode::EarlyBind(sig_idx, a) => write!(f, "EarlyBind({}, {})", sig_idx, a), + Bytecode::Invoke(sig_idx) => write!(f, "Invoke({})", sig_idx), Bytecode::ReadRef => write!(f, "ReadRef"), Bytecode::WriteRef => write!(f, "WriteRef"), Bytecode::FreezeRef => write!(f, "FreezeRef"), diff --git a/third_party/move/move-binary-format/src/file_format_common.rs b/third_party/move/move-binary-format/src/file_format_common.rs index 0201a408262a8..0627507bf1b71 100644 --- a/third_party/move/move-binary-format/src/file_format_common.rs +++ b/third_party/move/move-binary-format/src/file_format_common.rs @@ -299,6 +299,11 @@ pub enum Opcodes { UNPACK_VARIANT_GENERIC = 0x55, TEST_VARIANT = 0x56, TEST_VARIANT_GENERIC = 0x57, + // Closures + LD_FUNCTION = 0x58, + LD_FUNCTION_GENERIC = 0x59, + INVOKE = 0x5A, + EARLY_BIND = 0x5B, } /// Upper limit on the binary size @@ -790,9 +795,10 @@ pub fn instruction_key(instruction: &Bytecode) -> u8 { TestVariant(_) => Opcodes::TEST_VARIANT, TestVariantGeneric(_) => Opcodes::TEST_VARIANT_GENERIC, // Since bytecode version 8 - ClosPack(..) | ClosPackGeneric(..) | ClosEval(_) => { - unimplemented!("serialization of closure opcodes") - }, + LdFunction(_) => Opcodes::LD_FUNCTION, + LdFunctionGeneric(_) => Opcodes::LD_FUNCTION_GENERIC, + Invoke(_) => Opcodes::INVOKE, + EarlyBind(..) => Opcodes::EARLY_BIND, }; opcode as u8 } diff --git a/third_party/move/move-binary-format/src/serializer.rs b/third_party/move/move-binary-format/src/serializer.rs index d3bcbee4885d0..e6e66657c19a7 100644 --- a/third_party/move/move-binary-format/src/serializer.rs +++ b/third_party/move/move-binary-format/src/serializer.rs @@ -1095,9 +1095,25 @@ fn serialize_instruction_inner( binary.push(Opcodes::TEST_VARIANT_GENERIC as u8)?; serialize_struct_variant_inst_index(binary, class_idx) }, - Bytecode::ClosPack(..) | Bytecode::ClosPackGeneric(..) | Bytecode::ClosEval(_) => { - unimplemented!("serialization of closure opcodes") + + Bytecode::LdFunction(method_idx) => { + binary.push(Opcodes::LD_FUNCTION as u8)?; + serialize_function_handle_index(binary, method_idx) + }, + Bytecode::LdFunctionGeneric(method_idx) => { + binary.push(Opcodes::LD_FUNCTION_GENERIC as u8)?; + serialize_function_inst_index(binary, method_idx) + }, + Bytecode::Invoke(sig_idx) => { + binary.push(Opcodes::INVOKE as u8)?; + serialize_signature_index(binary, sig_idx) }, + Bytecode::EarlyBind(sig_idx, value) => { + binary.push(Opcodes::EARLY_BIND as u8)?; + serialize_signature_index(binary, sig_idx)?; + binary.push(*value) + }, + Bytecode::ReadRef => binary.push(Opcodes::READ_REF as u8), Bytecode::WriteRef => binary.push(Opcodes::WRITE_REF as u8), Bytecode::Add => binary.push(Opcodes::ADD as u8), diff --git a/third_party/move/move-bytecode-verifier/invalid-mutations/src/bounds/code_unit.rs b/third_party/move/move-bytecode-verifier/invalid-mutations/src/bounds/code_unit.rs index c98e8317ee33f..934aca14890b7 100644 --- a/third_party/move/move-bytecode-verifier/invalid-mutations/src/bounds/code_unit.rs +++ b/third_party/move/move-bytecode-verifier/invalid-mutations/src/bounds/code_unit.rs @@ -498,7 +498,23 @@ impl<'a> ApplyCodeUnitBoundsContext<'a> { // TODO(#13806): implement panic!("Enum types bytecode NYI: {:?}", code[bytecode_idx]) }, - ClosPack(..) | ClosPackGeneric(..) | ClosEval(..) => { + LdFunction(_) => struct_bytecode!( + function_handles_len, + current_fdef, + bytecode_idx, + offset, + FunctionHandleIndex, + LdFunction + ), + LdFunctionGeneric(_) => struct_bytecode!( + function_inst_len, + current_fdef, + bytecode_idx, + offset, + FunctionInstantiationIndex, + LdFunctionGeneric + ), + Invoke(..) | EarlyBind(..) => { panic!("Closure bytecode NYI: {:?}", code[bytecode_idx]) }, }; @@ -560,10 +576,11 @@ fn is_interesting(bytecode: &Bytecode) -> bool { | LdFalse | ReadRef | WriteRef | Add | Sub | Mul | Mod | Div | BitOr | BitAnd | Xor | Shl | Shr | Or | And | Not | Eq | Neq | Lt | Gt | Le | Ge | Abort | Nop => false, - ClosPack(..) - | ClosPackGeneric(..) - | ClosEval(..) - | PackVariant(_) + LdFunction(_) | LdFunctionGeneric(_) | Invoke(_) | EarlyBind(..) => { + // TODO(LAMBDA): implement + false + }, + PackVariant(_) | PackVariantGeneric(_) | UnpackVariant(_) | UnpackVariantGeneric(_) diff --git a/third_party/move/move-bytecode-verifier/src/acquires_list_verifier.rs b/third_party/move/move-bytecode-verifier/src/acquires_list_verifier.rs index 0de40723ae24e..41d5282ac05ea 100644 --- a/third_party/move/move-bytecode-verifier/src/acquires_list_verifier.rs +++ b/third_party/move/move-bytecode-verifier/src/acquires_list_verifier.rs @@ -102,10 +102,15 @@ impl<'a> AcquiresVerifier<'a> { self.struct_acquire(si.def, offset) }, - Bytecode::ClosPack(..) - | Bytecode::ClosPackGeneric(..) - | Bytecode::ClosEval(_) - | Bytecode::Pop + Bytecode::LdFunction(idx) => self.ld_function_acquire(*idx, offset), + Bytecode::LdFunctionGeneric(idx) => { + let fi = self.module.function_instantiation_at(*idx); + self.ld_function_acquire(fi.handle, offset) + }, + Bytecode::EarlyBind(_sig_idx, _count) => Ok(()), + Bytecode::Invoke(_sig_idx) => self.invoke_acquire(offset), + + Bytecode::Pop | Bytecode::BrTrue(_) | Bytecode::BrFalse(_) | Bytecode::Abort @@ -205,6 +210,29 @@ impl<'a> AcquiresVerifier<'a> { Ok(()) } + fn ld_function_acquire( + &mut self, + fh_idx: FunctionHandleIndex, + offset: CodeOffset, + ) -> PartialVMResult<()> { + // Currenty we are disallowing acquires for any function value which + // is created, so Invoke does nothing with acquires. + // TODO(LAMBDA) In the future this may change. + let function_handle = self.module.function_handle_at(fh_idx); + let function_acquired_resources = self.function_acquired_resources(function_handle, fh_idx); + if !function_acquired_resources.is_empty() { + return Err(self.error(StatusCode::LD_FUNCTION_NONEMPTY_ACQUIRES, offset)); + } + Ok(()) + } + + fn invoke_acquire(&mut self, _offset: CodeOffset) -> PartialVMResult<()> { + // Currenty we are disallowing acquires for any function value which + // is created, so Invoke does nothing with acquires. + // TODO(LAMBDA) In the future this may change. + Ok(()) + } + fn struct_acquire( &mut self, sd_idx: StructDefinitionIndex, diff --git a/third_party/move/move-bytecode-verifier/src/instruction_consistency.rs b/third_party/move/move-bytecode-verifier/src/instruction_consistency.rs index 9e170e7cefa63..fdba5d7033813 100644 --- a/third_party/move/move-bytecode-verifier/src/instruction_consistency.rs +++ b/third_party/move/move-bytecode-verifier/src/instruction_consistency.rs @@ -11,9 +11,9 @@ use move_binary_format::{ binary_views::BinaryIndexedView, errors::{Location, PartialVMError, PartialVMResult, VMResult}, file_format::{ - Bytecode, ClosureMask, CodeOffset, CodeUnit, CompiledModule, CompiledScript, - FieldHandleIndex, FunctionDefinitionIndex, FunctionHandleIndex, StructDefinitionIndex, - StructVariantHandleIndex, TableIndex, VariantFieldHandleIndex, + Bytecode, CodeOffset, CodeUnit, CompiledModule, CompiledScript, FieldHandleIndex, + FunctionDefinitionIndex, FunctionHandleIndex, SignatureIndex, SignatureToken, + StructDefinitionIndex, StructVariantHandleIndex, TableIndex, VariantFieldHandleIndex, }, }; use move_core_types::vm_status::StatusCode; @@ -91,21 +91,25 @@ impl<'a> InstructionConsistency<'a> { )?; }, Call(idx) => { - // Nothing to verify for `_mask`, so merge with Call self.check_function_op(offset, *idx, /* generic */ false)?; }, CallGeneric(idx) => { let func_inst = self.resolver.function_instantiation_at(*idx); self.check_function_op(offset, func_inst.handle, /* generic */ true)?; }, - ClosPack(idx, mask) => { - self.check_function_op(offset, *idx, /* generic */ false)?; - self.check_closure_mask(offset, *idx, *mask)? + LdFunction(idx) => { + self.check_ld_function_op(offset, *idx, /* generic */ false)?; }, - ClosPackGeneric(idx, mask) => { + LdFunctionGeneric(idx) => { let func_inst = self.resolver.function_instantiation_at(*idx); - self.check_function_op(offset, func_inst.handle, /* generic */ true)?; - self.check_closure_mask(offset, func_inst.handle, *mask)? + self.check_ld_function_op(offset, func_inst.handle, /* generic */ true)?; + }, + Invoke(sig_idx) => { + // reuse code to check for signature issues. + self.check_bind_count(offset, *sig_idx, 0)?; + }, + EarlyBind(sig_idx, count) => { + self.check_bind_count(offset, *sig_idx, *count)?; }, Pack(idx) | Unpack(idx) => { self.check_struct_op(offset, *idx, /* generic */ false)?; @@ -145,11 +149,11 @@ impl<'a> InstructionConsistency<'a> { // List out the other options explicitly so there's a compile error if a new // bytecode gets added. - ClosEval(_) | FreezeRef | Pop | Ret | Branch(_) | BrTrue(_) | BrFalse(_) - | LdU8(_) | LdU16(_) | LdU32(_) | LdU64(_) | LdU128(_) | LdU256(_) | LdConst(_) - | CastU8 | CastU16 | CastU32 | CastU64 | CastU128 | CastU256 | LdTrue | LdFalse - | ReadRef | WriteRef | Add | Sub | Mul | Mod | Div | BitOr | BitAnd | Xor | Shl - | Shr | Or | And | Not | Eq | Neq | Lt | Gt | Le | Ge | CopyLoc(_) | MoveLoc(_) + FreezeRef | Pop | Ret | Branch(_) | BrTrue(_) | BrFalse(_) | LdU8(_) | LdU16(_) + | LdU32(_) | LdU64(_) | LdU128(_) | LdU256(_) | LdConst(_) | CastU8 | CastU16 + | CastU32 | CastU64 | CastU128 | CastU256 | LdTrue | LdFalse | ReadRef + | WriteRef | Add | Sub | Mul | Mod | Div | BitOr | BitAnd | Xor | Shl | Shr + | Or | And | Not | Eq | Neq | Lt | Gt | Le | Ge | CopyLoc(_) | MoveLoc(_) | StLoc(_) | MutBorrowLoc(_) | ImmBorrowLoc(_) | VecLen(_) | VecImmBorrow(_) | VecMutBorrow(_) | VecPushBack(_) | VecPopBack(_) | VecSwap(_) | Abort | Nop => (), } @@ -222,7 +226,7 @@ impl<'a> InstructionConsistency<'a> { Ok(()) } - fn check_function_op( + fn check_ld_function_op( &self, offset: usize, func_handle_index: FunctionHandleIndex, @@ -238,16 +242,43 @@ impl<'a> InstructionConsistency<'a> { Ok(()) } - fn check_closure_mask( + fn check_function_op( &self, offset: usize, func_handle_index: FunctionHandleIndex, - mask: ClosureMask, + generic: bool, ) -> PartialVMResult<()> { let function_handle = self.resolver.function_handle_at(func_handle_index); - let signature = self.resolver.signature_at(function_handle.parameters); - if mask.max_captured() >= signature.len() { - return Err(PartialVMError::new(StatusCode::INVALID_CLOSURE_MASK) + if function_handle.type_parameters.is_empty() == generic { + return Err( + PartialVMError::new(StatusCode::GENERIC_MEMBER_OPCODE_MISMATCH) + .at_code_offset(self.current_function(), offset as CodeOffset), + ); + } + Ok(()) + } + + fn check_bind_count( + &self, + offset: usize, + sig_index: SignatureIndex, + count: u8, + ) -> PartialVMResult<()> { + let signature = self.resolver.signature_at(sig_index); + if let Some(sig_token) = signature.0.first() { + if let SignatureToken::Function(params, _returns, _abilities) = sig_token { + if count as usize > params.len() { + return Err( + PartialVMError::new(StatusCode::NUMBER_OF_TYPE_ARGUMENTS_MISMATCH) + .at_code_offset(self.current_function(), offset as CodeOffset), + ); + } + } else { + return Err(PartialVMError::new(StatusCode::REQUIRES_FUNCTION) + .at_code_offset(self.current_function(), offset as CodeOffset)); + } + } else { + return Err(PartialVMError::new(StatusCode::UNKNOWN_SIGNATURE_TYPE) .at_code_offset(self.current_function(), offset as CodeOffset)); } Ok(()) diff --git a/third_party/move/move-bytecode-verifier/src/locals_safety/mod.rs b/third_party/move/move-bytecode-verifier/src/locals_safety/mod.rs index f49c37ad812f1..006f102eedbb4 100644 --- a/third_party/move/move-bytecode-verifier/src/locals_safety/mod.rs +++ b/third_party/move/move-bytecode-verifier/src/locals_safety/mod.rs @@ -125,9 +125,10 @@ fn execute_inner( | Bytecode::UnpackVariantGeneric(_) | Bytecode::TestVariant(_) | Bytecode::TestVariantGeneric(_) - | Bytecode::ClosPack(..) - | Bytecode::ClosPackGeneric(..) - | Bytecode::ClosEval(_) + | Bytecode::LdFunction(_) + | Bytecode::LdFunctionGeneric(_) + | Bytecode::Invoke(_) + | Bytecode::EarlyBind(..) | Bytecode::ReadRef | Bytecode::WriteRef | Bytecode::CastU8 diff --git a/third_party/move/move-bytecode-verifier/src/reference_safety/abstract_state.rs b/third_party/move/move-bytecode-verifier/src/reference_safety/abstract_state.rs index 1f8fd1a6d04e9..e8bfe35827a0f 100644 --- a/third_party/move/move-bytecode-verifier/src/reference_safety/abstract_state.rs +++ b/third_party/move/move-bytecode-verifier/src/reference_safety/abstract_state.rs @@ -568,7 +568,22 @@ impl AbstractState { Ok(return_values) } - pub fn clos_eval( + pub fn ld_function( + &mut self, + offset: CodeOffset, + acquired_resources: &BTreeSet, + _meter: &mut impl Meter, + ) -> PartialVMResult { + if !acquired_resources.is_empty() { + // TODO(LAMBDA): Currently acquires must be empty unless we disallow + // Invoke to call to functions defined in the same module. + return Err(self.error(StatusCode::INVALID_ACQUIRES_ANNOTATION, offset)); + } + // TODO(LAMBDA): Double-check that we don't need meter adjustments here. + Ok(AbstractValue::NonReference) + } + + pub fn invoke( &mut self, offset: CodeOffset, arguments: Vec, diff --git a/third_party/move/move-bytecode-verifier/src/reference_safety/mod.rs b/third_party/move/move-bytecode-verifier/src/reference_safety/mod.rs index b5708b461e5b8..d8426892e30cf 100644 --- a/third_party/move/move-bytecode-verifier/src/reference_safety/mod.rs +++ b/third_party/move/move-bytecode-verifier/src/reference_safety/mod.rs @@ -22,9 +22,8 @@ use move_binary_format::{ binary_views::{BinaryIndexedView, FunctionView}, errors::{PartialVMError, PartialVMResult}, file_format::{ - Bytecode, ClosureMask, CodeOffset, FunctionDefinitionIndex, FunctionHandle, - IdentifierIndex, SignatureIndex, SignatureToken, StructDefinition, StructVariantHandle, - VariantIndex, + Bytecode, CodeOffset, FunctionDefinitionIndex, FunctionHandle, IdentifierIndex, + SignatureIndex, SignatureToken, StructDefinition, StructVariantHandle, VariantIndex, }, safe_assert, safe_unwrap, views::FieldOrVariantIndex, @@ -101,24 +100,47 @@ fn call( Ok(()) } -fn clos_pack( +fn ld_function( verifier: &mut ReferenceSafetyAnalysis, + state: &mut AbstractState, + offset: CodeOffset, function_handle: &FunctionHandle, - mask: ClosureMask, + meter: &mut impl Meter, ) -> PartialVMResult<()> { - let parameters = verifier.resolver.signature_at(function_handle.parameters); - // Extract the captured arguments and pop them from the stack - let argc = mask.extract(¶meters.0, true).len(); - for _ in 0..argc { + let _parameters = verifier.resolver.signature_at(function_handle.parameters); + let acquired_resources = match verifier.name_def_map.get(&function_handle.name) { + Some(idx) => { + let func_def = verifier.resolver.function_def_at(*idx)?; + let fh = verifier.resolver.function_handle_at(func_def.function); + if function_handle == fh { + func_def.acquires_global_resources.iter().cloned().collect() + } else { + BTreeSet::new() + } + }, + None => BTreeSet::new(), + }; + let value = state.ld_function(offset, &acquired_resources, meter)?; + verifier.stack.push(value); + Ok(()) +} + +fn early_bind( + verifier: &mut ReferenceSafetyAnalysis, + _arg_tys: Vec, + k: u8, +) -> PartialVMResult<()> { + safe_assert!(safe_unwrap!(verifier.stack.pop()).is_value()); + for _ in 0..k { // Currently closures require captured arguments to be values. This is verified // by type safety. - safe_assert!(safe_unwrap!(verifier.stack.pop()).is_value()) + safe_assert!(safe_unwrap!(verifier.stack.pop()).is_value()); } verifier.stack.push(AbstractValue::NonReference); Ok(()) } -fn clos_eval( +fn invoke( verifier: &mut ReferenceSafetyAnalysis, state: &mut AbstractState, offset: CodeOffset, @@ -131,7 +153,7 @@ fn clos_eval( .map(|_| verifier.stack.pop().unwrap()) .rev() .collect(); - let values = state.clos_eval(offset, arguments, &result_tys, meter)?; + let values = state.invoke(offset, arguments, &result_tys, meter)?; for value in values { verifier.stack.push(value) } @@ -547,18 +569,22 @@ fn execute_inner( unpack_variant(verifier, handle)? }, - Bytecode::ClosPack(idx, mask) => { + Bytecode::LdFunction(idx) => { let function_handle = verifier.resolver.function_handle_at(*idx); - clos_pack(verifier, function_handle, *mask)? + ld_function(verifier, state, offset, function_handle, meter)? }, - Bytecode::ClosPackGeneric(idx, mask) => { + Bytecode::LdFunctionGeneric(idx) => { let func_inst = verifier.resolver.function_instantiation_at(*idx); let function_handle = verifier.resolver.function_handle_at(func_inst.handle); - clos_pack(verifier, function_handle, *mask)? + ld_function(verifier, state, offset, function_handle, meter)? + }, + Bytecode::EarlyBind(sig_idx, k) => { + let (arg_tys, _result_tys) = fun_type(verifier, *sig_idx)?; + early_bind(verifier, arg_tys, *k)? }, - Bytecode::ClosEval(idx) => { - let (arg_tys, result_tys) = fun_type(verifier, *idx)?; - clos_eval(verifier, state, offset, arg_tys, result_tys, meter)? + Bytecode::Invoke(sig_idx) => { + let (arg_tys, result_tys) = fun_type(verifier, *sig_idx)?; + invoke(verifier, state, offset, arg_tys, result_tys, meter)? }, Bytecode::VecPack(idx, num) => { diff --git a/third_party/move/move-bytecode-verifier/src/signature.rs b/third_party/move/move-bytecode-verifier/src/signature.rs index 08dfe5d31ef73..e2c7068d56573 100644 --- a/third_party/move/move-bytecode-verifier/src/signature.rs +++ b/third_party/move/move-bytecode-verifier/src/signature.rs @@ -260,7 +260,7 @@ impl<'a> SignatureChecker<'a> { }, // Closure operations not supported by legacy signature checker - ClosPack(..) | ClosPackGeneric(..) | ClosEval(_) => { + LdFunction(..) | LdFunctionGeneric(..) | Invoke(_) | EarlyBind(..) => { return Err(PartialVMError::new(StatusCode::UNEXPECTED_VERIFIER_ERROR) .with_message("closure operations not supported".to_owned())) }, diff --git a/third_party/move/move-bytecode-verifier/src/signature_v2.rs b/third_party/move/move-bytecode-verifier/src/signature_v2.rs index c38c9723cfd33..953c26005d921 100644 --- a/third_party/move/move-bytecode-verifier/src/signature_v2.rs +++ b/third_party/move/move-bytecode-verifier/src/signature_v2.rs @@ -820,6 +820,8 @@ impl<'a, const N: usize> SignatureChecker<'a, N> { let mut checked_struct_def_insts = BTreeMap::::new(); let mut checked_struct_def_insts_with_key = BTreeMap::::new(); + let mut checked_fun_insts = BTreeMap::::new(); + let mut checked_early_bind_insts = BTreeMap::<(SignatureIndex, u8), ()>::new(); let mut checked_vec_insts = BTreeMap::::new(); let mut checked_field_insts = BTreeMap::::new(); let mut checked_variant_field_insts = BTreeMap::::new(); @@ -839,7 +841,7 @@ impl<'a, const N: usize> SignatureChecker<'a, N> { }) }; match instr { - CallGeneric(idx) | ClosPackGeneric(idx, _) => { + CallGeneric(idx) | LdFunctionGeneric(idx) => { if let btree_map::Entry::Vacant(entry) = checked_func_insts.entry(*idx) { let constraints = self.verify_function_instantiation_contextless(*idx)?; map_err(constraints.check_in_context(&ability_context))?; @@ -898,12 +900,32 @@ impl<'a, const N: usize> SignatureChecker<'a, N> { entry.insert(()); } }, - ClosEval(idx) => { - let sign = self.resolver.signature_at(*idx); - if sign.len() != 1 || !matches!(&sign.0[0], SignatureToken::Function(..)) { - return map_err(Err(PartialVMError::new( - StatusCode::CLOSURE_EVAL_REQUIRES_FUNCTION, - ))); + Invoke(idx) => map_err(self.verify_fun_sig_idx( + *idx, + &mut checked_fun_insts, + &ability_context, + ))?, + EarlyBind(idx, count) => { + map_err(self.verify_fun_sig_idx( + *idx, + &mut checked_fun_insts, + &ability_context, + ))?; + if let btree_map::Entry::Vacant(entry) = + checked_early_bind_insts.entry((*idx, *count)) + { + // Note non-function case is checked in `verify_fun_sig_idx` above. + if let Some(SignatureToken::Function(params, _results, _abilities)) = + self.resolver.signature_at(*idx).0.first() + { + if *count as usize > params.len() { + return map_err(Err(PartialVMError::new( + StatusCode::NUMBER_OF_ARGUMENTS_MISMATCH, + ) + .with_message("in EarlyBind".to_string()))); + }; + }; + entry.insert(()); } }, VecPack(idx, _) @@ -961,7 +983,7 @@ impl<'a, const N: usize> SignatureChecker<'a, N> { | LdTrue | LdFalse | Call(_) - | ClosPack(..) + | LdFunction(..) | Pack(_) | Unpack(_) | TestVariant(_) @@ -1119,6 +1141,37 @@ impl<'a, const N: usize> SignatureChecker<'a, N> { } Ok(()) } + + // Checks that a `sig_idx` parameter to `Invoke` or `EarlyBind` is well-formed. + fn verify_fun_sig_idx( + &self, + idx: SignatureIndex, + checked_fun_insts: &mut BTreeMap, + ability_context: &BitsetTypeParameterConstraints, + ) -> PartialVMResult<()> { + if let btree_map::Entry::Vacant(entry) = checked_fun_insts.entry(idx) { + let ty_args = &self.resolver.signature_at(idx).0; + if ty_args.len() != 1 { + return Err(PartialVMError::new( + StatusCode::NUMBER_OF_TYPE_ARGUMENTS_MISMATCH, + )) + .map_err(|err| { + err.with_message(format!( + "expected 1 type token for function operations, got {}", + ty_args.len() + )) + }); + } + if !ty_args[0].is_function() { + return Err(PartialVMError::new(StatusCode::INVALID_SIGNATURE_TOKEN)) + .map_err(|err| err.with_message("function required".to_string())); + } + self.verify_signature_in_context(ability_context, idx)?; + + entry.insert(()); + }; + Ok(()) + } } fn verify_module_impl(module: &CompiledModule) -> PartialVMResult<()> { diff --git a/third_party/move/move-bytecode-verifier/src/stack_usage_verifier.rs b/third_party/move/move-bytecode-verifier/src/stack_usage_verifier.rs index 427b6d88e01d7..bb86abdcc346a 100644 --- a/third_party/move/move-bytecode-verifier/src/stack_usage_verifier.rs +++ b/third_party/move/move-bytecode-verifier/src/stack_usage_verifier.rs @@ -228,13 +228,24 @@ impl<'a> StackUsageVerifier<'a> { (arg_count, return_count) }, - // ClosEval pops the number of arguments and pushes the results of the given function - // type - Bytecode::ClosEval(idx) => { - if let Some(SignatureToken::Function(args, result, _)) = + // LdFunction pushes the function handle + Bytecode::LdFunction(idx) => { + let _function_handle = self.resolver.function_handle_at(*idx); + (0, 1) + }, + Bytecode::LdFunctionGeneric(idx) => { + let func_inst = self.resolver.function_instantiation_at(*idx); + let _function_handle = self.resolver.function_handle_at(func_inst.handle); + (0, 1) + }, + + // Invoke pops a function and the number of arguments and pushes the results of the + // given function type + Bytecode::Invoke(idx) => { + if let Some(SignatureToken::Function(args, results, _)) = self.resolver.signature_at(*idx).0.first() { - ((1 + args.len()) as u64, result.len() as u64) + ((1 + args.len()) as u64, results.len() as u64) } else { // We don't know what it will pop/push, but the signature checker // ensures we never reach this @@ -242,27 +253,23 @@ impl<'a> StackUsageVerifier<'a> { } }, - // ClosPack pops the captured arguments and returns 1 value - Bytecode::ClosPack(idx, mask) => { - let function_handle = self.resolver.function_handle_at(*idx); - let arg_count = mask - .extract( - &self.resolver.signature_at(function_handle.parameters).0, - true, - ) - .len() as u64; - (arg_count, 1) - }, - Bytecode::ClosPackGeneric(idx, mask) => { - let func_inst = self.resolver.function_instantiation_at(*idx); - let function_handle = self.resolver.function_handle_at(func_inst.handle); - let arg_count = mask - .extract( - &self.resolver.signature_at(function_handle.parameters).0, - true, - ) - .len() as u64; - (arg_count, 1) + // EarlyBind pops a function value and the captured arguments and returns 1 value + Bytecode::EarlyBind(idx, arg_count) => { + if let Some(SignatureToken::Function(args, _results, _)) = + self.resolver.signature_at(*idx).0.first() + { + if args.len() <= *arg_count as usize { + (1 + *arg_count as u64, 1) + } else { + // We don't know what it will pop/push, but the signature checker + // ensures we never reach this + (0, 0) + } + } else { + // We don't know what it will pop/push, but the signature checker + // ensures we never reach this + (0, 0) + } }, // Pack performs `num_fields` pops and one push diff --git a/third_party/move/move-bytecode-verifier/src/type_safety.rs b/third_party/move/move-bytecode-verifier/src/type_safety.rs index cb6a03aada557..66b8dd4bced7d 100644 --- a/third_party/move/move-bytecode-verifier/src/type_safety.rs +++ b/third_party/move/move-bytecode-verifier/src/type_safety.rs @@ -11,10 +11,10 @@ use move_binary_format::{ control_flow_graph::ControlFlowGraph, errors::{PartialVMError, PartialVMResult}, file_format::{ - Ability, AbilitySet, Bytecode, ClosureMask, CodeOffset, FunctionDefinitionIndex, - FunctionHandle, FunctionHandleIndex, LocalIndex, Signature, SignatureToken, - SignatureToken as ST, StructDefinition, StructDefinitionIndex, StructFieldInformation, - StructHandleIndex, VariantIndex, Visibility, + AbilitySet, Bytecode, CodeOffset, FunctionDefinitionIndex, FunctionHandle, + FunctionHandleIndex, LocalIndex, Signature, SignatureToken, SignatureToken as ST, + StructDefinition, StructDefinitionIndex, StructFieldInformation, StructHandleIndex, + VariantIndex, Visibility, }, safe_assert, safe_unwrap, views::FieldOrVariantIndex, @@ -301,7 +301,7 @@ fn call( Ok(()) } -fn clos_eval( +fn invoke( verifier: &mut TypeSafetyChecker, meter: &mut impl Meter, offset: CodeOffset, @@ -312,6 +312,7 @@ fn clos_eval( safe_assert!(false); unreachable!() }; + // On top of the stack is the closure, pop it. let closure_ty = safe_unwrap!(verifier.stack.pop()); // Verify that the closure type matches the expected type @@ -331,6 +332,7 @@ fn clos_eval( .error(StatusCode::CALL_TYPE_MISMATCH_ERROR, offset) .with_message("closure ability mismatch".to_owned())); } + // Pop and verify arguments for param_ty in param_tys.iter().rev() { let arg_ty = safe_unwrap!(verifier.stack.pop()); @@ -344,36 +346,14 @@ fn clos_eval( Ok(()) } -fn clos_pack( +fn ld_function( verifier: &mut TypeSafetyChecker, meter: &mut impl Meter, - offset: CodeOffset, - func_handle_idx: FunctionHandleIndex, + _offset: CodeOffset, + function_handle_idx: &FunctionHandleIndex, type_actuals: &Signature, - mask: ClosureMask, ) -> PartialVMResult<()> { - let func_handle = verifier.resolver.function_handle_at(func_handle_idx); - // Check the captured arguments on the stack - let param_sign = verifier.resolver.signature_at(func_handle.parameters); - let captured_param_tys = mask.extract(¶m_sign.0, true); - let mut abilities = AbilitySet::ALL; - for ty in captured_param_tys.iter().rev() { - abilities = abilities.intersect(verifier.abilities(ty)?); - let arg = safe_unwrap!(verifier.stack.pop()); - if (type_actuals.is_empty() && &arg != ty) - || (!type_actuals.is_empty() && arg != instantiate(ty, type_actuals)) - { - return Err(verifier - .error(StatusCode::PACK_TYPE_MISMATCH_ERROR, offset) - .with_message("captured argument type mismatch".to_owned())); - } - // A captured argument must not be a reference - if ty.is_reference() { - return Err(verifier - .error(StatusCode::PACK_TYPE_MISMATCH_ERROR, offset) - .with_message("captured argument must not be a reference".to_owned())); - } - } + let func_handle = verifier.resolver.function_handle_at(*function_handle_idx); // In order to determine whether this closure can be storable, we need to figure whether // this function is public. @@ -385,7 +365,7 @@ fn clos_pack( // and adding visibility there. let mut is_storable = false; for fun_def in verifier.resolver.function_defs().unwrap_or(&[]) { - if fun_def.function == func_handle_idx { + if fun_def.function == *function_handle_idx { // Function defined in this module, so we can check visibility. if fun_def.visibility == Visibility::Public { is_storable = true; @@ -393,23 +373,80 @@ fn clos_pack( break; } } - if !is_storable { - abilities.remove(Ability::Store); - } - abilities.remove(Ability::Key); + let abilities = if is_storable { + AbilitySet::PUBLIC_FUNCTIONS + } else { + AbilitySet::PRIVATE_FUNCTIONS + }; // Construct the resulting function type - let not_captured_param_tys = mask.extract(¶m_sign.0, false); + let parameters = verifier.resolver.signature_at(func_handle.parameters); let ret_sign = verifier.resolver.signature_at(func_handle.return_); verifier.push( meter, instantiate( - &SignatureToken::Function(not_captured_param_tys, ret_sign.0.to_vec(), abilities), + &SignatureToken::Function(parameters.0.to_vec(), ret_sign.0.to_vec(), abilities), type_actuals, ), ) } +fn early_bind( + verifier: &mut TypeSafetyChecker, + meter: &mut impl Meter, + offset: CodeOffset, + expected_ty: &SignatureToken, + count: u8, +) -> PartialVMResult<()> { + let count = count as usize; + let SignatureToken::Function(param_tys, ret_tys, abilities) = expected_ty else { + // The signature checker has ensured this is a function + safe_assert!(false); + unreachable!() + }; + + // On top of the stack is the closure, pop it. + let closure_ty = safe_unwrap!(verifier.stack.pop()); + // Verify that the closure type matches the expected type + if &closure_ty != expected_ty { + return Err(verifier + .error(StatusCode::CALL_TYPE_MISMATCH_ERROR, offset) + .with_message("closure type mismatch".to_owned())); + } + // Verify that the abilities match + let SignatureToken::Function(_, _, closure_abilities) = closure_ty else { + // Ensured above, but never panic + safe_assert!(false); + unreachable!() + }; + if !abilities.is_subset(closure_abilities) { + return Err(verifier + .error(StatusCode::CALL_TYPE_MISMATCH_ERROR, offset) + .with_message("closure ability mismatch".to_owned())); + } + + if param_tys.len() < count { + return Err(verifier.error(StatusCode::NUMBER_OF_TYPE_ARGUMENTS_MISMATCH, offset)); + } + + let binding_param_tys = ¶m_tys[0..count]; + let remaining_param_tys = ¶m_tys[count..]; + + // Pop and verify arguments + for param_ty in binding_param_tys.iter().rev() { + let arg_ty = safe_unwrap!(verifier.stack.pop()); + if &arg_ty != param_ty { + return Err(verifier.error(StatusCode::CALL_TYPE_MISMATCH_ERROR, offset)); + } + } + let result_ty = SignatureToken::Function( + (*remaining_param_tys).to_vec(), + ret_tys.to_vec(), + *abilities, + ); + verifier.push(meter, result_ty) +} + fn type_fields_signature( verifier: &mut TypeSafetyChecker, _meter: &mut impl Meter, // TODO: metering @@ -835,19 +872,25 @@ fn verify_instr( call(verifier, meter, offset, func_handle, type_args)? }, - Bytecode::ClosPack(idx, mask) => { - clos_pack(verifier, meter, offset, *idx, &Signature(vec![]), *mask)? - }, - Bytecode::ClosPackGeneric(idx, mask) => { + Bytecode::LdFunction(idx) => ld_function(verifier, meter, offset, idx, &Signature(vec![]))?, + + Bytecode::LdFunctionGeneric(idx) => { let func_inst = verifier.resolver.function_instantiation_at(*idx); let type_args = &verifier.resolver.signature_at(func_inst.type_parameters); verifier.charge_tys(meter, &type_args.0)?; - clos_pack(verifier, meter, offset, func_inst.handle, type_args, *mask)? + ld_function(verifier, meter, offset, &func_inst.handle, type_args)? }, - Bytecode::ClosEval(idx) => { + + Bytecode::EarlyBind(idx, count) => { + // The signature checker has verified this is a function type. + let expected_ty = safe_unwrap!(verifier.resolver.signature_at(*idx).0.first()); + early_bind(verifier, meter, offset, expected_ty, *count)? + }, + + Bytecode::Invoke(idx) => { // The signature checker has verified this is a function type. let expected_ty = safe_unwrap!(verifier.resolver.signature_at(*idx).0.first()); - clos_eval(verifier, meter, offset, expected_ty)? + invoke(verifier, meter, offset, expected_ty)? }, Bytecode::Pack(idx) => { diff --git a/third_party/move/move-core/types/src/vm_status.rs b/third_party/move/move-core/types/src/vm_status.rs index a33744afd0009..9aaccff1feecd 100644 --- a/third_party/move/move-core/types/src/vm_status.rs +++ b/third_party/move/move-core/types/src/vm_status.rs @@ -734,16 +734,14 @@ pub enum StatusCode { ZERO_VARIANTS_ERROR = 1130, // A feature is not enabled. FEATURE_NOT_ENABLED = 1131, - // Closure mask invalid - INVALID_CLOSURE_MASK = 1132, - // Closure eval type is not a function - CLOSURE_EVAL_REQUIRES_FUNCTION = 1133, + // Invoke or EarlyBind parameter type is not a function + REQUIRES_FUNCTION = 1132, // Reserved error code for future use - RESERVED_VERIFICATION_ERROR_2 = 1134, - RESERVED_VERIFICATION_ERROR_3 = 1135, - RESERVED_VERIFICATION_ERROR_4 = 1136, - RESERVED_VERIFICATION_ERROR_5 = 1137, + RESERVED_VERIFICATION_ERROR_2 = 1133, + RESERVED_VERIFICATION_ERROR_3 = 1134, + RESERVED_VERIFICATION_ERROR_4 = 1135, + RESERVED_VERIFICATION_ERROR_5 = 1136, // These are errors that the VM might raise if a violation of internal // invariants takes place. @@ -784,13 +782,14 @@ pub enum StatusCode { // Should never be committed on chain SPECULATIVE_EXECUTION_ABORT_ERROR = 2024, ACCESS_CONTROL_INVARIANT_VIOLATION = 2025, + LD_FUNCTION_NONEMPTY_ACQUIRES = 2026, // Reserved error code for future use - RESERVED_INVARIANT_VIOLATION_ERROR_1 = 2026, - RESERVED_INVARIANT_VIOLATION_ERROR_2 = 2027, - RESERVED_INVARIANT_VIOLATION_ERROR_3 = 2028, - RESERVED_INVARIANT_VIOLATION_ERROR_4 = 2039, - RESERVED_INVARIANT_VIOLATION_ERROR_5 = 2040, + RESERVED_INVARIANT_VIOLATION_ERROR_1 = 2027, + RESERVED_INVARIANT_VIOLATION_ERROR_2 = 2028, + RESERVED_INVARIANT_VIOLATION_ERROR_3 = 2039, + RESERVED_INVARIANT_VIOLATION_ERROR_4 = 2040, + RESERVED_INVARIANT_VIOLATION_ERROR_5 = 2041, // Errors that can arise from binary decoding (deserialization) // Deserialization Errors: 3000-3999 diff --git a/third_party/move/move-model/bytecode/src/function_target_pipeline.rs b/third_party/move/move-model/bytecode/src/function_target_pipeline.rs index 4fd4d5b0e0485..187a92e907d1f 100644 --- a/third_party/move/move-model/bytecode/src/function_target_pipeline.rs +++ b/third_party/move/move-model/bytecode/src/function_target_pipeline.rs @@ -353,7 +353,7 @@ impl FunctionTargetPipeline { /// Build the call graph. /// Nodes of this call graph are qualified function ids. - /// An edge A -> B in the call graph means that function A calls function B. + /// An edge A -> B in the call graph means that function A calls function B or refers to function B. fn build_call_graph( env: &GlobalEnv, targets: &FunctionTargetsHolder, @@ -369,7 +369,7 @@ impl FunctionTargetPipeline { let fun_env = env.get_function(fun_id); for callee in fun_env .get_used_functions() - .expect("called functions must be computed") + .expect("used functions must be computed") { let dst_idx = nodes .get(callee) diff --git a/third_party/move/move-model/bytecode/src/stackless_bytecode_generator.rs b/third_party/move/move-model/bytecode/src/stackless_bytecode_generator.rs index 0b95bbc1cfd93..f83916b3b4e87 100644 --- a/third_party/move/move-model/bytecode/src/stackless_bytecode_generator.rs +++ b/third_party/move/move-model/bytecode/src/stackless_bytecode_generator.rs @@ -293,9 +293,10 @@ impl<'a> StacklessBytecodeGenerator<'a> { }; match bytecode { - MoveBytecode::ClosPack(..) - | MoveBytecode::ClosPackGeneric(..) - | MoveBytecode::ClosEval(..) => { + MoveBytecode::Invoke(..) + | MoveBytecode::LdFunction(..) + | MoveBytecode::LdFunctionGeneric(..) + | MoveBytecode::EarlyBind(..) => { unimplemented!("stackless bytecode generation for closure opcodes") }, MoveBytecode::Pop => { diff --git a/third_party/move/move-vm/runtime/src/interpreter.rs b/third_party/move/move-vm/runtime/src/interpreter.rs index 84a855a7c51d2..407720c217fc4 100644 --- a/third_party/move/move-vm/runtime/src/interpreter.rs +++ b/third_party/move/move-vm/runtime/src/interpreter.rs @@ -1604,8 +1604,11 @@ impl Frame { instruction: &Bytecode, ) -> PartialVMResult<()> { match instruction { - // TODO: implement closures - Bytecode::ClosPack(..) | Bytecode::ClosPackGeneric(..) | Bytecode::ClosEval(..) => { + // TODO(LAMBDA): implement closures + Bytecode::LdFunction(..) + | Bytecode::LdFunctionGeneric(..) + | Bytecode::Invoke(..) + | Bytecode::EarlyBind(..) => { return Err(PartialVMError::new(StatusCode::UNIMPLEMENTED_FEATURE) .with_message("closure opcodes in interpreter".to_owned())) }, @@ -1735,7 +1738,10 @@ impl Frame { match instruction { // TODO: implement closures - Bytecode::ClosPack(..) | Bytecode::ClosPackGeneric(..) | Bytecode::ClosEval(..) => { + Bytecode::LdFunction(..) + | Bytecode::LdFunctionGeneric(..) + | Bytecode::Invoke(..) + | Bytecode::EarlyBind(..) => { return Err(PartialVMError::new(StatusCode::UNIMPLEMENTED_FEATURE) .with_message("closure opcodes in interpreter".to_owned())) }, @@ -2336,10 +2342,11 @@ impl Frame { } match instruction { - // TODO: implement closures - Bytecode::ClosPack(..) - | Bytecode::ClosPackGeneric(..) - | Bytecode::ClosEval(..) => { + // TODO(LAMBDA): implement closures + Bytecode::LdFunction(..) + | Bytecode::LdFunctionGeneric(..) + | Bytecode::Invoke(..) + | Bytecode::EarlyBind(..) => { return Err(PartialVMError::new(StatusCode::UNIMPLEMENTED_FEATURE) .with_message("closure opcodes in interpreter".to_owned())) }, diff --git a/third_party/move/move-vm/test-utils/src/gas_schedule.rs b/third_party/move/move-vm/test-utils/src/gas_schedule.rs index dc22263926f53..be4683e92f7d1 100644 --- a/third_party/move/move-vm/test-utils/src/gas_schedule.rs +++ b/third_party/move/move-vm/test-utils/src/gas_schedule.rs @@ -703,6 +703,13 @@ pub fn zero_cost_instruction_table() -> Vec<(Bytecode, GasCost)> { (CastU16, GasCost::new(0, 0)), (CastU32, GasCost::new(0, 0)), (CastU256, GasCost::new(0, 0)), + (LdFunction(FunctionHandleIndex::new(0)), GasCost::new(0, 0)), + ( + LdFunctionGeneric(FunctionInstantiationIndex::new(0)), + GasCost::new(0, 0), + ), + (Invoke(SignatureIndex::new(0)), GasCost::new(0, 0)), + (EarlyBind(SignatureIndex::new(0), 0u8), GasCost::new(0, 0)), ] } @@ -876,13 +883,23 @@ pub fn bytecode_instruction_costs() -> Vec<(Bytecode, GasCost)> { (CastU16, GasCost::new(2, 1)), (CastU32, GasCost::new(2, 1)), (CastU256, GasCost::new(2, 1)), + ( + LdFunction(FunctionHandleIndex::new(0)), + GasCost::new(1132, 1), + ), + ( + LdFunctionGeneric(FunctionInstantiationIndex::new(0)), + GasCost::new(1132, 1), + ), + (Invoke(SignatureIndex::new(0)), GasCost::new(1132, 1)), + ( + EarlyBind(SignatureIndex::new(0), 0u8), + GasCost::new(1132, 1), + ), ] } pub static INITIAL_COST_SCHEDULE: Lazy = Lazy::new(|| { - let mut instrs = bytecode_instruction_costs(); - // Note that the DiemVM is expecting the table sorted by instruction order. - instrs.sort_by_key(|cost| instruction_key(&cost.0)); - + let instrs = bytecode_instruction_costs(); new_from_instructions(instrs) }); diff --git a/third_party/move/tools/move-disassembler/src/disassembler.rs b/third_party/move/tools/move-disassembler/src/disassembler.rs index e3148cea698cb..a0d9e15417156 100644 --- a/third_party/move/tools/move-disassembler/src/disassembler.rs +++ b/third_party/move/tools/move-disassembler/src/disassembler.rs @@ -644,9 +644,103 @@ impl<'a> Disassembler<'a> { default_location: &Loc, ) -> Result { match instruction { - Bytecode::ClosPack(..) | Bytecode::ClosPackGeneric(..) | Bytecode::ClosEval(..) => { - bail!("closure opcodes not implemented") + Bytecode::LdFunction(method_idx) => { + let function_handle = self.source_mapper.bytecode.function_handle_at(*method_idx); + let module_handle = self + .source_mapper + .bytecode + .module_handle_at(function_handle.module); + let fcall_name = self.get_function_string(module_handle, function_handle); + let type_arguments = self + .source_mapper + .bytecode + .signature_at(function_handle.parameters) + .0 + .iter() + .map(|sig_tok| self.disassemble_sig_tok(sig_tok.clone(), &[])) + .collect::>>()? + .join(", "); + let type_rets = self + .source_mapper + .bytecode + .signature_at(function_handle.return_) + .0 + .iter() + .map(|sig_tok| self.disassemble_sig_tok(sig_tok.clone(), &[])) + .collect::>>()?; + Ok(format!( + "LdFunction {}({}){}", + fcall_name, + type_arguments, + Self::format_ret_type(&type_rets) + )) + }, + Bytecode::LdFunctionGeneric(method_idx) => { + let func_inst = self + .source_mapper + .bytecode + .function_instantiation_at(*method_idx); + let function_handle = self + .source_mapper + .bytecode + .function_handle_at(func_inst.handle); + let module_handle = self + .source_mapper + .bytecode + .module_handle_at(function_handle.module); + let fcall_name = self.get_function_string(module_handle, function_handle); + let ty_params = self + .source_mapper + .bytecode + .signature_at(func_inst.type_parameters) + .0 + .iter() + .map(|sig_tok| { + Ok(( + self.disassemble_sig_tok( + sig_tok.clone(), + &function_source_map.type_parameters, + )?, + *default_location, + )) + }) + .collect::>>()?; + let type_arguments = self + .source_mapper + .bytecode + .signature_at(function_handle.parameters) + .0 + .iter() + .map(|sig_tok| self.disassemble_sig_tok(sig_tok.clone(), &ty_params)) + .collect::>>()? + .join(", "); + let type_rets = self + .source_mapper + .bytecode + .signature_at(function_handle.return_) + .0 + .iter() + .map(|sig_tok| self.disassemble_sig_tok(sig_tok.clone(), &ty_params)) + .collect::>>()?; + Ok(format!( + "LdFunctionGeneric {}{}({}){}", + fcall_name, + Self::format_type_params( + &ty_params.into_iter().map(|(s, _)| s).collect::>() + ), + type_arguments, + Self::format_ret_type(&type_rets) + )) + }, + Bytecode::Invoke(idx) => { + let _signature = self.source_mapper.bytecode.signature_at(*idx); + Ok(format!("Invoke({})", idx)) }, + Bytecode::EarlyBind(idx, count) => { + let _signature = self.source_mapper.bytecode.signature_at(*idx); + Ok(format!("EarlyBind({}, {})", idx, count)) + }, + Bytecode::LdConst(idx) => { let constant = self.source_mapper.bytecode.constant_at(*idx); Ok(format!(