diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index 204f3fccb16f..3d081951a526 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -3,7 +3,10 @@ use crate::shared::{entities::EntityRefs, immediates::Immediates}; use std::rc::Rc; pub(crate) struct Formats { + pub(crate) atomic_cas: Rc, + pub(crate) atomic_rmw: Rc, pub(crate) binary: Rc, + pub(crate) binary_imm8: Rc, pub(crate) binary_imm64: Rc, pub(crate) branch: Rc, pub(crate) branch_float: Rc, @@ -17,7 +20,6 @@ pub(crate) struct Formats { pub(crate) cond_trap: Rc, pub(crate) copy_special: Rc, pub(crate) copy_to_ssa: Rc, - pub(crate) binary_imm8: Rc, pub(crate) float_compare: Rc, pub(crate) float_cond: Rc, pub(crate) float_cond_trap: Rc, @@ -32,6 +34,7 @@ pub(crate) struct Formats { pub(crate) jump: Rc, pub(crate) load: Rc, pub(crate) load_complex: Rc, + pub(crate) load_no_offset: Rc, pub(crate) multiary: Rc, pub(crate) nullary: Rc, pub(crate) reg_fill: Rc, @@ -42,6 +45,7 @@ pub(crate) struct Formats { pub(crate) stack_store: Rc, pub(crate) store: Rc, pub(crate) store_complex: Rc, + pub(crate) store_no_offset: Rc, pub(crate) table_addr: Rc, pub(crate) ternary: Rc, pub(crate) ternary_imm8: Rc, @@ -202,6 +206,21 @@ impl Formats { func_addr: Builder::new("FuncAddr").imm(&entities.func_ref).build(), + atomic_rmw: Builder::new("AtomicRmw") + .imm(&imm.memflags) + .imm(&imm.atomic_rmw_op) + .value() + .value() + .build(), + + atomic_cas: Builder::new("AtomicCas") + .imm(&imm.memflags) + .value() + .value() + .value() + .typevar_operand(2) + .build(), + load: Builder::new("Load") .imm(&imm.memflags) .value() @@ -214,6 +233,11 @@ impl Formats { .imm(&imm.offset32) .build(), + load_no_offset: Builder::new("LoadNoOffset") + .imm(&imm.memflags) + .value() + .build(), + store: Builder::new("Store") .imm(&imm.memflags) .value() @@ -228,6 +252,12 @@ impl Formats { .imm(&imm.offset32) .build(), + store_no_offset: Builder::new("StoreNoOffset") + .imm(&imm.memflags) + .value() + .value() + .build(), + stack_load: Builder::new("StackLoad") .imm(&entities.stack_slot) .imm(&imm.offset32) diff --git a/cranelift/codegen/meta/src/shared/immediates.rs b/cranelift/codegen/meta/src/shared/immediates.rs index d8382e40677d..0aa4129daf33 100644 --- a/cranelift/codegen/meta/src/shared/immediates.rs +++ b/cranelift/codegen/meta/src/shared/immediates.rs @@ -71,6 +71,9 @@ pub(crate) struct Immediates { /// /// The Rust enum type also has a `User(u16)` variant for user-provided trap codes. pub trapcode: OperandKind, + + /// A code indicating the arithmetic operation to perform in an atomic_rmw memory access. 
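+    ///
+    /// The operand is one of `Add`, `Sub`, `And`, `Or`, `Xor` or `Xchg`; it maps to the
+    /// `ir::AtomicRmwOp` enum added in this change.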
+ pub atomic_rmw_op: OperandKind, } fn new_imm(format_field_name: &'static str, rust_type: &'static str) -> OperandKind { @@ -156,6 +159,17 @@ impl Immediates { trapcode_values.insert("int_divz", "IntegerDivisionByZero"); new_enum("code", "ir::TrapCode", trapcode_values).with_doc("A trap reason code.") }, + atomic_rmw_op: { + let mut atomic_rmw_op_values = HashMap::new(); + atomic_rmw_op_values.insert("add", "Add"); + atomic_rmw_op_values.insert("sub", "Sub"); + atomic_rmw_op_values.insert("and", "And"); + atomic_rmw_op_values.insert("or", "Or"); + atomic_rmw_op_values.insert("xor", "Xor"); + atomic_rmw_op_values.insert("xchg", "Xchg"); + new_enum("op", "ir::AtomicRmwOp", atomic_rmw_op_values) + .with_doc("Atomic Read-Modify-Write Ops") + }, } } } diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 057ae7a0b2a6..93f80d498e93 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -4305,5 +4305,109 @@ pub(crate) fn define( .is_ghost(true), ); + // Instructions relating to atomic memory accesses and fences + let AtomicMem = &TypeVar::new( + "AtomicMem", + "Any type that can be stored in memory, which can be used in an atomic operation", + TypeSetBuilder::new().ints(8..64).build(), + ); + let x = &Operand::new("x", AtomicMem).with_doc("Value to be atomically stored"); + let a = &Operand::new("a", AtomicMem).with_doc("Value atomically loaded"); + let e = &Operand::new("e", AtomicMem).with_doc("Expected value in CAS"); + let p = &Operand::new("p", iAddr); + let MemFlags = &Operand::new("MemFlags", &imm.memflags); + let AtomicRmwOp = &Operand::new("AtomicRmwOp", &imm.atomic_rmw_op); + + ig.push( + Inst::new( + "atomic_rmw", + r#" + Atomically read-modify-write memory at `p`, with second operand `x`. The old value is + returned. `p` has the type of the target word size, and `x` may be an integer type of + 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the + same as the type of `x`. This operation is sequentially consistent and creates + happens-before edges that order normal (non-atomic) loads and stores. + "#, + &formats.atomic_rmw, + ) + .operands_in(vec![MemFlags, AtomicRmwOp, p, x]) + .operands_out(vec![a]) + .can_load(true) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_cas", + r#" + Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`, + storing `x` if the value at `p` equals `e`. The old value at `p` is returned, + regardless of whether the operation succeeds or fails. `p` has the type of the target + word size, and `x` and `e` must have the same type and the same size, which may be an + integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned + value is the same as the type of `x` and `e`. This operation is sequentially + consistent and creates happens-before edges that order normal (non-atomic) loads and + stores. + "#, + &formats.atomic_cas, + ) + .operands_in(vec![MemFlags, p, e, x]) + .operands_out(vec![a]) + .can_load(true) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_load", + r#" + Atomically load from memory at `p`. + + This is a polymorphic instruction that can load any value type which has a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. 
+ This operation is sequentially consistent and creates happens-before edges that order + normal (non-atomic) loads and stores. + "#, + &formats.load_no_offset, + ) + .operands_in(vec![MemFlags, p]) + .operands_out(vec![a]) + .can_load(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "atomic_store", + r#" + Atomically store `x` to memory at `p`. + + This is a polymorphic instruction that can store any value type with a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. + This operation is sequentially consistent and creates happens-before edges that order + normal (non-atomic) loads and stores. + "#, + &formats.store_no_offset, + ) + .operands_in(vec![MemFlags, x, p]) + .can_store(true) + .other_side_effects(true), + ); + + ig.push( + Inst::new( + "fence", + r#" + A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + nor stores of any kind may move forwards or backwards across the fence. This operation + is sequentially consistent. + "#, + &formats.nullary, + ) + .other_side_effects(true), + ); + ig.build() } diff --git a/cranelift/codegen/src/ir/atomic_rmw_op.rs b/cranelift/codegen/src/ir/atomic_rmw_op.rs new file mode 100644 index 000000000000..c93756147ae2 --- /dev/null +++ b/cranelift/codegen/src/ir/atomic_rmw_op.rs @@ -0,0 +1,52 @@ +/// Describes the arithmetic operation in an atomic memory read-modify-write operation. +use core::fmt::{self, Display, Formatter}; +use core::str::FromStr; +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +/// Describes the arithmetic operation in an atomic memory read-modify-write operation. +pub enum AtomicRmwOp { + /// Add + Add, + /// Sub + Sub, + /// And + And, + /// Or + Or, + /// Xor + Xor, + /// Exchange + Xchg, +} + +impl Display for AtomicRmwOp { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + let s = match self { + AtomicRmwOp::Add => "add", + AtomicRmwOp::Sub => "sub", + AtomicRmwOp::And => "and", + AtomicRmwOp::Or => "or", + AtomicRmwOp::Xor => "xor", + AtomicRmwOp::Xchg => "xchg", + }; + f.write_str(s) + } +} + +impl FromStr for AtomicRmwOp { + type Err = (); + fn from_str(s: &str) -> Result { + match s { + "add" => Ok(AtomicRmwOp::Add), + "sub" => Ok(AtomicRmwOp::Sub), + "and" => Ok(AtomicRmwOp::And), + "or" => Ok(AtomicRmwOp::Or), + "xor" => Ok(AtomicRmwOp::Xor), + "xchg" => Ok(AtomicRmwOp::Xchg), + _ => Err(()), + } + } +} diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 7f3c36b7be0f..4dbe90df34d6 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -1,5 +1,6 @@ //! Representation of Cranelift IR functions. +mod atomic_rmw_op; mod builder; pub mod constant; pub mod dfg; @@ -26,6 +27,7 @@ mod valueloc; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; +pub use crate::ir::atomic_rmw_op::AtomicRmwOp; pub use crate::ir::builder::{ InsertBuilder, InstBuilder, InstBuilderBase, InstInserterBase, ReplaceBuilder, }; diff --git a/cranelift/codegen/src/ir/trapcode.rs b/cranelift/codegen/src/ir/trapcode.rs index 0f1f62e3b6d0..612c979a0abf 100644 --- a/cranelift/codegen/src/ir/trapcode.rs +++ b/cranelift/codegen/src/ir/trapcode.rs @@ -24,6 +24,9 @@ pub enum TrapCode { /// offset-guard pages. HeapOutOfBounds, + /// A wasm atomic operation was presented with a not-naturally-aligned linear-memory address. 
+ HeapMisaligned, + /// A `table_addr` instruction detected an out-of-bounds error. TableOutOfBounds, @@ -59,6 +62,7 @@ impl Display for TrapCode { let identifier = match *self { StackOverflow => "stk_ovf", HeapOutOfBounds => "heap_oob", + HeapMisaligned => "heap_misaligned", TableOutOfBounds => "table_oob", IndirectCallToNull => "icall_null", BadSignature => "bad_sig", @@ -81,6 +85,7 @@ impl FromStr for TrapCode { match s { "stk_ovf" => Ok(StackOverflow), "heap_oob" => Ok(HeapOutOfBounds), + "heap_misaligned" => Ok(HeapMisaligned), "table_oob" => Ok(TableOutOfBounds), "icall_null" => Ok(IndirectCallToNull), "bad_sig" => Ok(BadSignature), @@ -101,9 +106,10 @@ mod tests { use alloc::string::ToString; // Everything but user-defined codes. - const CODES: [TrapCode; 10] = [ + const CODES: [TrapCode; 11] = [ TrapCode::StackOverflow, TrapCode::HeapOutOfBounds, + TrapCode::HeapMisaligned, TrapCode::TableOutOfBounds, TrapCode::IndirectCallToNull, TrapCode::BadSignature, diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 43dcc816e531..255e22fcfb88 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -4,7 +4,7 @@ #![allow(dead_code)] use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8}; -use crate::ir::Type; +use crate::ir::{AtomicRmwOp, Type}; use crate::isa::aarch64::inst::*; use crate::isa::aarch64::lower::ty_bits; use crate::machinst::MachLabel; @@ -14,6 +14,9 @@ use regalloc::{RealRegUniverse, Reg, Writable}; use core::convert::Into; use std::string::String; +//============================================================================= +// Instruction sub-components: shift and extend descriptors + /// A shift operator for a register or immediate. 
#[derive(Clone, Copy, Debug)] #[repr(u8)] @@ -645,3 +648,30 @@ impl VectorSize { } } } + +//============================================================================= +// Instruction sub-components: atomic memory update operations + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum AtomicRMWOp { + Add, + Sub, + And, + Or, + Xor, + Xchg, +} + +impl AtomicRMWOp { + pub fn from(ir_op: AtomicRmwOp) -> Self { + match ir_op { + AtomicRmwOp::Add => AtomicRMWOp::Add, + AtomicRmwOp::Sub => AtomicRMWOp::Sub, + AtomicRmwOp::And => AtomicRMWOp::And, + AtomicRmwOp::Or => AtomicRMWOp::Or, + AtomicRmwOp::Xor => AtomicRMWOp::Xor, + AtomicRmwOp::Xchg => AtomicRMWOp::Xchg, + } + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 9a280e0d01ec..8b063d36c12f 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -378,6 +378,39 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable, rn: | machreg_to_vec(rd.to_reg()) } +fn enc_dmb_ish() -> u32 { + 0xD5033BBF +} + +fn enc_ldxr(ty: Type, rt: Writable, rn: Reg) -> u32 { + let sz = match ty { + I64 => 0b11, + I32 => 0b10, + I16 => 0b01, + I8 => 0b00, + _ => unreachable!(), + }; + 0b00001000_01011111_01111100_00000000 + | (sz << 30) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt.to_reg()) +} + +fn enc_stxr(ty: Type, rs: Writable, rt: Reg, rn: Reg) -> u32 { + let sz = match ty { + I64 => 0b11, + I32 => 0b10, + I16 => 0b01, + I8 => 0b00, + _ => unreachable!(), + }; + 0b00001000_00000000_01111100_00000000 + | (sz << 30) + | (machreg_to_gpr(rs.to_reg()) << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_gpr(rt) +} + /// State carried between emissions of a sequence of instructions. #[derive(Default, Clone, Debug)] pub struct EmitState { @@ -1005,6 +1038,219 @@ impl MachInstEmit for Inst { } => { sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); } + &Inst::AtomicRMW { ty, op, srcloc } => { + /* Emit this: + dmb ish + again: + ldxr{,b,h} x/w27, [x25] + op x28, x27, x26 // op is add,sub,and,orr,eor + stxr{,b,h} w24, x/w28, [x25] + cbnz x24, again + dmb ish + + Operand conventions: + IN: x25 (addr), x26 (2nd arg for op) + OUT: x27 (old value), x24 (trashed), x28 (trashed) + + It is unfortunate that, per the ARM documentation, x28 cannot be used for + both the store-data and success-flag operands of stxr. This causes the + instruction's behaviour to be "CONSTRAINED UNPREDICTABLE", so we use x24 + instead for the success-flag. + + In the case where the operation is 'xchg', the second insn is instead + mov x28, x26 + so that we simply write in the destination, the "2nd arg for op". 
+ */ + let xzr = zero_reg(); + let x24 = xreg(24); + let x25 = xreg(25); + let x26 = xreg(26); + let x27 = xreg(27); + let x28 = xreg(28); + let x24wr = writable_xreg(24); + let x27wr = writable_xreg(27); + let x28wr = writable_xreg(28); + let again_label = sink.get_label(); + + sink.put4(enc_dmb_ish()); // dmb ish + + // again: + sink.bind_label(again_label); + if let Some(srcloc) = srcloc { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25] + + if op == AtomicRMWOp::Xchg { + // mov x28, x26 + sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26)) + } else { + // add/sub/and/orr/eor x28, x27, x26 + let bits_31_21 = match op { + AtomicRMWOp::Add => 0b100_01011_00_0, + AtomicRMWOp::Sub => 0b110_01011_00_0, + AtomicRMWOp::And => 0b100_01010_00_0, + AtomicRMWOp::Or => 0b101_01010_00_0, + AtomicRMWOp::Xor => 0b110_01010_00_0, + AtomicRMWOp::Xchg => unreachable!(), + }; + sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26)); + } + + if let Some(srcloc) = srcloc { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25] + + // cbnz w24, again + // Note, we're actually testing x24, and relying on the default zero-high-half + // rule in the assignment that `stxr` does. + let br_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(again_label), + CondBrKind::NotZero(x24), + )); + sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); + + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::AtomicCAS { ty, srcloc } => { + /* Emit this: + dmb ish + again: + ldxr{,b,h} x/w27, [x25] + and x24, x26, MASK (= 2^size_bits - 1) + cmp x27, x24 + b.ne out + stxr{,b,h} w24, x/w28, [x25] + cbnz x24, again + out: + dmb ish + + Operand conventions: + IN: x25 (addr), x26 (expected value), x28 (replacement value) + OUT: x27 (old value), x24 (trashed) + */ + let xzr = zero_reg(); + let x24 = xreg(24); + let x25 = xreg(25); + let x26 = xreg(26); + let x27 = xreg(27); + let x28 = xreg(28); + let xzrwr = writable_zero_reg(); + let x24wr = writable_xreg(24); + let x27wr = writable_xreg(27); + let again_label = sink.get_label(); + let out_label = sink.get_label(); + + sink.put4(enc_dmb_ish()); // dmb ish + + // again: + sink.bind_label(again_label); + if let Some(srcloc) = srcloc { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25] + + if ty == I64 { + // mov x24, x26 + sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x24wr, xzr, x26)) + } else { + // and x24, x26, 0xFF/0xFFFF/0xFFFFFFFF + let (mask, s) = match ty { + I8 => (0xFF, 7), + I16 => (0xFFFF, 15), + I32 => (0xFFFFFFFF, 31), + _ => unreachable!(), + }; + sink.put4(enc_arith_rr_imml( + 0b100_100100, + ImmLogic::from_n_r_s(mask, true, 0, s, OperandSize::Size64).enc_bits(), + x26, + x24wr, + )) + } + + // cmp x27, x24 (== subs xzr, x27, x24) + sink.put4(enc_arith_rrr(0b111_01011_00_0, 0b000000, xzrwr, x27, x24)); + + // b.ne out + let br_out_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(out_label), + CondBrKind::Cond(Cond::Ne), + )); + sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); + + if let Some(srcloc) = srcloc { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + sink.put4(enc_stxr(ty, x24wr, x28, x25)); // stxr w24, x28, [x25] + + // cbnz w24, again. 
+ // Note, we're actually testing x24, and relying on the default zero-high-half + // rule in the assignment that `stxr` does. + let br_again_offset = sink.cur_offset(); + sink.put4(enc_conditional_br( + BranchTarget::Label(again_label), + CondBrKind::NotZero(x24), + )); + sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19); + + // out: + sink.bind_label(out_label); + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::AtomicLoad { + ty, + r_data, + r_addr, + srcloc, + } => { + let op = match ty { + I8 => 0b0011100001, + I16 => 0b0111100001, + I32 => 0b1011100001, + I64 => 0b1111100001, + _ => unreachable!(), + }; + sink.put4(enc_dmb_ish()); // dmb ish + + if let Some(srcloc) = srcloc { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/); + sink.put4(enc_ldst_uimm12( + op, + uimm12scaled_zero, + r_addr, + r_data.to_reg(), + )); + } + &Inst::AtomicStore { + ty, + r_data, + r_addr, + srcloc, + } => { + let op = match ty { + I8 => 0b0011100000, + I16 => 0b0111100000, + I32 => 0b1011100000, + I64 => 0b1111100000, + _ => unreachable!(), + }; + + if let Some(srcloc) = srcloc { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + let uimm12scaled_zero = UImm12Scaled::zero(I8 /*irrelevant*/); + sink.put4(enc_ldst_uimm12(op, uimm12scaled_zero, r_addr, r_data)); + sink.put4(enc_dmb_ish()); // dmb ish + } + &Inst::Fence {} => { + sink.put4(enc_dmb_ish()); // dmb ish + } &Inst::FpuMove64 { rd, rn } => { sink.put4(enc_vecmov(/* 16b = */ false, rd, rn)); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index c7d01d679e64..5f00e3c7fdf3 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -4262,6 +4262,90 @@ fn test_aarch64_binemit() { "frintn d23, d24", )); + insns.push(( + Inst::AtomicRMW { + ty: I16, + op: AtomicRMWOp::Xor, + srcloc: None, + }, + "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5", + "atomically { 16_bits_at_[x25]) Xor= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }", + )); + + insns.push(( + Inst::AtomicRMW { + ty: I32, + op: AtomicRMWOp::Xchg, + srcloc: None, + }, + "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5", + "atomically { 32_bits_at_[x25]) Xchg= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }", + )); + + insns.push(( + Inst::AtomicCAS { + ty: I8, + srcloc: None, + }, + "BF3B03D53B7F5F08581F40927F0318EB610000543C7F180878FFFFB5BF3B03D5", + "atomically { compare-and-swap(8_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + )); + + insns.push(( + Inst::AtomicCAS { + ty: I64, + srcloc: None, + }, + "BF3B03D53B7F5FC8F8031AAA7F0318EB610000543C7F18C878FFFFB5BF3B03D5", + "atomically { compare-and-swap(64_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }" + )); + + insns.push(( + Inst::AtomicLoad { + ty: I8, + r_data: writable_xreg(7), + r_addr: xreg(28), + srcloc: None, + }, + "BF3B03D587034039", + "atomically { x7 = zero_extend_8_bits_at[x28] }", + )); + + insns.push(( + Inst::AtomicLoad { + ty: I64, + r_data: writable_xreg(28), + r_addr: xreg(7), + srcloc: None, + }, + "BF3B03D5FC0040F9", + "atomically { x28 = zero_extend_64_bits_at[x7] }", + )); + + insns.push(( + Inst::AtomicStore { + ty: I16, + r_data: xreg(17), + r_addr: xreg(8), + srcloc: None, + }, + "11010079BF3B03D5", + "atomically { 16_bits_at[x8] = x17 }", + )); + + insns.push(( + Inst::AtomicStore { + ty: I32, + r_data: xreg(18), + r_addr: 
xreg(7), + srcloc: None, + }, + "F20000B9BF3B03D5", + "atomically { 32_bits_at[x7] = x18 }", + )); + + insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish")); + let rru = create_reg_universe(&settings::Flags::new(settings::builder())); for (insn, expected_encoding, expected_printing) in insns { println!( diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index 7561d5ff4652..f1a98ab66c06 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -328,8 +328,7 @@ impl Imm12 { } /// An immediate for logical instructions. -#[derive(Clone, Debug)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Clone, Debug, PartialEq)] pub struct ImmLogic { /// The actual value. value: u64, @@ -551,6 +550,37 @@ impl ImmLogic { // For every ImmLogical immediate, the inverse can also be encoded. Self::maybe_from_u64(!self.value, self.size.to_ty()).unwrap() } + + /// This provides a safe(ish) way to avoid the costs of `maybe_from_u64` when we want to + /// encode a constant that we know at compiler-build time. It constructs an `ImmLogic` from + /// the fields `n`, `r`, `s` and `size`, but in a debug build, checks that `value_to_check` + /// corresponds to those four fields. The intention is that, in a non-debug build, this + /// reduces to something small enough that it will be a candidate for inlining. + pub fn from_n_r_s(value_to_check: u64, n: bool, r: u8, s: u8, size: OperandSize) -> Self { + // Construct it from the components we got given. + let imml = Self { + value: value_to_check, + n, + r, + s, + size, + }; + + // In debug mode, check that `n`/`r`/`s` are correct, given `value` and `size`. + debug_assert!(match ImmLogic::maybe_from_u64( + value_to_check, + if size == OperandSize::Size64 { + I64 + } else { + I32 + } + ) { + None => false, // fail: `value` is unrepresentable + Some(imml_check) => imml_check == imml, + }); + + imml + } } /// An immediate for shift instructions. diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index bfa296dba38e..489e20576e44 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -606,6 +606,68 @@ pub enum Inst { cond: Cond, }, + /// A synthetic insn, which is a load-linked store-conditional loop, that has the overall + /// effect of atomically modifying a memory location in a particular way. Because we have + /// no way to explain to the regalloc about earlyclobber registers, this instruction has + /// completely fixed operand registers, and we rely on the RA's coalescing to remove copies + /// in the surrounding code to the extent it can. The sequence is both preceded and + /// followed by a fence which is at least as comprehensive as that of the `Fence` + /// instruction below. This instruction is sequentially consistent. The operand + /// conventions are: + /// + /// x25 (rd) address + /// x26 (rd) second operand for `op` + /// x27 (wr) old value + /// x24 (wr) scratch reg; value afterwards has no meaning + /// x28 (wr) scratch reg; value afterwards has no meaning + AtomicRMW { + ty: Type, // I8, I16, I32 or I64 + op: AtomicRMWOp, + srcloc: Option, + }, + + /// Similar to AtomicRMW, a compare-and-swap operation implemented using a load-linked + /// store-conditional loop. (Although we could possibly implement it more directly using + /// CAS insns that are available in some revisions of AArch64 above 8.0). 
The sequence is + /// both preceded and followed by a fence which is at least as comprehensive as that of the + /// `Fence` instruction below. This instruction is sequentially consistent. Note that the + /// operand conventions, although very similar to AtomicRMW, are different: + /// + /// x25 (rd) address + /// x26 (rd) expected value + /// x28 (rd) replacement value + /// x27 (wr) old value + /// x24 (wr) scratch reg; value afterwards has no meaning + AtomicCAS { + ty: Type, // I8, I16, I32 or I64 + srcloc: Option, + }, + + /// Read `ty` bits from address `r_addr`, zero extend the loaded value to 64 bits and put it + /// in `r_data`. The load instruction is preceded by a fence at least as comprehensive as + /// that of the `Fence` instruction below. This instruction is sequentially consistent. + AtomicLoad { + ty: Type, // I8, I16, I32 or I64 + r_data: Writable, + r_addr: Reg, + srcloc: Option, + }, + + /// Write the lowest `ty` bits of `r_data` to address `r_addr`, with a memory fence + /// instruction following the store. The fence is at least as comprehensive as that of the + /// `Fence` instruction below. This instruction is sequentially consistent. + AtomicStore { + ty: Type, // I8, I16, I32 or I64 + r_data: Reg, + r_addr: Reg, + srcloc: Option, + }, + + /// A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + /// nor stores may move forwards or backwards across the fence. Currently emitted as "dmb + /// ish". This instruction is sequentially consistent. + Fence, + /// FPU move. Note that this is distinct from a vector-register /// move; moving just 64 bits seems to be significantly faster. FpuMove64 { @@ -1249,6 +1311,29 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::CCmpImm { rn, .. } => { collector.add_use(rn); } + &Inst::AtomicRMW { .. } => { + collector.add_use(xreg(25)); + collector.add_use(xreg(26)); + collector.add_def(writable_xreg(24)); + collector.add_def(writable_xreg(27)); + collector.add_def(writable_xreg(28)); + } + &Inst::AtomicCAS { .. } => { + collector.add_use(xreg(25)); + collector.add_use(xreg(26)); + collector.add_use(xreg(28)); + collector.add_def(writable_xreg(24)); + collector.add_def(writable_xreg(27)); + } + &Inst::AtomicLoad { r_data, r_addr, .. } => { + collector.add_use(r_addr); + collector.add_def(r_data); + } + &Inst::AtomicStore { r_data, r_addr, .. } => { + collector.add_use(r_addr); + collector.add_use(r_data); + } + &Inst::Fence {} => {} &Inst::FpuMove64 { rd, rn } => { collector.add_def(rd); collector.add_use(rn); @@ -1721,6 +1806,29 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { &mut Inst::CCmpImm { ref mut rn, .. } => { map_use(mapper, rn); } + &mut Inst::AtomicRMW { .. } => { + // There are no vregs to map in this insn. + } + &mut Inst::AtomicCAS { .. } => { + // There are no vregs to map in this insn. + } + &mut Inst::AtomicLoad { + ref mut r_data, + ref mut r_addr, + .. + } => { + map_def(mapper, r_data); + map_use(mapper, r_addr); + } + &mut Inst::AtomicStore { + ref mut r_data, + ref mut r_addr, + .. + } => { + map_use(mapper, r_data); + map_use(mapper, r_addr); + } + &mut Inst::Fence {} => {} &mut Inst::FpuMove64 { ref mut rd, ref mut rn, @@ -2534,6 +2642,28 @@ impl Inst { let cond = cond.show_rru(mb_rru); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } + &Inst::AtomicRMW { ty, op, .. } => { + format!( + "atomically {{ {}_bits_at_[x25]) {:?}= x26 ; x27 = old_value_at_[x25]; x24,x28 = trash }}", + ty.bits(), op) + } + &Inst::AtomicCAS { ty, .. 
} => { + format!( + "atomically {{ compare-and-swap({}_bits_at_[x25], x26 -> x28), x27 = old_value_at_[x25]; x24 = trash }}", + ty.bits()) + } + &Inst::AtomicLoad { ty, r_data, r_addr, .. } => { + format!( + "atomically {{ {} = zero_extend_{}_bits_at[{}] }}", + r_data.show_rru(mb_rru), ty.bits(), r_addr.show_rru(mb_rru)) + } + &Inst::AtomicStore { ty, r_data, r_addr, .. } => { + format!( + "atomically {{ {}_bits_at[{}] = {} }}", ty.bits(), r_addr.show_rru(mb_rru), r_data.show_rru(mb_rru)) + } + &Inst::Fence {} => { + format!("dmb ish") + } &Inst::FpuMove64 { rd, rn } => { let rd = rd.to_reg().show_rru(mb_rru); let rn = rn.show_rru(mb_rru); diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 321ee77c661b..076145d6d6ec 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -10,7 +10,7 @@ use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::types::*; use crate::ir::Inst as IRInst; -use crate::ir::{InstructionData, Opcode, TrapCode, Type}; +use crate::ir::{AtomicRmwOp, InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; use crate::CodegenResult; @@ -1082,6 +1082,13 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option { } } +pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option { + match data { + &InstructionData::AtomicRmw { op, .. } => Some(op), + _ => None, + } +} + /// Checks for an instance of `op` feeding the given input. pub(crate) fn maybe_input_insn>( c: &mut C, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index c90530c21f2f..aae8b2e60749 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -12,7 +12,7 @@ use crate::CodegenResult; use crate::isa::aarch64::abi::*; use crate::isa::aarch64::inst::*; -use regalloc::RegClass; +use regalloc::{RegClass, Writable}; use alloc::boxed::Box; use alloc::vec::Vec; @@ -21,6 +21,13 @@ use smallvec::SmallVec; use super::lower::*; +fn is_single_word_int_ty(ty: Type) -> bool { + match ty { + I8 | I16 | I32 | I64 => true, + _ => false, + } +} + /// Actually codegen an instruction's results into registers. pub(crate) fn lower_insn_to_regs>( ctx: &mut C, @@ -1108,6 +1115,123 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(inst); } + Opcode::AtomicRmw => { + let r_dst = get_output_reg(ctx, outputs[0]); + let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ty_access = ty.unwrap(); + assert!(is_single_word_int_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // Make sure that both args are in virtual regs, since in effect + // we have to do a parallel copy to get them safely to the AtomicRMW input + // regs, and that's not guaranteed safe if either is in a real reg. 
+ r_addr = ctx.ensure_in_vreg(r_addr, I64); + r_arg2 = ctx.ensure_in_vreg(r_arg2, I64); + // Move the args to the preordained AtomicRMW input regs + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); + // Now the AtomicRMW insn itself + let op = AtomicRMWOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); + ctx.emit(Inst::AtomicRMW { + ty: ty_access, + op, + srcloc, + }); + // And finally, copy the preordained AtomicRMW output reg to its destination. + ctx.emit(Inst::gen_move(r_dst, xreg(27), I64)); + // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that. + } + + Opcode::AtomicCas => { + // This is very similar to, but not identical to, the AtomicRmw case. Note + // that the AtomicCAS sequence does its own masking, so we don't need to worry + // about zero-extending narrow (I8/I16/I32) values here. + let r_dst = get_output_reg(ctx, outputs[0]); + let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + let ty_access = ty.unwrap(); + assert!(is_single_word_int_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // Make sure that all three args are in virtual regs. See corresponding comment + // for `Opcode::AtomicRmw` above. + r_addr = ctx.ensure_in_vreg(r_addr, I64); + r_expected = ctx.ensure_in_vreg(r_expected, I64); + r_replacement = ctx.ensure_in_vreg(r_replacement, I64); + // Move the args to the preordained AtomicCAS input regs + ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); + ctx.emit(Inst::gen_move( + Writable::from_reg(xreg(26)), + r_expected, + I64, + )); + ctx.emit(Inst::gen_move( + Writable::from_reg(xreg(28)), + r_replacement, + I64, + )); + // Now the AtomicCAS itself, implemented in the normal way, with an LL-SC loop + ctx.emit(Inst::AtomicCAS { + ty: ty_access, + srcloc, + }); + // And finally, copy the preordained AtomicCAS output reg to its destination. + ctx.emit(Inst::gen_move(r_dst, xreg(27), I64)); + // Also, x24 and x28 are trashed. `fn aarch64_get_regs` must mention that. 
+ } + + Opcode::AtomicLoad => { + let r_data = get_output_reg(ctx, outputs[0]); + let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty_access = ty.unwrap(); + assert!(is_single_word_int_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + ctx.emit(Inst::AtomicLoad { + ty: ty_access, + r_data, + r_addr, + srcloc, + }); + } + + Opcode::AtomicStore => { + let r_data = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ty_access = ctx.input_ty(insn, 0); + assert!(is_single_word_int_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + ctx.emit(Inst::AtomicStore { + ty: ty_access, + r_data, + r_addr, + srcloc, + }); + } + + Opcode::Fence => { + ctx.emit(Inst::Fence {}); + } + Opcode::StackLoad | Opcode::StackStore => { panic!("Direct stack memory access not supported; should not be used by Wasm"); } @@ -1544,11 +1668,10 @@ pub(crate) fn lower_insn_to_regs>( cond }; - ctx.emit(Inst::TrapIf { + ctx.emit_safepoint(Inst::TrapIf { trap_info, kind: CondBrKind::Cond(cond), }); - ctx.emit_safepoint(Inst::Udf { trap_info }) } Opcode::Safepoint => { diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index 6f235f0216f5..6aa0f2c98f17 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -161,6 +161,9 @@ pub trait LowerCtx { fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool; /// Retrieve constant data given a handle. fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData; + /// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg + /// if `reg` is a real reg. `ty` describes the type of the value in `reg`. + fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg; } /// A representation of all of the ways in which an instruction input is @@ -905,10 +908,14 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { fn memflags(&self, ir_inst: Inst) -> Option { match &self.f.dfg[ir_inst] { + &InstructionData::AtomicCas { flags, .. } => Some(flags), + &InstructionData::AtomicRmw { flags, .. } => Some(flags), &InstructionData::Load { flags, .. } | &InstructionData::LoadComplex { flags, .. } + | &InstructionData::LoadNoOffset { flags, .. } | &InstructionData::Store { flags, .. } | &InstructionData::StoreComplex { flags, .. } => Some(flags), + &InstructionData::StoreNoOffset { flags, .. } => Some(flags), _ => None, } } @@ -990,6 +997,17 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData { self.f.dfg.constants.get(constant_handle) } + + fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg { + if reg.is_virtual() { + reg + } else { + let rc = reg.get_class(); + let new_reg = self.alloc_tmp(rc, ty); + self.emit(I::gen_move(new_reg, reg, ty)); + new_reg.to_reg() + } + } } /// Visit all successors of a block with a given visitor closure. 
diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index aff4bcae2669..dae9ff983d23 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -749,7 +749,11 @@ impl<'a> Verifier<'a> { } // Exhaustive list so we can't forget to add new formats - Unary { .. } + AtomicCas { .. } + | AtomicRmw { .. } + | LoadNoOffset { .. } + | StoreNoOffset { .. } + | Unary { .. } | UnaryConst { .. } | UnaryImm { .. } | UnaryIeee32 { .. } diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index ba4543d39fd0..8d73e2d1e469 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -498,6 +498,10 @@ pub fn write_operands( let pool = &dfg.value_lists; use crate::ir::instructions::InstructionData::*; match dfg[inst] { + AtomicRmw { op, args, .. } => write!(w, " {}, {}, {}", op, args[0], args[1]), + AtomicCas { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]), + LoadNoOffset { flags, arg, .. } => write!(w, "{} {}", flags, arg), + StoreNoOffset { flags, args, .. } => write!(w, "{} {}, {}", flags, args[0], args[1]), Unary { arg, .. } => write!(w, " {}", arg), UnaryImm { imm, .. } => write!(w, " {}", imm), UnaryIeee32 { imm, .. } => write!(w, " {}", imm), diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 4d483847fe75..44a2ea30e31e 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -3202,6 +3202,52 @@ impl<'a> Parser<'a> { code, } } + InstructionFormat::AtomicCas => { + let flags = self.optional_memflags(); + let addr = self.match_value("expected SSA value address")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let expected = self.match_value("expected SSA value address")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let replacement = self.match_value("expected SSA value address")?; + InstructionData::AtomicCas { + opcode, + flags, + args: [addr, expected, replacement], + } + } + InstructionFormat::AtomicRmw => { + let flags = self.optional_memflags(); + let op = self.match_enum("expected AtomicRmwOp")?; + let addr = self.match_value("expected SSA value address")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let arg2 = self.match_value("expected SSA value address")?; + InstructionData::AtomicRmw { + opcode, + flags, + op, + args: [addr, arg2], + } + } + InstructionFormat::LoadNoOffset => { + let flags = self.optional_memflags(); + let addr = self.match_value("expected SSA value address")?; + InstructionData::LoadNoOffset { + opcode, + flags, + arg: addr, + } + } + InstructionFormat::StoreNoOffset => { + let flags = self.optional_memflags(); + let arg = self.match_value("expected SSA value operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let addr = self.match_value("expected SSA value address")?; + InstructionData::StoreNoOffset { + opcode, + flags, + args: [arg, addr], + } + } }; Ok(idata) } diff --git a/cranelift/serde/src/serde_clif_json.rs b/cranelift/serde/src/serde_clif_json.rs index 80ee84633a5a..3ec99175536c 100644 --- a/cranelift/serde/src/serde_clif_json.rs +++ b/cranelift/serde/src/serde_clif_json.rs @@ -252,6 +252,27 @@ pub enum SerInstData { cond: String, code: String, }, + AtomicCas { + opcode: String, + args: [String; 3], + flags: String, + }, + AtomicRmw { + opcode: String, + args: [String; 2], + flags: String, + op: String, + }, + LoadNoOffset { + opcode: String, + arg: String, + 
flags: String, + }, + StoreNoOffset { + opcode: String, + args: [String; 2], + flags: String, + }, } /// Convert Cranelift IR instructions to JSON format. @@ -739,6 +760,53 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { cond: cond.to_string(), code: code.to_string(), }, + InstructionData::AtomicCas { + opcode, + args, + flags, + } => { + let hold_args = [ + args[0].to_string(), + args[1].to_string(), + args[2].to_string(), + ]; + SerInstData::AtomicCas { + opcode: opcode.to_string(), + args: hold_args, + flags: flags.to_string(), + } + } + InstructionData::AtomicRmw { + opcode, + args, + flags, + op, + } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::AtomicRmw { + opcode: opcode.to_string(), + args: hold_args, + flags: flags.to_string(), + op: op.to_string(), + } + } + InstructionData::LoadNoOffset { opcode, arg, flags } => SerInstData::LoadNoOffset { + opcode: opcode.to_string(), + arg: arg.to_string(), + flags: flags.to_string(), + }, + InstructionData::StoreNoOffset { + opcode, + args, + flags, + } => { + let hold_args = [args[0].to_string(), args[1].to_string()]; + SerInstData::StoreNoOffset { + opcode: opcode.to_string(), + args: hold_args, + flags: flags.to_string(), + } + } } } diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 79eae5c2a6b9..c7a95ccabc7f 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -36,7 +36,7 @@ use cranelift_codegen::ir::condcodes::{FloatCC, IntCC}; use cranelift_codegen::ir::immediates::Offset32; use cranelift_codegen::ir::types::*; use cranelift_codegen::ir::{ - self, ConstantData, InstBuilder, JumpTableData, MemFlags, Value, ValueLabel, + self, AtomicRmwOp, ConstantData, InstBuilder, JumpTableData, MemFlags, Value, ValueLabel, }; use cranelift_codegen::packed_option::ReservedValue; use cranelift_frontend::{FunctionBuilder, Variable}; @@ -1051,74 +1051,285 @@ pub fn translate_operator( let index = FuncIndex::from_u32(*function_index); state.push1(environ.translate_ref_func(builder.cursor(), index)?); } - Operator::AtomicNotify { .. } - | Operator::I32AtomicWait { .. } - | Operator::I64AtomicWait { .. } - | Operator::I32AtomicLoad { .. } - | Operator::I64AtomicLoad { .. } - | Operator::I32AtomicLoad8U { .. } - | Operator::I32AtomicLoad16U { .. } - | Operator::I64AtomicLoad8U { .. } - | Operator::I64AtomicLoad16U { .. } - | Operator::I64AtomicLoad32U { .. } - | Operator::I32AtomicStore { .. } - | Operator::I64AtomicStore { .. } - | Operator::I32AtomicStore8 { .. } - | Operator::I32AtomicStore16 { .. } - | Operator::I64AtomicStore8 { .. } - | Operator::I64AtomicStore16 { .. } - | Operator::I64AtomicStore32 { .. } - | Operator::I32AtomicRmwAdd { .. } - | Operator::I64AtomicRmwAdd { .. } - | Operator::I32AtomicRmw8AddU { .. } - | Operator::I32AtomicRmw16AddU { .. } - | Operator::I64AtomicRmw8AddU { .. } - | Operator::I64AtomicRmw16AddU { .. } - | Operator::I64AtomicRmw32AddU { .. } - | Operator::I32AtomicRmwSub { .. } - | Operator::I64AtomicRmwSub { .. } - | Operator::I32AtomicRmw8SubU { .. } - | Operator::I32AtomicRmw16SubU { .. } - | Operator::I64AtomicRmw8SubU { .. } - | Operator::I64AtomicRmw16SubU { .. } - | Operator::I64AtomicRmw32SubU { .. } - | Operator::I32AtomicRmwAnd { .. } - | Operator::I64AtomicRmwAnd { .. } - | Operator::I32AtomicRmw8AndU { .. } - | Operator::I32AtomicRmw16AndU { .. } - | Operator::I64AtomicRmw8AndU { .. } - | Operator::I64AtomicRmw16AndU { .. 
} - | Operator::I64AtomicRmw32AndU { .. } - | Operator::I32AtomicRmwOr { .. } - | Operator::I64AtomicRmwOr { .. } - | Operator::I32AtomicRmw8OrU { .. } - | Operator::I32AtomicRmw16OrU { .. } - | Operator::I64AtomicRmw8OrU { .. } - | Operator::I64AtomicRmw16OrU { .. } - | Operator::I64AtomicRmw32OrU { .. } - | Operator::I32AtomicRmwXor { .. } - | Operator::I64AtomicRmwXor { .. } - | Operator::I32AtomicRmw8XorU { .. } - | Operator::I32AtomicRmw16XorU { .. } - | Operator::I64AtomicRmw8XorU { .. } - | Operator::I64AtomicRmw16XorU { .. } - | Operator::I64AtomicRmw32XorU { .. } - | Operator::I32AtomicRmwXchg { .. } - | Operator::I64AtomicRmwXchg { .. } - | Operator::I32AtomicRmw8XchgU { .. } - | Operator::I32AtomicRmw16XchgU { .. } - | Operator::I64AtomicRmw8XchgU { .. } - | Operator::I64AtomicRmw16XchgU { .. } - | Operator::I64AtomicRmw32XchgU { .. } - | Operator::I32AtomicRmwCmpxchg { .. } - | Operator::I64AtomicRmwCmpxchg { .. } - | Operator::I32AtomicRmw8CmpxchgU { .. } - | Operator::I32AtomicRmw16CmpxchgU { .. } - | Operator::I64AtomicRmw8CmpxchgU { .. } - | Operator::I64AtomicRmw16CmpxchgU { .. } - | Operator::I64AtomicRmw32CmpxchgU { .. } - | Operator::AtomicFence { .. } => { - return Err(wasm_unsupported!("proposed thread operator {:?}", op)); + Operator::I32AtomicWait { .. } | Operator::I64AtomicWait { .. } => { + // The WebAssembly MVP only supports one linear memory and + // wasmparser will ensure that the memory indices specified are + // zero. + let implied_ty = match op { + Operator::I64AtomicWait { .. } => I64, + Operator::I32AtomicWait { .. } => I32, + _ => unreachable!(), + }; + let heap_index = MemoryIndex::from_u32(0); + let heap = state.get_heap(builder.func, 0, environ)?; + let timeout = state.pop1(); // 64 (fixed) + let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`) + let addr = state.pop1(); // 32 (fixed) + assert!(builder.func.dfg.value_type(expected) == implied_ty); + // `fn translate_atomic_wait` can inspect the type of `expected` to figure out what + // code it needs to generate, if it wants. + let res = environ.translate_atomic_wait( + builder.cursor(), + heap_index, + heap, + addr, + expected, + timeout, + )?; + state.push1(res); + } + Operator::AtomicNotify { .. } => { + // The WebAssembly MVP only supports one linear memory and + // wasmparser will ensure that the memory indices specified are + // zero. 
+ let heap_index = MemoryIndex::from_u32(0); + let heap = state.get_heap(builder.func, 0, environ)?; + let count = state.pop1(); // 32 (fixed) + let addr = state.pop1(); // 32 (fixed) + let res = + environ.translate_atomic_notify(builder.cursor(), heap_index, heap, addr, count)?; + state.push1(res); + } + Operator::I32AtomicLoad { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I32, I32, *offset, builder, state, environ)?, + Operator::I64AtomicLoad { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I64, I64, *offset, builder, state, environ)?, + Operator::I32AtomicLoad8U { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I32, I8, *offset, builder, state, environ)?, + Operator::I32AtomicLoad16U { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I32, I16, *offset, builder, state, environ)?, + Operator::I64AtomicLoad8U { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I64, I8, *offset, builder, state, environ)?, + Operator::I64AtomicLoad16U { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I64, I16, *offset, builder, state, environ)?, + Operator::I64AtomicLoad32U { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_load(I64, I32, *offset, builder, state, environ)?, + + Operator::I32AtomicStore { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I32, *offset, builder, state, environ)?, + Operator::I64AtomicStore { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I64, *offset, builder, state, environ)?, + Operator::I32AtomicStore8 { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I8, *offset, builder, state, environ)?, + Operator::I32AtomicStore16 { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I16, *offset, builder, state, environ)?, + Operator::I64AtomicStore8 { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I8, *offset, builder, state, environ)?, + Operator::I64AtomicStore16 { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I16, *offset, builder, state, environ)?, + Operator::I64AtomicStore32 { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_store(I32, *offset, builder, state, environ)?, + + Operator::I32AtomicRmwAdd { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I32, AtomicRmwOp::Add, *offset, builder, state, environ)?, + Operator::I64AtomicRmwAdd { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I64, AtomicRmwOp::Add, *offset, builder, state, environ)?, + Operator::I32AtomicRmw8AddU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I8, AtomicRmwOp::Add, *offset, builder, state, environ)?, + Operator::I32AtomicRmw16AddU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I16, AtomicRmwOp::Add, *offset, builder, state, environ)?, + Operator::I64AtomicRmw8AddU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I8, AtomicRmwOp::Add, *offset, builder, state, environ)?, + Operator::I64AtomicRmw16AddU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I16, AtomicRmwOp::Add, *offset, builder, state, environ)?, + Operator::I64AtomicRmw32AddU { + memarg: MemoryImmediate { flags: _, offset }, + } => 
translate_atomic_rmw(I64, I32, AtomicRmwOp::Add, *offset, builder, state, environ)?, + + Operator::I32AtomicRmwSub { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I32, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + Operator::I64AtomicRmwSub { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I64, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + Operator::I32AtomicRmw8SubU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I8, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + Operator::I32AtomicRmw16SubU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I16, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + Operator::I64AtomicRmw8SubU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I8, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + Operator::I64AtomicRmw16SubU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I16, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + Operator::I64AtomicRmw32SubU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I32, AtomicRmwOp::Sub, *offset, builder, state, environ)?, + + Operator::I32AtomicRmwAnd { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I32, AtomicRmwOp::And, *offset, builder, state, environ)?, + Operator::I64AtomicRmwAnd { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I64, AtomicRmwOp::And, *offset, builder, state, environ)?, + Operator::I32AtomicRmw8AndU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I8, AtomicRmwOp::And, *offset, builder, state, environ)?, + Operator::I32AtomicRmw16AndU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I16, AtomicRmwOp::And, *offset, builder, state, environ)?, + Operator::I64AtomicRmw8AndU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I8, AtomicRmwOp::And, *offset, builder, state, environ)?, + Operator::I64AtomicRmw16AndU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I16, AtomicRmwOp::And, *offset, builder, state, environ)?, + Operator::I64AtomicRmw32AndU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I32, AtomicRmwOp::And, *offset, builder, state, environ)?, + + Operator::I32AtomicRmwOr { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I32, AtomicRmwOp::Or, *offset, builder, state, environ)?, + Operator::I64AtomicRmwOr { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I64, AtomicRmwOp::Or, *offset, builder, state, environ)?, + Operator::I32AtomicRmw8OrU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I8, AtomicRmwOp::Or, *offset, builder, state, environ)?, + Operator::I32AtomicRmw16OrU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I32, I16, AtomicRmwOp::Or, *offset, builder, state, environ)?, + Operator::I64AtomicRmw8OrU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I8, AtomicRmwOp::Or, *offset, builder, state, environ)?, + Operator::I64AtomicRmw16OrU { + memarg: MemoryImmediate { flags: _, offset }, + } => translate_atomic_rmw(I64, I16, AtomicRmwOp::Or, *offset, builder, state, environ)?, + Operator::I64AtomicRmw32OrU { + memarg: 
MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I64, I32, AtomicRmwOp::Or, *offset, builder, state, environ)?,
+
+        Operator::I32AtomicRmwXor {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I32, I32, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmwXor {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I64, I64, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+        Operator::I32AtomicRmw8XorU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I32, I8, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+        Operator::I32AtomicRmw16XorU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I32, I16, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw8XorU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I64, I8, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw16XorU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I64, I16, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw32XorU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I64, I32, AtomicRmwOp::Xor, *offset, builder, state, environ)?,
+
+        Operator::I32AtomicRmwXchg {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(
+            I32,
+            I32,
+            AtomicRmwOp::Xchg,
+            *offset,
+            builder,
+            state,
+            environ,
+        )?,
+        Operator::I64AtomicRmwXchg {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(
+            I64,
+            I64,
+            AtomicRmwOp::Xchg,
+            *offset,
+            builder,
+            state,
+            environ,
+        )?,
+        Operator::I32AtomicRmw8XchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I32, I8, AtomicRmwOp::Xchg, *offset, builder, state, environ)?,
+        Operator::I32AtomicRmw16XchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(
+            I32,
+            I16,
+            AtomicRmwOp::Xchg,
+            *offset,
+            builder,
+            state,
+            environ,
+        )?,
+        Operator::I64AtomicRmw8XchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(I64, I8, AtomicRmwOp::Xchg, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw16XchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(
+            I64,
+            I16,
+            AtomicRmwOp::Xchg,
+            *offset,
+            builder,
+            state,
+            environ,
+        )?,
+        Operator::I64AtomicRmw32XchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_rmw(
+            I64,
+            I32,
+            AtomicRmwOp::Xchg,
+            *offset,
+            builder,
+            state,
+            environ,
+        )?,
+
+        Operator::I32AtomicRmwCmpxchg {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I32, I32, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmwCmpxchg {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I64, I64, *offset, builder, state, environ)?,
+        Operator::I32AtomicRmw8CmpxchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I32, I8, *offset, builder, state, environ)?,
+        Operator::I32AtomicRmw16CmpxchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I32, I16, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw8CmpxchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I64, I8, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw16CmpxchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I64, I16, *offset, builder, state, environ)?,
+        Operator::I64AtomicRmw32CmpxchgU {
+            memarg: MemoryImmediate { flags: _, offset },
+        } => translate_atomic_cas(I64, I32, *offset, builder, state, environ)?,
+
+        Operator::AtomicFence { .. } => {
+            builder.ins().fence();
         }
         Operator::MemoryCopy => {
             // The WebAssembly MVP only supports one linear memory and
@@ -1906,7 +2117,7 @@ fn translate_store(
         environ.pointer_type(),
         builder,
     );
-    // See the comments in `translate_load` about the flags.
+    // See the comments in `prepare_load` about the flags.
     let flags = MemFlags::new();
     builder
         .ins()
@@ -1930,6 +2141,233 @@ fn translate_icmp(cc: IntCC, builder: &mut FunctionBuilder, state: &mut FuncTran
     state.push1(builder.ins().bint(I32, val));
 }

+// For an atomic memory operation, emit an alignment check for the linear memory address,
+// and then compute the final effective address.
+fn finalise_atomic_mem_addr<FE: FuncEnvironment + ?Sized>(
+    linear_mem_addr: Value,
+    offset: u32,
+    access_ty: Type,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<Value> {
+    // Check the alignment of `linear_mem_addr`.
+    let access_ty_bytes = access_ty.bytes();
+    let final_lma = builder.ins().iadd_imm(linear_mem_addr, i64::from(offset));
+    if access_ty_bytes != 1 {
+        assert!(access_ty_bytes == 2 || access_ty_bytes == 4 || access_ty_bytes == 8);
+        let final_lma_misalignment = builder
+            .ins()
+            .band_imm(final_lma, i64::from(access_ty_bytes - 1));
+        let f = builder
+            .ins()
+            .ifcmp_imm(final_lma_misalignment, i64::from(0));
+        builder
+            .ins()
+            .trapif(IntCC::NotEqual, f, ir::TrapCode::HeapMisaligned);
+    }
+
+    // Compute the final effective address. Note, we don't yet support multiple linear memories.
+    let heap = state.get_heap(builder.func, 0, environ)?;
+    let (base, offset) = get_heap_addr(
+        heap,
+        final_lma,
+        /*offset=*/ 0,
+        access_ty.bytes(),
+        environ.pointer_type(),
+        builder,
+    );
+
+    let final_effective_address = builder.ins().iadd_imm(base, i64::from(offset));
+    Ok(final_effective_address)
+}
+
+fn translate_atomic_rmw<FE: FuncEnvironment + ?Sized>(
+    widened_ty: Type,
+    access_ty: Type,
+    op: AtomicRmwOp,
+    offset: u32,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let (linear_mem_addr, mut arg2) = state.pop2();
+    let arg2_ty = builder.func.dfg.value_type(arg2);
+
+    // The operation is performed at type `access_ty`, and the old value is zero-extended
+    // to type `widened_ty`.
+    match access_ty {
+        I8 | I16 | I32 | I64 => {}
+        _ => {
+            return Err(wasm_unsupported!(
+                "atomic_rmw: unsupported access type {:?}",
+                access_ty
+            ))
+        }
+    };
+    let w_ty_ok = match widened_ty {
+        I32 | I64 => true,
+        _ => false,
+    };
+    assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes());
+
+    assert!(arg2_ty.bytes() >= access_ty.bytes());
+    if arg2_ty.bytes() > access_ty.bytes() {
+        arg2 = builder.ins().ireduce(access_ty, arg2);
+    }
+
+    let final_effective_address =
+        finalise_atomic_mem_addr(linear_mem_addr, offset, access_ty, builder, state, environ)?;
+
+    // See the comments in `prepare_load` about the flags.
+    let flags = MemFlags::new();
+    let mut res = builder
+        .ins()
+        .atomic_rmw(access_ty, flags, op, final_effective_address, arg2);
+    if access_ty != widened_ty {
+        res = builder.ins().uextend(widened_ty, res);
+    }
+    state.push1(res);
+    Ok(())
+}
+
+fn translate_atomic_cas<FE: FuncEnvironment + ?Sized>(
+    widened_ty: Type,
+    access_ty: Type,
+    offset: u32,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let (linear_mem_addr, mut expected, mut replacement) = state.pop3();
+    let expected_ty = builder.func.dfg.value_type(expected);
+    let replacement_ty = builder.func.dfg.value_type(replacement);
+
+    // The compare-and-swap is performed at type `access_ty`, and the old value is zero-extended
+    // to type `widened_ty`.
+    match access_ty {
+        I8 | I16 | I32 | I64 => {}
+        _ => {
+            return Err(wasm_unsupported!(
+                "atomic_cas: unsupported access type {:?}",
+                access_ty
+            ))
+        }
+    };
+    let w_ty_ok = match widened_ty {
+        I32 | I64 => true,
+        _ => false,
+    };
+    assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes());
+
+    assert!(expected_ty.bytes() >= access_ty.bytes());
+    if expected_ty.bytes() > access_ty.bytes() {
+        expected = builder.ins().ireduce(access_ty, expected);
+    }
+    assert!(replacement_ty.bytes() >= access_ty.bytes());
+    if replacement_ty.bytes() > access_ty.bytes() {
+        replacement = builder.ins().ireduce(access_ty, replacement);
+    }
+
+    let final_effective_address =
+        finalise_atomic_mem_addr(linear_mem_addr, offset, access_ty, builder, state, environ)?;
+
+    // See the comments in `prepare_load` about the flags.
+    let flags = MemFlags::new();
+    let mut res = builder
+        .ins()
+        .atomic_cas(flags, final_effective_address, expected, replacement);
+    if access_ty != widened_ty {
+        res = builder.ins().uextend(widened_ty, res);
+    }
+    state.push1(res);
+    Ok(())
+}
+
+fn translate_atomic_load<FE: FuncEnvironment + ?Sized>(
+    widened_ty: Type,
+    access_ty: Type,
+    offset: u32,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let linear_mem_addr = state.pop1();
+
+    // The load is performed at type `access_ty`, and the loaded value is zero extended
+    // to `widened_ty`.
+    match access_ty {
+        I8 | I16 | I32 | I64 => {}
+        _ => {
+            return Err(wasm_unsupported!(
+                "atomic_load: unsupported access type {:?}",
+                access_ty
+            ))
+        }
+    };
+    let w_ty_ok = match widened_ty {
+        I32 | I64 => true,
+        _ => false,
+    };
+    assert!(w_ty_ok && widened_ty.bytes() >= access_ty.bytes());
+
+    let final_effective_address =
+        finalise_atomic_mem_addr(linear_mem_addr, offset, access_ty, builder, state, environ)?;
+
+    // See the comments in `prepare_load` about the flags.
+    let flags = MemFlags::new();
+    let mut res = builder
+        .ins()
+        .atomic_load(access_ty, flags, final_effective_address);
+    if access_ty != widened_ty {
+        res = builder.ins().uextend(widened_ty, res);
+    }
+    state.push1(res);
+    Ok(())
+}
+
+fn translate_atomic_store<FE: FuncEnvironment + ?Sized>(
+    access_ty: Type,
+    offset: u32,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<()> {
+    let (linear_mem_addr, mut data) = state.pop2();
+    let data_ty = builder.func.dfg.value_type(data);
+
+    // The operation is performed at type `access_ty`, and the data to be stored may first
+    // need to be narrowed accordingly.
+    match access_ty {
+        I8 | I16 | I32 | I64 => {}
+        _ => {
+            return Err(wasm_unsupported!(
+                "atomic_store: unsupported access type {:?}",
+                access_ty
+            ))
+        }
+    };
+    let d_ty_ok = match data_ty {
+        I32 | I64 => true,
+        _ => false,
+    };
+    assert!(d_ty_ok && data_ty.bytes() >= access_ty.bytes());
+
+    if data_ty.bytes() > access_ty.bytes() {
+        data = builder.ins().ireduce(access_ty, data);
+    }
+
+    let final_effective_address =
+        finalise_atomic_mem_addr(linear_mem_addr, offset, access_ty, builder, state, environ)?;
+
+    // See the comments in `prepare_load` about the flags.
+    let flags = MemFlags::new();
+    builder
+        .ins()
+        .atomic_store(flags, data, final_effective_address);
+    Ok(())
+}
+
 fn translate_vector_icmp(
     cc: IntCC,
     needed_type: Type,
diff --git a/cranelift/wasm/src/environ/dummy.rs b/cranelift/wasm/src/environ/dummy.rs
index 9047df05d27e..9b629f83292d 100644
--- a/cranelift/wasm/src/environ/dummy.rs
+++ b/cranelift/wasm/src/environ/dummy.rs
@@ -538,6 +538,29 @@ impl<'dummy_environment> FuncEnvironment for DummyFuncEnvironment<'dummy_environ
     ) -> WasmResult<()> {
         Ok(())
     }
+
+    fn translate_atomic_wait(
+        &mut self,
+        mut pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _addr: ir::Value,
+        _expected: ir::Value,
+        _timeout: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, -1))
+    }
+
+    fn translate_atomic_notify(
+        &mut self,
+        mut pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _addr: ir::Value,
+        _count: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Ok(pos.ins().iconst(I32, 0))
+    }
 }

 impl TargetEnvironment for DummyEnvironment {
diff --git a/cranelift/wasm/src/environ/spec.rs b/cranelift/wasm/src/environ/spec.rs
index 9a3498b05eab..cf02a829b80e 100644
--- a/cranelift/wasm/src/environ/spec.rs
+++ b/cranelift/wasm/src/environ/spec.rs
@@ -546,6 +546,38 @@ pub trait FuncEnvironment: TargetEnvironment {
         val: ir::Value,
     ) -> WasmResult<()>;

+    /// Translate an `i32.atomic.wait` or `i64.atomic.wait` WebAssembly instruction.
+    /// The `index` provided identifies the linear memory containing the value
+    /// to wait on, and `heap` is the heap reference returned by `make_heap`
+    /// for the same index. Whether the waited-on value is 32- or 64-bit can be
+    /// determined by examining the type of `expected`, which must be I32 or I64.
+    ///
+    /// Returns an i32, which is negative if the helper call failed.
+    fn translate_atomic_wait(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+        addr: ir::Value,
+        expected: ir::Value,
+        timeout: ir::Value,
+    ) -> WasmResult<ir::Value>;
+
+    /// Translate an `atomic.notify` WebAssembly instruction.
+    /// The `index` provided identifies the linear memory containing the value
+    /// to wait on, and `heap` is the heap reference returned by `make_heap`
+    /// for the same index.
+    ///
+    /// Returns an i32, which is negative if the helper call failed.
+    fn translate_atomic_notify(
+        &mut self,
+        pos: FuncCursor,
+        index: MemoryIndex,
+        heap: ir::Heap,
+        addr: ir::Value,
+        count: ir::Value,
+    ) -> WasmResult<ir::Value>;
+
     /// Emit code at the beginning of every wasm loop.
     ///
     /// This can be used to insert explicit interrupt or safepoint checking at
diff --git a/crates/environ/src/func_environ.rs b/crates/environ/src/func_environ.rs
index 2def7447dabe..e2a4be7530d5 100644
--- a/crates/environ/src/func_environ.rs
+++ b/crates/environ/src/func_environ.rs
@@ -1612,6 +1612,33 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
         Ok(())
     }

+    fn translate_atomic_wait(
+        &mut self,
+        _pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _addr: ir::Value,
+        _expected: ir::Value,
+        _timeout: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Err(WasmError::Unsupported(
+            "wasm atomics (fn translate_atomic_wait)".to_string(),
+        ))
+    }
+
+    fn translate_atomic_notify(
+        &mut self,
+        _pos: FuncCursor,
+        _index: MemoryIndex,
+        _heap: ir::Heap,
+        _addr: ir::Value,
+        _count: ir::Value,
+    ) -> WasmResult<ir::Value> {
+        Err(WasmError::Unsupported(
+            "wasm atomics (fn translate_atomic_notify)".to_string(),
+        ))
+    }
+
     fn translate_loop_header(&mut self, mut pos: FuncCursor) -> WasmResult<()> {
         if !self.tunables.interruptable {
             return Ok(());
diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs
index f8ec2acfae01..02f65774632a 100644
--- a/crates/wasmtime/src/trap.rs
+++ b/crates/wasmtime/src/trap.rs
@@ -109,6 +109,7 @@ impl Trap {
         let desc = match code {
             StackOverflow => "call stack exhausted",
             HeapOutOfBounds => "out of bounds memory access",
+            HeapMisaligned => "misaligned memory access",
             TableOutOfBounds => "undefined element: out of bounds table access",
             IndirectCallToNull => "uninitialized element",
             BadSignature => "indirect call type mismatch",
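
The `band_imm`/`trapif` sequence in `finalise_atomic_mem_addr` is the usual power-of-two alignment test: for an access size of 2, 4 or 8 bytes, the effective address is misaligned exactly when `addr & (size - 1)` is non-zero. A minimal stand-alone sketch of the same check in plain Rust (the names here are illustrative only, not code from the patch):

```rust
/// Returns true when `addr` is suitably aligned for an access of `size_bytes`
/// (which must be a power of two), mirroring the `band_imm(final_lma, size - 1)`
/// followed by `trapif(NotEqual, ..., HeapMisaligned)` sequence in the patch.
fn is_aligned(addr: u64, size_bytes: u64) -> bool {
    assert!(size_bytes.is_power_of_two());
    addr & (size_bytes - 1) == 0
}

fn main() {
    // 4-byte atomics must sit on 4-byte boundaries.
    assert!(is_aligned(0x1000, 4));
    assert!(!is_aligned(0x1002, 4));
    // 1-byte accesses are always aligned, which is why the patch skips the
    // check entirely when `access_ty_bytes == 1`.
    assert!(is_aligned(0x1003, 1));
}
```

Unlike plain `load`/`store`, which tolerate unaligned addresses, the threads proposal requires atomic accesses at a misaligned effective address to trap, which is what the new `HeapMisaligned` description in `crates/wasmtime/src/trap.rs` surfaces to the embedder.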
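The sub-word RMW forms (`i64.atomic.rmw8.add_u` and friends) all follow the same shape in `translate_atomic_rmw`: narrow the operand to the access type, perform the memory operation at that width, then zero-extend the old value back to the type on the Wasm value stack. A rough sketch of the emitted sequence, written against the builder methods this patch generates; the helper name and the already bounds- and alignment-checked `effective_addr` parameter are assumptions for illustration, not part of the patch:

```rust
use cranelift_codegen::ir::types::{I64, I8};
use cranelift_codegen::ir::{AtomicRmwOp, InstBuilder, MemFlags, Value};
use cranelift_frontend::FunctionBuilder;

/// Hypothetical helper: emit the CLIF for `i64.atomic.rmw8.add_u`, given a
/// bounds-checked, alignment-checked effective address and the I64 operand
/// popped from the Wasm stack.
fn emit_i64_atomic_rmw8_add_u(
    builder: &mut FunctionBuilder,
    effective_addr: Value,
    operand: Value,
) -> Value {
    // Narrow the operand to the 8-bit access type, as `translate_atomic_rmw`
    // does when `arg2_ty.bytes() > access_ty.bytes()`.
    let narrow = builder.ins().ireduce(I8, operand);
    // Perform the atomic read-modify-write at the access type.
    let flags = MemFlags::new();
    let old = builder
        .ins()
        .atomic_rmw(I8, flags, AtomicRmwOp::Add, effective_addr, narrow);
    // Zero-extend the old value back to the widened (stack) type.
    builder.ins().uextend(I64, old)
}
```

The CAS and atomic load paths use the same `ireduce`/`uextend` bracketing around their respective instructions, which is why all of them funnel the address computation through `finalise_atomic_mem_addr`.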