From 6a5bddf02d9f136bb0f998d26a91f97dce9860bc Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 31 Aug 2023 18:13:23 +0100 Subject: [PATCH] cranelift-interpreter: Fix SIMD shifts and rotates (#6939) * cranelift-interpreter: Fix SIMD `ishl`/`{s,u}`shr * fuzzgen: Enable a few more ops * cranelift: Fix tests for {u,s}shr * fuzzgen: Change pattern matching arms for shifts Co-Authored-By: Jamey Sharp --------- Co-authored-by: Jamey Sharp --- .../filetests/runtests/simd-ishl.clif | 1 + .../filetests/runtests/simd-sshr.clif | 3 +- .../filetests/runtests/simd-ushr.clif | 3 +- cranelift/fuzzgen/src/function_generator.rs | 137 ++++-------------- cranelift/interpreter/src/step.rs | 42 +++--- 5 files changed, 57 insertions(+), 129 deletions(-) diff --git a/cranelift/filetests/filetests/runtests/simd-ishl.clif b/cranelift/filetests/filetests/runtests/simd-ishl.clif index 524bc2745ca5..648d8f9fe9b0 100644 --- a/cranelift/filetests/filetests/runtests/simd-ishl.clif +++ b/cranelift/filetests/filetests/runtests/simd-ishl.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target s390x diff --git a/cranelift/filetests/filetests/runtests/simd-sshr.clif b/cranelift/filetests/filetests/runtests/simd-sshr.clif index a5fc4782176e..2df402270917 100644 --- a/cranelift/filetests/filetests/runtests/simd-sshr.clif +++ b/cranelift/filetests/filetests/runtests/simd-sshr.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target s390x @@ -70,7 +71,7 @@ block0(v0: i8x16): v2 = sshr v0, v1 return v2 } -; run: %i8x16_shl_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0xe0 0 0 0 0 0 0 0 0] +; run: %i8x16_sshr_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0xe0 0 0 0 0 0 0 0 0] function %i16x8_sshr_const(i16x8) -> i16x8 { block0(v0: i16x8): diff --git a/cranelift/filetests/filetests/runtests/simd-ushr.clif b/cranelift/filetests/filetests/runtests/simd-ushr.clif index 5b188523e417..5d8cf88bef2f 100644 --- a/cranelift/filetests/filetests/runtests/simd-ushr.clif +++ b/cranelift/filetests/filetests/runtests/simd-ushr.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target s390x @@ -54,7 +55,7 @@ block0(v0: i8x16): v2 = ushr v0, v1 return v2 } -; run: %i8x16_shl_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0x20 0 0 0 0 0 0 0 0] +; run: %i8x16_ushr_const([0x01 0x02 0x04 0x08 0x10 0x20 0x40 0x80 0 0 0 0 0 0 0 0]) == [0 0 0x01 0x02 0x04 0x08 0x10 0x20 0 0 0 0 0 0 0 0] function %i16x8_ushr_const(i16x8) -> i16x8 { block0(v0: i16x8): diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index 15e78443317e..216adce67662 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -525,6 +525,8 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - (Opcode::Cls, &[I32], &[I32]), (Opcode::Cls, &[I64], &[I64]), (Opcode::Cls, &[I128], &[I128]), + // TODO + (Opcode::Bitselect, &[_, _, _], &[F32 | F64]), // https://github.com/bytecodealliance/wasmtime/issues/4897 // https://github.com/bytecodealliance/wasmtime/issues/4899 ( @@ -589,6 +591,15 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - &[], &[I8X16 | I16X8 | I32X4 | I64X2 | F32X4 | F64X2] ), + // TODO + ( + Opcode::Sshr | Opcode::Ushr | Opcode::Ishl, + &[I8X16 | I16X8 | I32X4 | I64X2, I128] + ), + ( + Opcode::Rotr | Opcode::Rotl, + &[I8X16 | I16X8 | I32X4 | I64X2, _] + ), ) } @@ -647,6 +658,17 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - // https://github.com/bytecodealliance/wasmtime/issues/6104 (Opcode::Bitcast, &[I128], &[_]), (Opcode::Bitcast, &[_], &[I128]), + // TODO + ( + Opcode::Sshr | Opcode::Ushr | Opcode::Ishl, + &[I8X16 | I16X8 | I32X4 | I64X2, I128] + ), + ( + Opcode::Rotr | Opcode::Rotl, + &[I8X16 | I16X8 | I32X4 | I64X2, _] + ), + // TODO + (Opcode::Bitselect, &[_, _, _], &[F32 | F64]), ) } @@ -689,6 +711,8 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - // https://github.com/bytecodealliance/wasmtime/issues/6104 (Opcode::Bitcast, &[I128], &[_]), (Opcode::Bitcast, &[_], &[I128]), + // TODO + (Opcode::Bitselect, &[_, _, _], &[F32 | F64]), ) } @@ -737,8 +761,9 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - (Opcode::Bitcast, &[I128], &[_]), (Opcode::Bitcast, &[_], &[I128]), // TODO - (Opcode::SelectSpectreGuard, &[_, _, _], &[F32]), - (Opcode::SelectSpectreGuard, &[_, _, _], &[F64]), + (Opcode::SelectSpectreGuard, &[_, _, _], &[F32 | F64]), + // TODO + (Opcode::Bitselect, &[_, _, _], &[F32 | F64]), ) } @@ -896,10 +921,6 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::ScalarToVector), (Opcode::X86Pmaddubsw), (Opcode::X86Cvtt2dq), - (Opcode::Bitselect, &[F32, F32, F32], &[F32]), - (Opcode::Bitselect, &[F64, F64, F64], &[F64]), - (Opcode::Bitselect, &[F32X4, F32X4, F32X4], &[F32X4]), - (Opcode::Bitselect, &[F64X2, F64X2, F64X2], &[F64X2]), (Opcode::VanyTrue, &[F32X4], &[I8]), (Opcode::VanyTrue, &[F64X2], &[I8]), (Opcode::VhighBits, &[F32X4], &[I8]), @@ -952,10 +973,6 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::VhighBits, &[I64X2], &[I64X2]), (Opcode::VhighBits, &[F32X4], &[I64X2]), (Opcode::VhighBits, &[F64X2], &[I64X2]), - (Opcode::Ineg, &[I8X16], &[I8X16]), - (Opcode::Ineg, &[I16X8], &[I16X8]), - (Opcode::Ineg, &[I32X4], &[I32X4]), - (Opcode::Ineg, &[I64X2], &[I64X2]), (Opcode::Umulhi, &[I128, I128], &[I128]), (Opcode::Smulhi, &[I128, I128], &[I128]), // https://github.com/bytecodealliance/wasmtime/issues/6073 @@ -966,106 +983,6 @@ static OPCODE_SIGNATURES: Lazy> = Lazy::new(|| { (Opcode::Isplit, &[I64], &[I32, I32]), (Opcode::Isplit, &[I32], &[I16, I16]), (Opcode::Isplit, &[I16], &[I8, I8]), - (Opcode::Rotl, &[I8X16, I8], &[I8X16]), - (Opcode::Rotl, &[I8X16, I16], &[I8X16]), - (Opcode::Rotl, &[I8X16, I32], &[I8X16]), - (Opcode::Rotl, &[I8X16, I64], &[I8X16]), - (Opcode::Rotl, &[I8X16, I128], &[I8X16]), - (Opcode::Rotl, &[I16X8, I8], &[I16X8]), - (Opcode::Rotl, &[I16X8, I16], &[I16X8]), - (Opcode::Rotl, &[I16X8, I32], &[I16X8]), - (Opcode::Rotl, &[I16X8, I64], &[I16X8]), - (Opcode::Rotl, &[I16X8, I128], &[I16X8]), - (Opcode::Rotl, &[I32X4, I8], &[I32X4]), - (Opcode::Rotl, &[I32X4, I16], &[I32X4]), - (Opcode::Rotl, &[I32X4, I32], &[I32X4]), - (Opcode::Rotl, &[I32X4, I64], &[I32X4]), - (Opcode::Rotl, &[I32X4, I128], &[I32X4]), - (Opcode::Rotl, &[I64X2, I8], &[I64X2]), - (Opcode::Rotl, &[I64X2, I16], &[I64X2]), - (Opcode::Rotl, &[I64X2, I32], &[I64X2]), - (Opcode::Rotl, &[I64X2, I64], &[I64X2]), - (Opcode::Rotl, &[I64X2, I128], &[I64X2]), - (Opcode::Rotr, &[I8X16, I8], &[I8X16]), - (Opcode::Rotr, &[I8X16, I16], &[I8X16]), - (Opcode::Rotr, &[I8X16, I32], &[I8X16]), - (Opcode::Rotr, &[I8X16, I64], &[I8X16]), - (Opcode::Rotr, &[I8X16, I128], &[I8X16]), - (Opcode::Rotr, &[I16X8, I8], &[I16X8]), - (Opcode::Rotr, &[I16X8, I16], &[I16X8]), - (Opcode::Rotr, &[I16X8, I32], &[I16X8]), - (Opcode::Rotr, &[I16X8, I64], &[I16X8]), - (Opcode::Rotr, &[I16X8, I128], &[I16X8]), - (Opcode::Rotr, &[I32X4, I8], &[I32X4]), - (Opcode::Rotr, &[I32X4, I16], &[I32X4]), - (Opcode::Rotr, &[I32X4, I32], &[I32X4]), - (Opcode::Rotr, &[I32X4, I64], &[I32X4]), - (Opcode::Rotr, &[I32X4, I128], &[I32X4]), - (Opcode::Rotr, &[I64X2, I8], &[I64X2]), - (Opcode::Rotr, &[I64X2, I16], &[I64X2]), - (Opcode::Rotr, &[I64X2, I32], &[I64X2]), - (Opcode::Rotr, &[I64X2, I64], &[I64X2]), - (Opcode::Rotr, &[I64X2, I128], &[I64X2]), - (Opcode::Ishl, &[I8X16, I8], &[I8X16]), - (Opcode::Ishl, &[I8X16, I16], &[I8X16]), - (Opcode::Ishl, &[I8X16, I32], &[I8X16]), - (Opcode::Ishl, &[I8X16, I64], &[I8X16]), - (Opcode::Ishl, &[I8X16, I128], &[I8X16]), - (Opcode::Ishl, &[I16X8, I8], &[I16X8]), - (Opcode::Ishl, &[I16X8, I16], &[I16X8]), - (Opcode::Ishl, &[I16X8, I32], &[I16X8]), - (Opcode::Ishl, &[I16X8, I64], &[I16X8]), - (Opcode::Ishl, &[I16X8, I128], &[I16X8]), - (Opcode::Ishl, &[I32X4, I8], &[I32X4]), - (Opcode::Ishl, &[I32X4, I16], &[I32X4]), - (Opcode::Ishl, &[I32X4, I32], &[I32X4]), - (Opcode::Ishl, &[I32X4, I64], &[I32X4]), - (Opcode::Ishl, &[I32X4, I128], &[I32X4]), - (Opcode::Ishl, &[I64X2, I8], &[I64X2]), - (Opcode::Ishl, &[I64X2, I16], &[I64X2]), - (Opcode::Ishl, &[I64X2, I32], &[I64X2]), - (Opcode::Ishl, &[I64X2, I64], &[I64X2]), - (Opcode::Ishl, &[I64X2, I128], &[I64X2]), - (Opcode::Ushr, &[I8X16, I8], &[I8X16]), - (Opcode::Ushr, &[I8X16, I16], &[I8X16]), - (Opcode::Ushr, &[I8X16, I32], &[I8X16]), - (Opcode::Ushr, &[I8X16, I64], &[I8X16]), - (Opcode::Ushr, &[I8X16, I128], &[I8X16]), - (Opcode::Ushr, &[I16X8, I8], &[I16X8]), - (Opcode::Ushr, &[I16X8, I16], &[I16X8]), - (Opcode::Ushr, &[I16X8, I32], &[I16X8]), - (Opcode::Ushr, &[I16X8, I64], &[I16X8]), - (Opcode::Ushr, &[I16X8, I128], &[I16X8]), - (Opcode::Ushr, &[I32X4, I8], &[I32X4]), - (Opcode::Ushr, &[I32X4, I16], &[I32X4]), - (Opcode::Ushr, &[I32X4, I32], &[I32X4]), - (Opcode::Ushr, &[I32X4, I64], &[I32X4]), - (Opcode::Ushr, &[I32X4, I128], &[I32X4]), - (Opcode::Ushr, &[I64X2, I8], &[I64X2]), - (Opcode::Ushr, &[I64X2, I16], &[I64X2]), - (Opcode::Ushr, &[I64X2, I32], &[I64X2]), - (Opcode::Ushr, &[I64X2, I64], &[I64X2]), - (Opcode::Ushr, &[I64X2, I128], &[I64X2]), - (Opcode::Sshr, &[I8X16, I8], &[I8X16]), - (Opcode::Sshr, &[I8X16, I16], &[I8X16]), - (Opcode::Sshr, &[I8X16, I32], &[I8X16]), - (Opcode::Sshr, &[I8X16, I64], &[I8X16]), - (Opcode::Sshr, &[I8X16, I128], &[I8X16]), - (Opcode::Sshr, &[I16X8, I8], &[I16X8]), - (Opcode::Sshr, &[I16X8, I16], &[I16X8]), - (Opcode::Sshr, &[I16X8, I32], &[I16X8]), - (Opcode::Sshr, &[I16X8, I64], &[I16X8]), - (Opcode::Sshr, &[I16X8, I128], &[I16X8]), - (Opcode::Sshr, &[I32X4, I8], &[I32X4]), - (Opcode::Sshr, &[I32X4, I16], &[I32X4]), - (Opcode::Sshr, &[I32X4, I32], &[I32X4]), - (Opcode::Sshr, &[I32X4, I64], &[I32X4]), - (Opcode::Sshr, &[I32X4, I128], &[I32X4]), - (Opcode::Sshr, &[I64X2, I8], &[I64X2]), - (Opcode::Sshr, &[I64X2, I16], &[I64X2]), - (Opcode::Sshr, &[I64X2, I32], &[I64X2]), - (Opcode::Sshr, &[I64X2, I64], &[I64X2]), - (Opcode::Sshr, &[I64X2, I128], &[I64X2]), (Opcode::Fmin, &[F32X4, F32X4], &[F32X4]), (Opcode::Fmin, &[F64X2, F64X2], &[F64X2]), (Opcode::Fmax, &[F32X4, F32X4], &[F32X4]), diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index f13d2c918400..a8ae06f906ea 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -795,16 +795,16 @@ where Opcode::BandImm => binary(DataValueExt::and, arg(0), imm_as_ctrl_ty()?)?, Opcode::BorImm => binary(DataValueExt::or, arg(0), imm_as_ctrl_ty()?)?, Opcode::BxorImm => binary(DataValueExt::xor, arg(0), imm_as_ctrl_ty()?)?, - Opcode::Rotl => binary(DataValueExt::rotl, arg(0), arg(1))?, - Opcode::Rotr => binary(DataValueExt::rotr, arg(0), arg(1))?, - Opcode::RotlImm => binary(DataValueExt::rotl, arg(0), imm_as_ctrl_ty()?)?, - Opcode::RotrImm => binary(DataValueExt::rotr, arg(0), imm_as_ctrl_ty()?)?, - Opcode::Ishl => binary(DataValueExt::shl, arg(0), arg(1))?, - Opcode::Ushr => binary(DataValueExt::ushr, arg(0), arg(1))?, - Opcode::Sshr => binary(DataValueExt::sshr, arg(0), arg(1))?, - Opcode::IshlImm => binary(DataValueExt::shl, arg(0), imm_as_ctrl_ty()?)?, - Opcode::UshrImm => binary(DataValueExt::ushr, arg(0), imm_as_ctrl_ty()?)?, - Opcode::SshrImm => binary(DataValueExt::sshr, arg(0), imm_as_ctrl_ty()?)?, + Opcode::Rotl => binary(DataValueExt::rotl, arg(0), shift_amt(ctrl_ty, arg(1))?)?, + Opcode::Rotr => binary(DataValueExt::rotr, arg(0), shift_amt(ctrl_ty, arg(1))?)?, + Opcode::RotlImm => binary(DataValueExt::rotl, arg(0), shift_amt(ctrl_ty, imm())?)?, + Opcode::RotrImm => binary(DataValueExt::rotr, arg(0), shift_amt(ctrl_ty, imm())?)?, + Opcode::Ishl => binary(DataValueExt::shl, arg(0), shift_amt(ctrl_ty, arg(1))?)?, + Opcode::Ushr => binary(DataValueExt::ushr, arg(0), shift_amt(ctrl_ty, arg(1))?)?, + Opcode::Sshr => binary(DataValueExt::sshr, arg(0), shift_amt(ctrl_ty, arg(1))?)?, + Opcode::IshlImm => binary(DataValueExt::shl, arg(0), shift_amt(ctrl_ty, imm())?)?, + Opcode::UshrImm => binary(DataValueExt::ushr, arg(0), shift_amt(ctrl_ty, imm())?)?, + Opcode::SshrImm => binary(DataValueExt::sshr, arg(0), shift_amt(ctrl_ty, imm())?)?, Opcode::Bitrev => unary(DataValueExt::reverse_bits, arg(0))?, Opcode::Bswap => unary(DataValueExt::swap_bytes, arg(0))?, Opcode::Clz => unary(DataValueExt::leading_zeros, arg(0))?, @@ -994,13 +994,7 @@ where } assign(DataValueExt::vector(new, types::I8X16)?) } - Opcode::Splat => { - let mut new_vector = SimdVec::new(); - for _ in 0..ctrl_ty.lane_count() { - new_vector.push(arg(0)); - } - assign(vectorizelanes(&new_vector, ctrl_ty)?) - } + Opcode::Splat => assign(splat(ctrl_ty, arg(0))?), Opcode::Insertlane => { let idx = imm().into_int_unsigned()? as usize; let mut vector = extractlanes(&arg(0), ctrl_ty)?; @@ -1575,3 +1569,17 @@ fn bitselect(c: DataValue, x: DataValue, y: DataValue) -> ValueResult let mask_y = DataValueExt::and(DataValueExt::not(c)?, y)?; DataValueExt::or(mask_x, mask_y) } + +fn splat(ty: Type, val: DataValue) -> ValueResult { + let mut new_vector = SimdVec::new(); + for _ in 0..ty.lane_count() { + new_vector.push(val.clone()); + } + vectorizelanes(&new_vector, ty) +} + +// Prepares the shift amount for a shift/rotate operation. +// The shift amount must be the same type and have the same number of lanes as the vector. +fn shift_amt(ty: Type, val: DataValue) -> ValueResult { + splat(ty, val.convert(ValueConversionKind::Exact(ty.lane_type()))?) +}