Skip to content

Commit

Permalink
[wasm-simd] Prototype relaxed integer Dot product instructions
Browse files Browse the repository at this point in the history
Prototype the instruction on the interpreter, and Arm64. Details of
instruction lowerings on all relevant architectures can be found at:
WebAssembly/relaxed-simd#52

Bug: v8:12908
Change-Id: If8ffb82c38042191c67c9b5c23a231877d4f2159
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3679848
Reviewed-by: Clemens Backes <[email protected]>
Commit-Queue: Ilya Rezvov <[email protected]>
Reviewed-by: Deepti Gandluri <[email protected]>
Cr-Commit-Position: refs/heads/main@{#80924}
  • Loading branch information
Ilya Rezvov authored and V8 LUCI CQ committed Jun 2, 2022
1 parent 90c80f7 commit a52b44f
Show file tree
Hide file tree
Showing 20 changed files with 248 additions and 2 deletions.
25 changes: 25 additions & 0 deletions src/compiler/backend/arm64/code-generator-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2488,6 +2488,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
break;
}
case kArm64I16x8DotI8x16S: {
// Signed dot product of adjacent 8-bit lane pairs into 16-bit lanes:
// widen-multiply both halves of the inputs, then add neighbouring products.
UseScratchRegisterScope scope(tasm());
VRegister lhs = i.InputSimd128Register(0);
VRegister rhs = i.InputSimd128Register(1);
VRegister tmp1 = scope.AcquireV(kFormat8H);
VRegister tmp2 = scope.AcquireV(kFormat8H);
// tmp1 = 16-bit products of the low eight byte lanes.
__ Smull(tmp1, lhs.V8B(), rhs.V8B());
// tmp2 = 16-bit products of the high eight byte lanes.
__ Smull2(tmp2, lhs.V16B(), rhs.V16B());
// Pairwise add: each output lane is the sum of two adjacent products. The
// output is only written here, after both inputs have been consumed.
__ Addp(i.OutputSimd128Register().V8H(), tmp1, tmp2);
break;
}
case kArm64I32x4DotI8x16AddS: {
// Signed dot product of groups of four 8-bit lanes into 32-bit lanes, plus
// the 32-bit accumulator passed as input 2.
UseScratchRegisterScope scope(tasm());
VRegister lhs = i.InputSimd128Register(0);
VRegister rhs = i.InputSimd128Register(1);
VRegister tmp1 = scope.AcquireV(kFormat8H);
VRegister tmp2 = scope.AcquireV(kFormat8H);
// 16-bit products of the low (tmp1) and high (tmp2) eight byte lanes.
__ Smull(tmp1, lhs.V8B(), rhs.V8B());
__ Smull2(tmp2, lhs.V16B(), rhs.V16B());
// First reduction: sum adjacent product pairs, still as 16-bit lanes.
__ Addp(tmp1, tmp1, tmp2);
// Second reduction: sum adjacent 16-bit pairs, widening to 32-bit lanes.
__ Saddlp(tmp1.V4S(), tmp1);
// Finally add the accumulator lanes.
__ Add(i.OutputSimd128Register().V4S(), tmp1.V4S(),
i.InputSimd128Register(2).V4S());
break;
}
case kArm64IExtractLaneU: {
VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
__ Umov(i.OutputRegister32(), i.InputSimd128Register(0).Format(f),
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/backend/arm64/instruction-codes-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ namespace compiler {
V(Arm64IGeU) \
V(Arm64I32x4BitMask) \
V(Arm64I32x4DotI16x8S) \
V(Arm64I16x8DotI8x16S) \
V(Arm64I32x4DotI8x16AddS) \
V(Arm64I32x4TruncSatF64x2SZero) \
V(Arm64I32x4TruncSatF64x2UZero) \
V(Arm64IExtractLaneU) \
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/backend/arm64/instruction-scheduler-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64IGeU:
case kArm64I32x4BitMask:
case kArm64I32x4DotI16x8S:
case kArm64I16x8DotI8x16S:
case kArm64I32x4DotI8x16AddS:
case kArm64I32x4TruncSatF64x2SZero:
case kArm64I32x4TruncSatF64x2UZero:
case kArm64IExtractLaneU:
Expand Down
8 changes: 8 additions & 0 deletions src/compiler/backend/arm64/instruction-selector-arm64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3538,6 +3538,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
#define SIMD_BINOP_LIST(V) \
V(I32x4Mul, kArm64I32x4Mul) \
V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
V(I16x8DotI8x16I7x16S, kArm64I16x8DotI8x16S) \
V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
V(I16x8Mul, kArm64I16x8Mul) \
V(I16x8UConvertI32x4, kArm64I16x8UConvertI32x4) \
Expand Down Expand Up @@ -3724,6 +3725,13 @@ void InstructionSelector::VisitS128Zero(Node* node) {
Emit(kArm64S128Zero, g.DefineAsRegister(node));
}

// Selects kArm64I32x4DotI8x16AddS for the relaxed i32x4 dot-product-and-add
// node. The two multiplicands (inputs 0 and 1), the accumulator (input 2) and
// the result are all requested in registers.
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
  Arm64OperandGenerator g(this);
  InstructionOperand result = g.DefineAsRegister(node);
  InstructionOperand lhs = g.UseRegister(node->InputAt(0));
  InstructionOperand rhs = g.UseRegister(node->InputAt(1));
  InstructionOperand acc = g.UseRegister(node->InputAt(2));
  Emit(kArm64I32x4DotI8x16AddS, result, lhs, rhs, acc);
}

#define SIMD_VISIT_EXTRACT_LANE(Type, T, Sign, LaneSize) \
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
VisitRRI(this, \
Expand Down
14 changes: 14 additions & 0 deletions src/compiler/backend/instruction-selector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2372,6 +2372,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF32x4U(node);
case IrOpcode::kI16x8RelaxedQ15MulRS:
return MarkAsSimd128(node), VisitI16x8RelaxedQ15MulRS(node);
case IrOpcode::kI16x8DotI8x16I7x16S:
return MarkAsSimd128(node), VisitI16x8DotI8x16I7x16S(node);
case IrOpcode::kI32x4DotI8x16I7x16AddS:
return MarkAsSimd128(node), VisitI32x4DotI8x16I7x16AddS(node);
default:
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
node->op()->mnemonic(), node->id());
Expand Down Expand Up @@ -2830,6 +2834,16 @@ void InstructionSelector::VisitI16x8RelaxedQ15MulRS(Node* node) {
}
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM

#if !V8_TARGET_ARCH_ARM64
// Stubs for every target other than Arm64: the relaxed integer dot product
// visitors are not implemented there and abort via UNIMPLEMENTED().
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
UNIMPLEMENTED();
}

void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM64

void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }

void InstructionSelector::VisitParameter(Node* node) {
Expand Down
4 changes: 3 additions & 1 deletion src/compiler/machine-operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,9 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I32x4RelaxedTruncF32x4U, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF64x2SZero, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF64x2UZero, Operator::kNoProperties, 1, 0, 1) \
V(I16x8RelaxedQ15MulRS, Operator::kCommutative, 2, 0, 1)
V(I16x8RelaxedQ15MulRS, Operator::kCommutative, 2, 0, 1) \
V(I16x8DotI8x16I7x16S, Operator::kCommutative, 2, 0, 1) \
V(I32x4DotI8x16I7x16AddS, Operator::kNoProperties, 3, 0, 1)

// The format is:
// V(Name, properties, value_input_count, control_input_count, output_count)
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/machine-operator.h
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4RelaxedTruncF64x2SZero();
const Operator* I32x4RelaxedTruncF64x2UZero();
const Operator* I16x8RelaxedQ15MulRS();
const Operator* I16x8DotI8x16I7x16S();
const Operator* I32x4DotI8x16I7x16AddS();

// load [base + index]
const Operator* Load(LoadRepresentation rep);
Expand Down
2 changes: 2 additions & 0 deletions src/compiler/opcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -1001,6 +1001,8 @@
V(I32x4RelaxedTruncF64x2SZero) \
V(I32x4RelaxedTruncF64x2UZero) \
V(I16x8RelaxedQ15MulRS) \
V(I16x8DotI8x16I7x16S) \
V(I32x4DotI8x16I7x16AddS) \
V(I8x16Shuffle) \
V(V128AnyTrue) \
V(I64x2AllTrue) \
Expand Down
6 changes: 6 additions & 0 deletions src/compiler/wasm-compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4476,6 +4476,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI16x8RelaxedQ15MulRS:
return graph()->NewNode(mcgraph()->machine()->I16x8RelaxedQ15MulRS(),
inputs[0], inputs[1]);
case wasm::kExprI16x8DotI8x16I7x16S:
return graph()->NewNode(mcgraph()->machine()->I16x8DotI8x16I7x16S(),
inputs[0], inputs[1]);
case wasm::kExprI32x4DotI8x16I7x16AddS:
return graph()->NewNode(mcgraph()->machine()->I32x4DotI8x16I7x16AddS(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprI16x8Abs:
return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
case wasm::kExprI16x8BitMask:
Expand Down
13 changes: 13 additions & 0 deletions src/wasm/baseline/arm/liftoff-assembler-arm.h
Original file line number Diff line number Diff line change
Expand Up @@ -3510,6 +3510,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
liftoff::GetSimd128Register(src2));
}

// Not implemented in Liftoff on this target: record a bailout instead of
// emitting code. The opcode belongs to the relaxed-SIMD proposal, so the
// bailout is attributed to kRelaxedSimd rather than to baseline kSimd.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  bailout(kRelaxedSimd, "emit_i16x8_dot_i8x16_i7x16_s");
}

// Not implemented in Liftoff on this target: record a bailout instead of
// emitting code. The opcode belongs to the relaxed-SIMD proposal, so the
// bailout is attributed to kRelaxedSimd rather than to baseline kSimd.
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
                                                        LiftoffRegister lhs,
                                                        LiftoffRegister rhs,
                                                        LiftoffRegister acc) {
  bailout(kRelaxedSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
}

void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
Expand Down
25 changes: 25 additions & 0 deletions src/wasm/baseline/arm64/liftoff-assembler-arm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -3175,6 +3175,31 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
Sqrdmulh(dst.fp().V8H(), src1.fp().V8H(), src2.fp().V8H());
}

// Emits the i16x8 relaxed dot product: each 16-bit lane of |dst| receives the
// sum of two adjacent signed 8-bit lane products of |lhs| and |rhs|.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  UseScratchRegisterScope temps(this);
  VRegister low_products = temps.AcquireV(kFormat8H);
  VRegister high_products = temps.AcquireV(kFormat8H);
  const auto left = lhs.fp();
  const auto right = rhs.fp();
  // Widening multiplies of the low and high eight byte lanes.
  Smull(low_products, left.V8B(), right.V8B());
  Smull2(high_products, left.V16B(), right.V16B());
  // |dst| is written only now, so it may share a register with an input.
  Addp(dst.fp().V8H(), low_products, high_products);
}

// Emits the i32x4 relaxed dot product with accumulate: each 32-bit lane of
// |dst| receives the sum of four adjacent signed 8-bit lane products of |lhs|
// and |rhs|, plus the matching lane of |acc|.
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
                                                        LiftoffRegister lhs,
                                                        LiftoffRegister rhs,
                                                        LiftoffRegister acc) {
  UseScratchRegisterScope temps(this);
  VRegister sums = temps.AcquireV(kFormat8H);
  VRegister high_products = temps.AcquireV(kFormat8H);
  const auto left = lhs.fp();
  const auto right = rhs.fp();
  // Widening multiplies of the low and high eight byte lanes.
  Smull(sums, left.V8B(), right.V8B());
  Smull2(high_products, left.V16B(), right.V16B());
  // Reduce pairs of products (16-bit), then pairs of pairs (32-bit).
  Addp(sums, sums, high_products);
  Saddlp(sums.V4S(), sums);
  // |dst| is written only now, so it may share a register with an input.
  Add(dst.fp().V4S(), sums.V4S(), acc.fp().V4S());
}

void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V4S(), src.fp().V4S());
Expand Down
13 changes: 13 additions & 0 deletions src/wasm/baseline/ia32/liftoff-assembler-ia32.h
Original file line number Diff line number Diff line change
Expand Up @@ -3657,6 +3657,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
bailout(kRelaxedSimd, "emit_i16x8_relaxed_q15mulr_s");
}

// Not implemented in Liftoff on this target: record a bailout instead of
// emitting code. Uses kRelaxedSimd, matching the other relaxed-SIMD stubs in
// this file (e.g. emit_i16x8_relaxed_q15mulr_s).
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  bailout(kRelaxedSimd, "emit_i16x8_dot_i8x16_i7x16_s");
}

// Not implemented in Liftoff on this target: record a bailout instead of
// emitting code. Uses kRelaxedSimd, matching the other relaxed-SIMD stubs in
// this file (e.g. emit_i16x8_relaxed_q15mulr_s).
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
                                                        LiftoffRegister lhs,
                                                        LiftoffRegister rhs,
                                                        LiftoffRegister acc) {
  bailout(kRelaxedSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Expand Down
7 changes: 7 additions & 0 deletions src/wasm/baseline/liftoff-assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1259,6 +1259,13 @@ class LiftoffAssembler : public TurboAssembler {
inline void emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
// Relaxed SIMD integer dot products; each platform's Liftoff assembler header
// supplies the definition.
// i16x8 result: dot product over adjacent pairs of 8-bit lanes of src1/src2.
inline void emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
// i32x4 result: dot product over groups of four 8-bit lanes of src1/src2,
// with the 32-bit lanes of acc added on top.
inline void emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2,
LiftoffRegister acc);
inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_alltrue(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src);
Expand Down
16 changes: 16 additions & 0 deletions src/wasm/baseline/liftoff-compiler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4102,6 +4102,22 @@ class LiftoffCompiler {
case wasm::kExprI32x4RelaxedTruncF64x2UZero:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_u_zero);
case wasm::kExprI16x8DotI8x16I7x16S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s);
case wasm::kExprI32x4DotI8x16I7x16AddS: {
// There is no helper for an instruction with 3 SIMD operands
// and we do not expect to add any more, so inlining it here.
static constexpr RegClass res_rc = reg_class_for(kS128);
// Operands come off the value stack in reverse order (accumulator first).
// Each pop passes the registers already taken so that they are not reused.
LiftoffRegister acc = __ PopToRegister();
LiftoffRegister rhs = __ PopToRegister(LiftoffRegList{acc});
LiftoffRegister lhs = __ PopToRegister(LiftoffRegList{rhs, acc});
// The operand registers are offered as candidates for the result register.
LiftoffRegister dst = __ GetUnusedRegister(res_rc, {lhs, rhs, acc}, {});

__ emit_i32x4_dot_i8x16_i7x16_add_s(dst, lhs, rhs, acc);
__ PushRegister(kS128, dst);
return;
}
default:
unsupported(decoder, kSimd, "simd");
}
Expand Down
13 changes: 13 additions & 0 deletions src/wasm/baseline/x64/liftoff-assembler-x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -3231,6 +3231,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
bailout(kRelaxedSimd, "emit_i16x8_relaxed_q15mulr_s");
}

// Not implemented in Liftoff on this target: record a bailout instead of
// emitting code. Uses kRelaxedSimd, matching the other relaxed-SIMD stubs in
// this file (e.g. emit_i16x8_relaxed_q15mulr_s).
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
                                                    LiftoffRegister lhs,
                                                    LiftoffRegister rhs) {
  bailout(kRelaxedSimd, "emit_i16x8_dot_i8x16_i7x16_s");
}

// Not implemented in Liftoff on this target: record a bailout instead of
// emitting code. Uses kRelaxedSimd, matching the other relaxed-SIMD stubs in
// this file (e.g. emit_i16x8_relaxed_q15mulr_s).
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
                                                        LiftoffRegister lhs,
                                                        LiftoffRegister rhs,
                                                        LiftoffRegister acc) {
  bailout(kRelaxedSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
}

void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
Expand Down
2 changes: 2 additions & 0 deletions src/wasm/wasm-opcodes-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I32x4_OP(RelaxedTruncF64x2SZero, "relaxed_trunc_f64x2_s_zero");
CASE_I32x4_OP(RelaxedTruncF64x2UZero, "relaxed_trunc_f64x2_u_zero");
CASE_I16x8_OP(RelaxedQ15MulRS, "relaxed_q15mulr_s")
CASE_I16x8_OP(DotI8x16I7x16S, "dot_i8x16_i7x16_s")
CASE_I32x4_OP(DotI8x16I7x16AddS, "dot_i8x16_i7x16_add_s")

// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
Expand Down
4 changes: 3 additions & 1 deletion src/wasm/wasm-opcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,9 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(F32x4RelaxedMax, 0xfd10e, s_ss) \
V(F64x2RelaxedMin, 0xfd10f, s_ss) \
V(F64x2RelaxedMax, 0xfd110, s_ss) \
V(I16x8RelaxedQ15MulRS, 0xfd111, s_ss)
V(I16x8RelaxedQ15MulRS, 0xfd111, s_ss) \
V(I16x8DotI8x16I7x16S, 0xfd112, s_ss) \
V(I32x4DotI8x16I7x16AddS, 0xfd113, s_sss)

#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLaneS, 0xfd15, _) \
Expand Down
58 changes: 58 additions & 0 deletions test/cctest/wasm/test-run-wasm-relaxed-simd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,66 @@ WASM_RELAXED_SIMD_TEST(I16x8RelaxedQ15MulRS) {
RunI16x8BinOpTest<int16_t>(execution_tier, kExprI16x8RelaxedQ15MulRS,
SaturateRoundingQMul<int16_t>);
}

#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM

#if V8_TARGET_ARCH_ARM64
// Splats the two scalar parameters across all byte lanes, runs the relaxed
// i16x8 dot product on them, and checks every 16-bit lane of the result.
WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
WasmRunner<int32_t, int8_t, int8_t> r(execution_tier);
int16_t* g = r.builder().template AddGlobal<int16_t>(kWasmS128);
byte value1 = 0, value2 = 1;
byte temp1 = r.AllocateLocal(kWasmS128);
byte temp2 = r.AllocateLocal(kWasmS128);
BUILD(r, WASM_LOCAL_SET(temp1, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value1))),
WASM_LOCAL_SET(temp2, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value2))),
WASM_GLOBAL_SET(
0, WASM_SIMD_BINOP(kExprI16x8DotI8x16I7x16S, WASM_LOCAL_GET(temp1),
WASM_LOCAL_GET(temp2))),
WASM_ONE);

for (int8_t x : compiler::ValueHelper::GetVector<int8_t>()) {
for (int8_t y : compiler::ValueHelper::GetVector<int8_t>()) {
// The second operand is masked to its low 7 bits (the "i7" in the opcode
// name), so only non-negative values are passed for it.
r.Call(x, y & 0x7F);
// * 2 because both products in a lane pair are equal after the splat:
// (x*y) + (x*y) = 2*x*y
int16_t expected = base::MulWithWraparound(x * (y & 0x7F), 2);
for (int i = 0; i < 8; i++) {
CHECK_EQ(expected, LANE(g, i));
}
}
}
}

// Splats the two byte parameters across all byte lanes and the accumulator
// across all 32-bit lanes, runs the relaxed i32x4 dot-product-and-add, and
// checks every 32-bit lane of the result.
WASM_RELAXED_SIMD_TEST(I32x4DotI8x16I7x16AddS) {
WasmRunner<int32_t, int8_t, int8_t, int32_t> r(execution_tier);
int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
byte value1 = 0, value2 = 1, value3 = 2;
byte temp1 = r.AllocateLocal(kWasmS128);
byte temp2 = r.AllocateLocal(kWasmS128);
byte temp3 = r.AllocateLocal(kWasmS128);
BUILD(
r, WASM_LOCAL_SET(temp1, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value1))),
WASM_LOCAL_SET(temp2, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value2))),
WASM_LOCAL_SET(temp3, WASM_SIMD_I32x4_SPLAT(WASM_LOCAL_GET(value3))),
WASM_GLOBAL_SET(0, WASM_SIMD_TERNOP(
kExprI32x4DotI8x16I7x16AddS, WASM_LOCAL_GET(temp1),
WASM_LOCAL_GET(temp2), WASM_LOCAL_GET(temp3))),
WASM_ONE);

for (int8_t x : compiler::ValueHelper::GetVector<int8_t>()) {
for (int8_t y : compiler::ValueHelper::GetVector<int8_t>()) {
for (int32_t z : compiler::ValueHelper::GetVector<int32_t>()) {
// The second operand is masked to its low 7 bits (the "i7" in the opcode
// name), so only non-negative values are passed for it.
r.Call(x, y & 0x7F, z);
// * 4 because all four products in a lane group are equal after the
// splat; the accumulator z is then added with wraparound.
int32_t expected = base::AddWithWraparound(
base::MulWithWraparound(x * (y & 0x7F), 4), z);
for (int i = 0; i < 4; i++) {
CHECK_EQ(expected, LANE(g, i));
}
}
}
}
}
#endif // V8_TARGET_ARCH_ARM64

#undef WASM_RELAXED_SIMD_TEST
} // namespace test_run_wasm_relaxed_simd
} // namespace wasm
Expand Down
33 changes: 33 additions & 0 deletions test/common/wasm/wasm-interpreter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2782,6 +2782,39 @@ class WasmInterpreterInternals {
*len += 16;
return true;
}
case kExprI16x8DotI8x16I7x16S: {
// Reference implementation: the second operand is popped first.
int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16();
int8 res;
// Each 16-bit result lane is the sum of the products of two adjacent
// 8-bit lanes.
for (size_t i = 0; i < 8; i++) {
int16_t lo = (v1.val[LANE(i * 2, v1)] * v2.val[LANE(i * 2, v2)]);
int16_t hi =
(v1.val[LANE(i * 2 + 1, v1)] * v2.val[LANE(i * 2 + 1, v2)]);
// A single product fits in int16_t, but the sum of two may not
// (e.g. 2 * (-128 * -128)), hence the wrapping add.
res.val[LANE(i, res)] = base::AddWithWraparound(lo, hi);
}
Push(WasmValue(Simd128(res)));
return true;
}
case kExprI32x4DotI8x16I7x16AddS: {
// Reference implementation: operands are popped in reverse order, so the
// accumulator (v3) comes first, then the two byte vectors.
int4 v3 = Pop().to_s128().to_i32x4();
int16 v2 = Pop().to_s128().to_i8x16();
int16 v1 = Pop().to_s128().to_i8x16();
int4 res;
// Each 32-bit result lane is the sum of the products of four adjacent
// 8-bit lanes plus the matching accumulator lane.
for (size_t i = 0; i < 4; i++) {
int32_t a = (v1.val[LANE(i * 4, v1)] * v2.val[LANE(i * 4, v2)]);
int32_t b =
(v1.val[LANE(i * 4 + 1, v1)] * v2.val[LANE(i * 4 + 1, v2)]);
int32_t c =
(v1.val[LANE(i * 4 + 2, v1)] * v2.val[LANE(i * 4 + 2, v2)]);
int32_t d =
(v1.val[LANE(i * 4 + 3, v1)] * v2.val[LANE(i * 4 + 3, v2)]);
int32_t acc = v3.val[LANE(i, v3)];
// a + b + c + d should not wrap
// (four 8-bit products are far below the int32_t range); only the final
// addition of the accumulator needs wraparound semantics.
res.val[LANE(i, res)] = base::AddWithWraparound(a + b + c + d, acc);
}
Push(WasmValue(Simd128(res)));
return true;
}
case kExprI8x16RelaxedSwizzle:
case kExprI8x16Swizzle: {
int16 v2 = Pop().to_s128().to_i8x16();
Expand Down
Loading

0 comments on commit a52b44f

Please sign in to comment.