Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement SIMD swizzle #1248

Merged
merged 2 commits into from
Mar 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/legalize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,9 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
let splat = insts.by_name("splat");
let shuffle = insts.by_name("shuffle");
let sshr = insts.by_name("sshr");
let swizzle = insts.by_name("swizzle");
let trueif = insts.by_name("trueif");
let uadd_sat = insts.by_name("uadd_sat");
let umax = insts.by_name("umax");
let umin = insts.by_name("umin");
let ushr_imm = insts.by_name("ushr_imm");
Expand Down Expand Up @@ -375,6 +377,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
let u128_zeroes = constant(vec![0x00; 16]);
let u128_ones = constant(vec![0xff; 16]);
let u128_seventies = constant(vec![0x70; 16]);
let a = var("a");
let b = var("b");
let c = var("c");
Expand Down Expand Up @@ -459,6 +462,21 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
);
}

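// SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
// mask indexes greater than 15 to produce a 0 in the result lane. PSHUFB only zeroes a lane when
// the MSB of its mask byte is set, so 0x70 is added to every index with unsigned saturation:
// indexes 0-15 keep their low four bits and a clear MSB, while any larger index ends up with the
// MSB set. For the spec discussion, see https://github.com/WebAssembly/simd/issues/93.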
{
let swizzle = swizzle.bind(vector(I8, sse_vector_size));
narrow.legalize(
def!(a = swizzle(x, y)),
vec![
def!(b = vconst(u128_seventies)),
def!(c = uadd_sat(y, b)),
def!(a = x86_pshufb(x, c)),
],
);
}

// SIMD bnot
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let bnot = bnot.bind(vector(ty, sse_vector_size));
Expand Down
31 changes: 30 additions & 1 deletion cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -517,7 +517,36 @@ fn define_simd_lane_access(
.operands_out(vec![a]),
);

let x = &Operand::new("x", TxN).with_doc("SIMD vector to modify");
let I8x16 = &TypeVar::new(
"I8x16",
"A SIMD vector type consisting of 16 lanes of 8-bit integers",
TypeSetBuilder::new()
.ints(8..8)
.simd_lanes(16..16)
.includes_scalars(false)
.build(),
);
let x = &Operand::new("x", I8x16).with_doc("Vector to modify by re-arranging lanes");
let y = &Operand::new("y", I8x16).with_doc("Mask for re-arranging lanes");

ig.push(
Inst::new(
"swizzle",
r#"
Vector swizzle.

Returns a new vector with byte-width lanes selected from the lanes of the first input
vector ``x`` specified in the second input vector ``y``. The indices ``i`` in range
``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the
resulting lane is 0. Note that this operates on byte-width lanes.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);

let x = &Operand::new("x", TxN).with_doc("The vector to modify");
let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,14 @@ block0:
; nextln: v4 = vconst.i8x16 0x00
; nextln: v1 = x86_pshufb v3, v4
; nextln: return v1

function %swizzle() -> i8x16 {
; Legalization test: swizzle.i8x16 is expected to expand into a vconst of sixteen 0x70 bytes,
; an unsigned saturating add of that constant to the mask, and an x86_pshufb using the adjusted mask.
block0:
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v2 = swizzle.i8x16 v0, v1
; check: v3 = vconst.i8x16 0x70707070707070707070707070707070
; nextln: v4 = uadd_sat v1, v3
; nextln: v2 = x86_pshufb v0, v4
return v2
}
26 changes: 26 additions & 0 deletions cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,29 @@ block0:
return v8
}
; run

function %swizzle() -> b1 {
; Swizzle with a lane-reversing mask whose last index (42) is out of range; the result must equal v3.
block0:
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
v2 = swizzle.i8x16 v0, v1 ; reverse the lanes; the over-large index 42 yields a 0 lane (indistinguishable here from selecting lane 0 of v0, which also holds 0)

v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run:

function %swizzle_with_overflow() -> b1 {
; Every mask index is either out of range (16, 250) or 0, and lane 0 of v0 holds 0, so all result lanes must be 0.
block0:
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
v2 = swizzle.i8x16 v0, v1 ; 16 + 0x70 = 0x80 already has the MSB set; 250 + 0x70 should overflow but saturate to 0xff so that the MSB is set (PSHUFB zeroes any lane whose mask byte has the MSB set)

v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
v4 = icmp eq v2, v3
v5 = vall_true v4
return v5
}
; run:
5 changes: 4 additions & 1 deletion cranelift/wasm/src/code_translator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// to WASM using the less specific v128 type for certain operations and more specific
// types (e.g. i8x16) for others.
}
Operator::V8x16Swizzle => {
let (a, b) = pop2_with_bitcast(state, I8X16, builder);
state.push1(builder.ins().swizzle(I8X16, a, b))
}
Operator::I8x16Add | Operator::I16x8Add | Operator::I32x4Add | Operator::I64x2Add => {
let (a, b) = pop2_with_bitcast(state, type_of(op), builder);
state.push1(builder.ins().iadd(a, b))
Expand Down Expand Up @@ -1489,7 +1493,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I32x4WidenHighI16x8S { .. }
| Operator::I32x4WidenLowI16x8U { .. }
| Operator::I32x4WidenHighI16x8U { .. }
| Operator::V8x16Swizzle
| Operator::I16x8Load8x8S { .. }
| Operator::I16x8Load8x8U { .. }
| Operator::I32x4Load16x4S { .. }
Expand Down