From 5c27ca577d9ab2c040517f4c33636df5feaf0161 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 11 Dec 2024 13:46:35 +0000 Subject: [PATCH] X86: add support for SSE2/AVX2 intrinsics (#1128) Co-authored-by: Zhengyang Liu --- CMakeLists.txt | 1 + ir/instr.cpp | 16 - ir/instr.h | 23 + ir/x86_intrinsics.cpp | 743 ++++++++++++++++++ ir/x86_intrinsics.h | 62 ++ ir/x86_intrinsics_binop.inc | 135 ++++ ir/x86_intrinsics_terop.inc | 1 + llvm_util/llvm2alive.cpp | 41 + .../vector/x86/avx2_psign_w-0.srctgt.ll | 10 + .../vector/x86/avx2_psign_w-1.srctgt.ll | 10 + .../vector/x86/avx2_psign_w-2.srctgt.ll | 11 + .../vector/x86/avx2_psign_w-3.srctgt.ll | 12 + .../vector/x86/avx2_psrl_d-0.srctgt.ll | 10 + .../vector/x86/avx2_psrl_d-15.srctgt.ll | 11 + .../vector/x86/avx2_psrl_d-3.srctgt.ll | 13 + .../x86/avx2_psrl_d-overflow1.srctgt.ll | 10 + .../x86/avx2_psrl_d-overflow2.srctgt.ll | 10 + .../x86/avx2_psrl_d-overflow3.srctgt.ll | 10 + .../x86/avx2_psrl_d-overflow4.srctgt.ll | 12 + .../x86/avx2_psrl_d-overflow5.srctgt.ll | 13 + .../vector/x86/avx2_psrl_d-poison1.srctgt.ll | 10 + .../vector/x86/avx2_psrl_d-poison2.srctgt.ll | 10 + .../vector/x86/avx2_psrl_q-0.srctgt.ll | 10 + .../vector/x86/avx2_psrl_q-15.srctgt.ll | 11 + .../vector/x86/avx2_psrl_q-3.srctgt.ll | 13 + .../x86/avx2_psrl_q-overflow1.srctgt.ll | 10 + .../x86/avx2_psrl_q-overflow2.srctgt.ll | 10 + .../x86/avx2_psrl_q-overflow3.srctgt.ll | 10 + .../x86/avx2_psrl_q-overflow4.srctgt.ll | 12 + .../x86/avx2_psrl_q-overflow5.srctgt.ll | 12 + .../vector/x86/avx2_psrl_q-poison1.srctgt.ll | 10 + .../vector/x86/avx2_psrl_q-poison2.srctgt.ll | 10 + .../vector/x86/avx2_psrl_w-0.srctgt.ll | 10 + .../vector/x86/avx2_psrl_w-15.srctgt.ll | 11 + .../vector/x86/avx2_psrl_w-3.srctgt.ll | 13 + .../x86/avx2_psrl_w-overflow1.srctgt.ll | 10 + .../x86/avx2_psrl_w-overflow2.srctgt.ll | 10 + .../x86/avx2_psrl_w-overflow3.srctgt.ll | 10 + .../x86/avx2_psrl_w-overflow4.srctgt.ll | 13 + .../x86/avx2_psrl_w-overflow5.srctgt.ll | 12 + .../vector/x86/avx2_psrl_w-poison1.srctgt.ll | 10 + .../vector/x86/avx2_psrl_w-poison2.srctgt.ll | 10 + .../vector/x86/regression-feb-8-01.srctgt.ll | 16 + .../vector/x86/regression-feb-8-02.srctgt.ll | 11 + .../vector/x86/regression-feb-9-01.srctgt.ll | 10 + .../vector/x86/regression-feb-9-02.srctgt.ll | 10 + .../vector/x86/regression-feb-9-03.srctgt.ll | 10 + .../vector/x86/regression-feb-9-04.srctgt.ll | 10 + .../vector/x86/regression-feb-9-05.srctgt.ll | 10 + .../vector/x86/regression-feb-9-06.srctgt.ll | 10 + .../vector/x86/regression-feb-9-07.srctgt.ll | 10 + .../vector/x86/regression-feb-9-08.srctgt.ll | 10 + .../vector/x86/regression-feb-9-09.srctgt.ll | 10 + .../vector/x86/regression-feb-9-10.srctgt.ll | 10 + .../vector/x86/regression-feb-9-11.srctgt.ll | 10 + .../vector/x86/sse2_pavg_w_0-failed.srctgt.ll | 12 + .../vector/x86/sse2_pavg_w_0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_d-0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_d-15.srctgt.ll | 11 + .../vector/x86/sse2_psrl_d-3.srctgt.ll | 13 + .../x86/sse2_psrl_d-overflow1.srctgt.ll | 10 + .../x86/sse2_psrl_d-overflow2.srctgt.ll | 10 + .../x86/sse2_psrl_d-overflow3.srctgt.ll | 10 + .../x86/sse2_psrl_d-overflow4.srctgt.ll | 12 + .../x86/sse2_psrl_d-overflow5.srctgt.ll | 13 + .../vector/x86/sse2_psrl_d-poison1.srctgt.ll | 10 + .../vector/x86/sse2_psrl_d-poison2.srctgt.ll | 10 + .../vector/x86/sse2_psrl_q-0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_q-15.srctgt.ll | 11 + .../vector/x86/sse2_psrl_q-3.srctgt.ll | 13 + .../x86/sse2_psrl_q-overflow1.srctgt.ll | 10 + .../x86/sse2_psrl_q-overflow2.srctgt.ll | 10 + .../x86/sse2_psrl_q-overflow3.srctgt.ll | 10 + .../x86/sse2_psrl_q-overflow4.srctgt.ll | 12 + .../x86/sse2_psrl_q-overflow5.srctgt.ll | 12 + .../vector/x86/sse2_psrl_q-poison1.srctgt.ll | 10 + .../vector/x86/sse2_psrl_q-poison2.srctgt.ll | 10 + .../vector/x86/sse2_psrl_w-0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_w-15.srctgt.ll | 11 + .../vector/x86/sse2_psrl_w-3.srctgt.ll | 13 + .../x86/sse2_psrl_w-overflow1.srctgt.ll | 10 + .../x86/sse2_psrl_w-overflow2.srctgt.ll | 10 + .../x86/sse2_psrl_w-overflow3.srctgt.ll | 10 + .../x86/sse2_psrl_w-overflow4.srctgt.ll | 13 + .../x86/sse2_psrl_w-overflow5.srctgt.ll | 12 + .../vector/x86/sse2_psrl_w-poison1.srctgt.ll | 10 + .../vector/x86/sse2_psrl_w-poison2.srctgt.ll | 10 + 87 files changed, 1860 insertions(+), 16 deletions(-) create mode 100644 ir/x86_intrinsics.cpp create mode 100644 ir/x86_intrinsics.h create mode 100644 ir/x86_intrinsics_binop.inc create mode 100644 ir/x86_intrinsics_terop.inc create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll diff --git a/CMakeLists.txt b/CMakeLists.txt index d47b6eb3b..b92e19a94 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,7 @@ set(IR_SRCS ir/state_value.cpp ir/type.cpp ir/value.cpp + ir/x86_intrinsics.cpp ) add_library(ir STATIC ${IR_SRCS}) diff --git a/ir/instr.cpp b/ir/instr.cpp index 2c9b97aa0..1431220a8 100644 --- a/ir/instr.cpp +++ b/ir/instr.cpp @@ -19,22 +19,6 @@ using namespace smt; using namespace util; using namespace std; -#define RAUW(val) \ - if (val == &what) \ - val = &with -#define DEFINE_AS_RETZERO(cls, method) \ - uint64_t cls::method() const { return 0; } -#define DEFINE_AS_RETZEROALIGN(cls, method) \ - pair cls::method() const { return { 0, 1 }; } -#define DEFINE_AS_RETFALSE(cls, method) \ - bool cls::method() const { return false; } -#define DEFINE_AS_EMPTYACCESS(cls) \ - MemInstr::ByteAccessInfo cls::getByteAccessInfo() const \ - { return {}; } - -// log2 of max number of var args per function -#define VARARG_BITS 8 - namespace { struct print_type { IR::Type &ty; diff --git a/ir/instr.h b/ir/instr.h index 53efe0b75..495320617 100644 --- a/ir/instr.h +++ b/ir/instr.h @@ -9,6 +9,29 @@ #include #include +#define RAUW(val) \ + if (val == &what) \ + val = &with +#define DEFINE_AS_RETZERO(cls, method) \ + uint64_t cls::method() const { \ + return 0; \ + } +#define DEFINE_AS_RETZEROALIGN(cls, method) \ + pair cls::method() const { \ + return {0, 1}; \ + } +#define DEFINE_AS_RETFALSE(cls, method) \ + bool cls::method() const { \ + return false; \ + } +#define DEFINE_AS_EMPTYACCESS(cls) \ + MemInstr::ByteAccessInfo cls::getByteAccessInfo() const { \ + return {}; \ + } + +// log2 of max number of var args per function +#define VARARG_BITS 8 + namespace IR { class Function; diff --git a/ir/x86_intrinsics.cpp b/ir/x86_intrinsics.cpp new file mode 100644 index 000000000..22d2ec398 --- /dev/null +++ b/ir/x86_intrinsics.cpp @@ -0,0 +1,743 @@ +#include "ir/x86_intrinsics.h" +#include "smt/expr.h" + +using namespace smt; +using namespace std; + +namespace IR { +// the shape of a vector is stored as <# of lanes, element bits> +static constexpr std::pair binop_shape_op0[] = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(C, D), +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +static constexpr std::pair binop_shape_op1[] = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(E, F), +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +static constexpr std::pair binop_shape_ret[] = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(A, B), +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +static constexpr unsigned binop_ret_width[] = { +#define PROCESS(NAME, A, B, C, D, E, F) A *B, +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +unsigned X86IntrinBinOp::getRetWidth(Op op) { + return binop_ret_width[op]; +} + +vector X86IntrinBinOp::operands() const { + return {a, b}; +} + +bool X86IntrinBinOp::propagatesPoison() const { + return true; +} + +bool X86IntrinBinOp::hasSideEffects() const { + return false; +} + +void X86IntrinBinOp::rauw(const Value &what, Value &with) { + RAUW(a); + RAUW(b); +} + +string X86IntrinBinOp::getOpName(Op op) { + switch (op) { +#define PROCESS(NAME, A, B, C, D, E, F) \ + case NAME: \ + return #NAME; +#include "x86_intrinsics_binop.inc" +#undef PROCESS + } + UNREACHABLE(); +} + +void X86IntrinBinOp::print(ostream &os) const { + os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b; +} + +StateValue X86IntrinBinOp::toSMT(State &s) const { + auto rty = getType().getAsAggregateType(); + auto aty = a->getType().getAsAggregateType(); + auto bty = b->getType().getAsAggregateType(); + auto &av = s[*a]; + auto &bv = s[*b]; + + switch (op) { + // shift by one variable + case x86_sse2_psrl_w: + case x86_sse2_psrl_d: + case x86_sse2_psrl_q: + case x86_avx2_psrl_w: + case x86_avx2_psrl_d: + case x86_avx2_psrl_q: + case x86_avx512_psrl_w_512: + case x86_avx512_psrl_d_512: + case x86_avx512_psrl_q_512: + case x86_sse2_psra_w: + case x86_sse2_psra_d: + case x86_avx2_psra_w: + case x86_avx2_psra_d: + case x86_avx512_psra_q_128: + case x86_avx512_psra_q_256: + case x86_avx512_psra_w_512: + case x86_avx512_psra_d_512: + case x86_avx512_psra_q_512: + case x86_sse2_psll_w: + case x86_sse2_psll_d: + case x86_sse2_psll_q: + case x86_avx2_psll_w: + case x86_avx2_psll_d: + case x86_avx2_psll_q: + case x86_avx512_psll_w_512: + case x86_avx512_psll_d_512: + case x86_avx512_psll_q_512: { + vector vals; + unsigned elem_bw = bty->getChild(0).bits(); + + expr shift_np = true; + expr shift_v; + // extract lower 64 bits from b + for (unsigned i = 0, e = 64 / elem_bw; i != e; ++i) { + StateValue vv = bty->extract(bv, i); + shift_v = (i == 0) ? vv.value : vv.value.concat(shift_v); + // if any elements in lower 64 bits is poison, the result is poison + shift_np &= vv.non_poison; + } + function fn; + switch (op) { + case x86_sse2_psrl_w: + case x86_sse2_psrl_d: + case x86_sse2_psrl_q: + case x86_avx2_psrl_w: + case x86_avx2_psrl_d: + case x86_avx2_psrl_q: + case x86_avx512_psrl_w_512: + case x86_avx512_psrl_d_512: + case x86_avx512_psrl_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkUInt(0, elem_bw), a.lshr(b)); + }; + break; + case x86_sse2_psra_w: + case x86_sse2_psra_d: + case x86_avx2_psra_w: + case x86_avx2_psra_d: + case x86_avx512_psra_q_128: + case x86_avx512_psra_q_256: + case x86_avx512_psra_w_512: + case x86_avx512_psra_d_512: + case x86_avx512_psra_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkIf(a.isNegative(), expr::mkUInt(-1, elem_bw), + expr::mkUInt(0, elem_bw)), + a.ashr(b)); + }; + break; + case x86_sse2_psll_w: + case x86_sse2_psll_d: + case x86_sse2_psll_q: + case x86_avx2_psll_w: + case x86_avx2_psll_d: + case x86_avx2_psll_q: + case x86_avx512_psll_w_512: + case x86_avx512_psll_d_512: + case x86_avx512_psll_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkUInt(0, elem_bw), a << b); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = aty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + expr shift = fn(ai.value, shift_v.trunc(elem_bw)); + vals.emplace_back(std::move(shift), shift_np && ai.non_poison); + } + return rty->aggregateVals(vals); + } + // vertical + case x86_sse2_pavg_w: + case x86_sse2_pavg_b: + case x86_avx2_pavg_w: + case x86_avx2_pavg_b: + case x86_avx512_pavg_w_512: + case x86_avx512_pavg_b_512: + case x86_ssse3_psign_b_128: + case x86_ssse3_psign_w_128: + case x86_ssse3_psign_d_128: + case x86_avx2_psign_b: + case x86_avx2_psign_w: + case x86_avx2_psign_d: + case x86_avx2_psrlv_d: + case x86_avx2_psrlv_d_256: + case x86_avx2_psrlv_q: + case x86_avx2_psrlv_q_256: + case x86_avx512_psrlv_d_512: + case x86_avx512_psrlv_q_512: + case x86_avx512_psrlv_w_128: + case x86_avx512_psrlv_w_256: + case x86_avx512_psrlv_w_512: + case x86_avx2_psrav_d: + case x86_avx2_psrav_d_256: + case x86_avx512_psrav_d_512: + case x86_avx512_psrav_q_128: + case x86_avx512_psrav_q_256: + case x86_avx512_psrav_q_512: + case x86_avx512_psrav_w_128: + case x86_avx512_psrav_w_256: + case x86_avx512_psrav_w_512: + case x86_avx2_psllv_d: + case x86_avx2_psllv_d_256: + case x86_avx2_psllv_q: + case x86_avx2_psllv_q_256: + case x86_avx512_psllv_d_512: + case x86_avx512_psllv_q_512: + case x86_avx512_psllv_w_128: + case x86_avx512_psllv_w_256: + case x86_avx512_psllv_w_512: + case x86_sse2_pmulh_w: + case x86_avx2_pmulh_w: + case x86_avx512_pmulh_w_512: + case x86_sse2_pmulhu_w: + case x86_avx2_pmulhu_w: + case x86_avx512_pmulhu_w_512: { + vector vals; + function fn; + switch (op) { + case x86_sse2_pavg_w: + case x86_sse2_pavg_b: + case x86_avx2_pavg_w: + case x86_avx2_pavg_b: + case x86_avx512_pavg_w_512: + case x86_avx512_pavg_b_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return (a.zext(1) + b.zext(1) + expr::mkUInt(1, bw + 1)) + .lshr(expr::mkUInt(1, bw + 1)) + .trunc(bw); + }; + break; + case x86_ssse3_psign_b_128: + case x86_ssse3_psign_w_128: + case x86_ssse3_psign_d_128: + case x86_avx2_psign_b: + case x86_avx2_psign_w: + case x86_avx2_psign_d: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf( + b.isZero(), b, + expr::mkIf(b.isNegative(), expr::mkUInt(0, a.bits()) - a, a)); + }; + break; + case x86_avx2_psrlv_d: + case x86_avx2_psrlv_d_256: + case x86_avx2_psrlv_q: + case x86_avx2_psrlv_q_256: + case x86_avx512_psrlv_d_512: + case x86_avx512_psrlv_q_512: + case x86_avx512_psrlv_w_128: + case x86_avx512_psrlv_w_256: + case x86_avx512_psrlv_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), expr::mkUInt(0, bw), + a.lshr(b)); + }; + break; + case x86_avx2_psrav_d: + case x86_avx2_psrav_d_256: + case x86_avx512_psrav_d_512: + case x86_avx512_psrav_q_128: + case x86_avx512_psrav_q_256: + case x86_avx512_psrav_q_512: + case x86_avx512_psrav_w_128: + case x86_avx512_psrav_w_256: + case x86_avx512_psrav_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), + expr::mkIf(a.isNegative(), expr::mkUInt(-1, bw), + expr::mkUInt(0, bw)), + a.ashr(b)); + }; + break; + case x86_avx2_psllv_d: + case x86_avx2_psllv_d_256: + case x86_avx2_psllv_q: + case x86_avx2_psllv_q_256: + case x86_avx512_psllv_d_512: + case x86_avx512_psllv_q_512: + case x86_avx512_psllv_w_128: + case x86_avx512_psllv_w_256: + case x86_avx512_psllv_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), expr::mkUInt(0, bw), + a << b); + }; + break; + case x86_sse2_pmulh_w: + case x86_avx2_pmulh_w: + case x86_avx512_pmulh_w_512: + fn = [&](auto a, auto b) -> expr { + expr mul = a.sext(16) * b.sext(16); + return mul.extract(31, 16); + }; + break; + case x86_sse2_pmulhu_w: + case x86_avx2_pmulhu_w: + case x86_avx512_pmulhu_w_512: + fn = [&](auto a, auto b) -> expr { + expr mul = a.zext(16) * b.zext(16); + return mul.extract(31, 16); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = rty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + auto bi = bty->extract(bv, i); + vals.emplace_back(fn(ai.value, bi.value), ai.non_poison && bi.non_poison); + } + return rty->aggregateVals(vals); + } + // pshuf.b + case x86_ssse3_pshuf_b_128: + case x86_avx2_pshuf_b: + case x86_avx512_pshuf_b_512: { + auto avty = static_cast(aty); + vector vals; + unsigned laneCount = binop_shape_ret[op].first; + for (unsigned i = 0; i != laneCount; ++i) { + auto [b, bp] = bty->extract(bv, i); + expr id = (b & expr::mkUInt(0x0F, 8)) + (expr::mkUInt(i & 0x30, 8)); + auto [r, rp] = avty->extract(av, id); + auto ai = expr::mkIf(b.extract(7, 7) == 0, r, expr::mkUInt(0, 8)); + vals.emplace_back(std::move(ai), bp && rp); + } + return rty->aggregateVals(vals); + } + // horizontal + case x86_ssse3_phadd_w_128: + case x86_ssse3_phadd_d_128: + case x86_ssse3_phadd_sw_128: + case x86_avx2_phadd_w: + case x86_avx2_phadd_d: + case x86_avx2_phadd_sw: + case x86_ssse3_phsub_w_128: + case x86_ssse3_phsub_d_128: + case x86_ssse3_phsub_sw_128: + case x86_avx2_phsub_w: + case x86_avx2_phsub_d: + case x86_avx2_phsub_sw: { + vector vals; + unsigned laneCount = binop_shape_ret[op].first; + unsigned groupsize = 128 / binop_shape_ret[op].second; + function fn; + switch (op) { + case x86_ssse3_phadd_w_128: + case x86_ssse3_phadd_d_128: + case x86_avx2_phadd_w: + case x86_avx2_phadd_d: + fn = [&](auto a, auto b) -> expr { return a + b; }; + break; + case x86_ssse3_phadd_sw_128: + case x86_avx2_phadd_sw: + fn = [&](auto a, auto b) -> expr { return a.sadd_sat(b); }; + break; + case x86_ssse3_phsub_w_128: + case x86_ssse3_phsub_d_128: + case x86_avx2_phsub_w: + case x86_avx2_phsub_d: + fn = [&](auto a, auto b) -> expr { return a - b; }; + break; + case x86_ssse3_phsub_sw_128: + case x86_avx2_phsub_sw: + fn = [&](auto a, auto b) -> expr { return a.ssub_sat(b); }; + break; + default: + UNREACHABLE(); + } + for (unsigned j = 0; j != laneCount / groupsize; j++) { + for (unsigned i = 0; i != groupsize; i += 2) { + auto [a1, p1] = aty->extract(av, j * groupsize + i); + auto [a2, p2] = aty->extract(av, j * groupsize + i + 1); + vals.emplace_back(fn(a1, a2), p1 && p2); + } + for (unsigned i = 0; i != groupsize; i += 2) { + auto [b1, p1] = aty->extract(bv, j * groupsize + i); + auto [b2, p2] = aty->extract(bv, j * groupsize + i + 1); + vals.emplace_back(fn(b1, b2), p1 && p2); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_psrli_w: + case x86_sse2_psrli_d: + case x86_sse2_psrli_q: + case x86_avx2_psrli_w: + case x86_avx2_psrli_d: + case x86_avx2_psrli_q: + case x86_avx512_psrli_w_512: + case x86_avx512_psrli_d_512: + case x86_avx512_psrli_q_512: + case x86_sse2_psrai_w: + case x86_sse2_psrai_d: + case x86_avx2_psrai_w: + case x86_avx2_psrai_d: + case x86_avx512_psrai_w_512: + case x86_avx512_psrai_d_512: + case x86_avx512_psrai_q_128: + case x86_avx512_psrai_q_256: + case x86_avx512_psrai_q_512: + case x86_sse2_pslli_w: + case x86_sse2_pslli_d: + case x86_sse2_pslli_q: + case x86_avx2_pslli_w: + case x86_avx2_pslli_d: + case x86_avx2_pslli_q: + case x86_avx512_pslli_w_512: + case x86_avx512_pslli_d_512: + case x86_avx512_pslli_q_512: { + vector vals; + function fn; + switch (op) { + case x86_sse2_psrai_w: + case x86_sse2_psrai_d: + case x86_avx2_psrai_w: + case x86_avx2_psrai_d: + case x86_avx512_psrai_w_512: + case x86_avx512_psrai_d_512: + case x86_avx512_psrai_q_128: + case x86_avx512_psrai_q_256: + case x86_avx512_psrai_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkIf(a.isNegative(), expr::mkInt(-1, sz_a), + expr::mkUInt(0, sz_a)); + expr inbounds = a.ashr(b.zextOrTrunc(sz_a)); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + case x86_sse2_psrli_w: + case x86_sse2_psrli_d: + case x86_sse2_psrli_q: + case x86_avx2_psrli_w: + case x86_avx2_psrli_d: + case x86_avx2_psrli_q: + case x86_avx512_psrli_w_512: + case x86_avx512_psrli_d_512: + case x86_avx512_psrli_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkUInt(0, sz_a); + expr inbounds = a.lshr(b.zextOrTrunc(sz_a)); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + case x86_sse2_pslli_w: + case x86_sse2_pslli_d: + case x86_sse2_pslli_q: + case x86_avx2_pslli_w: + case x86_avx2_pslli_d: + case x86_avx2_pslli_q: + case x86_avx512_pslli_w_512: + case x86_avx512_pslli_d_512: + case x86_avx512_pslli_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkUInt(0, sz_a); + expr inbounds = a << b.zextOrTrunc(sz_a); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = rty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + vals.emplace_back(fn(ai.value, bv.value), ai.non_poison && bv.non_poison); + } + return rty->aggregateVals(vals); + } + case x86_sse2_pmadd_wd: + case x86_avx2_pmadd_wd: + case x86_avx512_pmaddw_d_512: + case x86_ssse3_pmadd_ub_sw_128: + case x86_avx2_pmadd_ub_sw: + case x86_avx512_pmaddubs_w_512: { + vector vals; + for (unsigned i = 0, e = binop_shape_ret[op].first; i != e; ++i) { + auto [a1, a1p] = aty->extract(av, i * 2); + auto [a2, a2p] = aty->extract(av, i * 2 + 1); + auto [b1, b1p] = bty->extract(bv, i * 2); + auto [b2, b2p] = bty->extract(bv, i * 2 + 1); + + auto np = a1p && a2p && b1p && b2p; + + if (op == x86_sse2_pmadd_wd || op == x86_avx2_pmadd_wd || + op == x86_avx512_pmaddw_d_512) { + expr v = a1.sext(16) * b1.sext(16) + a2.sext(16) * b2.sext(16); + vals.emplace_back(std::move(v), std::move(np)); + } else { + expr v = (a1.zext(8) * b1.sext(8)).sadd_sat(a2.zext(8) * b2.sext(8)); + vals.emplace_back(std::move(v), std::move(np)); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_packsswb_128: + case x86_avx2_packsswb: + case x86_avx512_packsswb_512: + case x86_sse2_packuswb_128: + case x86_avx2_packuswb: + case x86_avx512_packuswb_512: + case x86_sse2_packssdw_128: + case x86_avx2_packssdw: + case x86_avx512_packssdw_512: + case x86_sse41_packusdw: + case x86_avx2_packusdw: + case x86_avx512_packusdw_512: { + vector vals; + function fn; + if (op == x86_sse2_packsswb_128 || op == x86_avx2_packsswb || + op == x86_avx512_packsswb_512 || op == x86_sse2_packssdw_128 || + op == x86_avx2_packssdw || op == x86_avx512_packssdw_512) { + fn = [&](auto a) -> expr { + unsigned bw = a.bits() / 2; + auto min = expr::IntSMin(bw); + auto max = expr::IntSMax(bw); + return expr::mkIf(a.sle(min.sext(bw)), min, + expr::mkIf(a.sge(max.sext(bw)), max, a.trunc(bw))); + }; + } else { + fn = [&](auto a) -> expr { + unsigned bw = a.bits() / 2; + auto max = expr::IntUMax(bw); + auto zero = expr::mkUInt(0, bw); + return expr::mkIf(a.sle(zero.zext(bw)), zero, + expr::mkIf(a.sge(max.zext(bw)), max, a.trunc(bw))); + }; + } + + unsigned groupsize = 128 / binop_shape_op1[op].second; + unsigned laneCount = binop_shape_op1[op].first; + for (unsigned j = 0; j != laneCount / groupsize; j++) { + for (unsigned i = 0; i != groupsize; i++) { + auto [a1, p1] = aty->extract(av, j * groupsize + i); + vals.emplace_back(fn(std::move(a1)), std::move(p1)); + } + for (unsigned i = 0; i != groupsize; i++) { + auto [b1, p1] = aty->extract(bv, j * groupsize + i); + vals.emplace_back(fn(std::move(b1)), std::move(p1)); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_psad_bw: + case x86_avx2_psad_bw: + case x86_avx512_psad_bw_512: { + unsigned ngroup = binop_shape_ret[op].first; + vector vals; + for (unsigned j = 0; j < ngroup; ++j) { + expr np = true; + expr v; + for (unsigned i = 0; i < 8; ++i) { + auto [a, ap] = aty->extract(av, 8 * j + i); + auto [b, bp] = bty->extract(bv, 8 * j + i); + np = np && ap && bp; + if (i == 0) + v = (a.zext(8) - b.zext(8)).abs(); + else + v = v + (a.zext(8) - b.zext(8)).abs(); + } + vals.emplace_back(v.zext(48), std::move(np)); + } + return rty->aggregateVals(vals); + } + } + UNREACHABLE(); +} + +expr X86IntrinBinOp::getTypeConstraints(const Function &f) const { + return Value::getTypeConstraints() && + (binop_shape_op0[op].first != 1 + ? a->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(binop_shape_op0[op].second); + }) && + a->getType().getAsAggregateType()->numElements() == + binop_shape_op0[op].first + : a->getType().enforceIntType(binop_shape_op0[op].second)) && + (binop_shape_op1[op].first != 1 + ? b->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(binop_shape_op1[op].second); + }) && + b->getType().getAsAggregateType()->numElements() == + binop_shape_op1[op].first + : b->getType().enforceIntType(binop_shape_op1[op].second)) && + (binop_shape_ret[op].first != 1 + ? getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(binop_shape_ret[op].second); + }) && + getType().getAsAggregateType()->numElements() == + binop_shape_ret[op].first + : getType().enforceIntType(binop_shape_ret[op].second)); +} + +unique_ptr X86IntrinBinOp::dup(Function &f, const string &suffix) const { + return make_unique(getType(), getName() + suffix, *a, *b, op); +} + +// the shape of a vector is stored as <# of lanes, element bits> +static constexpr std::pair terop_shape_op0[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(C, D), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +static constexpr std::pair terop_shape_op1[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(E, F), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +static constexpr std::pair terop_shape_op2[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(G, H), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +static constexpr std::pair terop_shape_ret[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(A, B), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +static constexpr unsigned terop_ret_width[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) A *B, +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +string X86IntrinTerOp::getOpName(Op op) { + switch (op) { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) \ + case NAME: \ + return #NAME; +#include "x86_intrinsics_terop.inc" +#undef PROCESS + } + UNREACHABLE(); +} + +unsigned X86IntrinTerOp::getRetWidth(Op op) { + return terop_ret_width[op]; +} + +void X86IntrinTerOp::print(ostream &os) const { + os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b; +} + +StateValue X86IntrinTerOp::toSMT(State &s) const { + auto rty = getType().getAsAggregateType(); + auto aty = a->getType().getAsAggregateType(); + auto bty = b->getType().getAsAggregateType(); + auto cty = c->getType().getAsAggregateType(); + auto &av = s[*a]; + auto &bv = s[*b]; + auto &cv = s[*c]; + + switch (op) { + case x86_avx2_pblendvb: { + vector vals; + + for (int i = 0; i < 32; ++i) { + auto [a, ap] = aty->extract(av, i); + auto [b, bp] = bty->extract(bv, i); + auto [c, cp] = cty->extract(cv, i); + auto v = expr::mkIf(c.extract(7, 7) == 0, a, b); + vals.emplace_back(std::move(v), ap && bp && cp); + } + return rty->aggregateVals(vals); + } + } + UNREACHABLE(); +} + +expr X86IntrinTerOp::getTypeConstraints(const Function &f) const { + return Value::getTypeConstraints() && + (terop_shape_op0[op].first != 1 + ? a->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(terop_shape_op0[op].second); + }) && + a->getType().getAsAggregateType()->numElements() == + terop_shape_op0[op].first + : a->getType().enforceIntType(terop_shape_op0[op].second)) && + (terop_shape_op1[op].first != 1 + ? b->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(terop_shape_op1[op].second); + }) && + b->getType().getAsAggregateType()->numElements() == + terop_shape_op1[op].first + : b->getType().enforceIntType(terop_shape_op1[op].second)) && + (terop_shape_op2[op].first != 1 + ? b->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(terop_shape_op2[op].second); + }) && + b->getType().getAsAggregateType()->numElements() == + terop_shape_op2[op].first + : b->getType().enforceIntType(terop_shape_op2[op].second)) && + (terop_shape_ret[op].first != 1 + ? getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(terop_shape_ret[op].second); + }) && + getType().getAsAggregateType()->numElements() == + terop_shape_ret[op].first + : getType().enforceIntType(terop_shape_ret[op].second)); +} + +unique_ptr X86IntrinTerOp::dup(Function &f, const string &suffix) const { + return make_unique(getType(), getName() + suffix, *a, *b, *c, + op); +} + +vector X86IntrinTerOp::operands() const { + return {a, b, c}; +} + +bool X86IntrinTerOp::propagatesPoison() const { + return true; +} + +bool X86IntrinTerOp::hasSideEffects() const { + return false; +} + +void X86IntrinTerOp::rauw(const Value &what, Value &with) { + RAUW(a); + RAUW(b); + RAUW(c); +} +} // namespace IR diff --git a/ir/x86_intrinsics.h b/ir/x86_intrinsics.h new file mode 100644 index 000000000..aaa09c989 --- /dev/null +++ b/ir/x86_intrinsics.h @@ -0,0 +1,62 @@ +#pragma once + +#include "ir/instr.h" + +namespace IR { +class X86IntrinBinOp final : public Instr { +public: + enum Op { +#define PROCESS(NAME, A, B, C, D, E, F) NAME, +#include "x86_intrinsics_binop.inc" +#undef PROCESS + }; + +private: + Value *a, *b; + Op op; + +public: + static unsigned getRetWidth(Op op); + X86IntrinBinOp(Type &type, std::string &&name, Value &a, Value &b, Op op) + : Instr(type, std::move(name)), a(&a), b(&b), op(op) {} + std::vector operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + static std::string getOpName(Op op); + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr dup(Function &f, + const std::string &suffix) const override; +}; + +class X86IntrinTerOp final : public Instr { +public: + enum Op { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) NAME, +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + +private: + Value *a, *b, *c; + Op op; + +public: + static unsigned getRetWidth(Op op); + X86IntrinTerOp(Type &type, std::string &&name, Value &a, Value &b, Value &c, + Op op) + : Instr(type, std::move(name)), a(&a), b(&b), c(&c), op(op) {} + std::vector operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + static std::string getOpName(Op op); + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr dup(Function &f, + const std::string &suffix) const override; +}; +} // namespace IR diff --git a/ir/x86_intrinsics_binop.inc b/ir/x86_intrinsics_binop.inc new file mode 100644 index 000000000..d44fbfef6 --- /dev/null +++ b/ir/x86_intrinsics_binop.inc @@ -0,0 +1,135 @@ +PROCESS(x86_sse2_pavg_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_pavg_b, 16, 8, 16, 8, 16, 8) +PROCESS(x86_avx2_pavg_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_pavg_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_avx512_pavg_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_avx512_pavg_b_512, 64, 8, 64, 8, 64, 8) +PROCESS(x86_avx2_pshuf_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_ssse3_pshuf_b_128, 16, 8, 16, 8, 16, 8) +PROCESS(x86_avx512_pshuf_b_512, 64, 8, 64, 8, 64, 8) +PROCESS(x86_sse2_psrl_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psrl_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_sse2_psrl_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psrl_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psrl_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx2_psrl_q, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psrl_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psrl_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psrl_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_psrli_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_psrli_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_sse2_psrli_q, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx2_psrli_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_psrli_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx2_psrli_q, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_psrli_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_psrli_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_psrli_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psrlv_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psrlv_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_psrlv_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psrlv_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psrlv_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psrlv_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psrlv_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psrlv_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psrlv_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_psra_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psra_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psra_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psra_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx512_psra_q_128, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx512_psra_q_256, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psra_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psra_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psra_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_psrai_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_psrai_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_avx2_psrai_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_psrai_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx512_psrai_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_psrai_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_psrai_q_128, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx512_psrai_q_256, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_psrai_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psrav_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psrav_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx512_psrav_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psrav_q_128, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx512_psrav_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psrav_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psrav_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psrav_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psrav_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_psll_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psll_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_sse2_psll_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psll_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psll_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx2_psll_q, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psll_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psll_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psll_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_pslli_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_pslli_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_sse2_pslli_q, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx2_pslli_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_pslli_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx2_pslli_q, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_pslli_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_pslli_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_pslli_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psllv_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psllv_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_psllv_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psllv_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psllv_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psllv_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psllv_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psllv_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psllv_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_ssse3_psign_b_128, 16, 8, 16, 8, 16, 8) +PROCESS(x86_ssse3_psign_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_psign_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psign_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_avx2_psign_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_psign_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_ssse3_phadd_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_phadd_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_ssse3_phadd_sw_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_phadd_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_phadd_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_phadd_sw, 16, 16, 16, 16, 16, 16) +PROCESS(x86_ssse3_phsub_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_phsub_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_ssse3_phsub_sw_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_phsub_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_phsub_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_phsub_sw, 16, 16, 16, 16, 16, 16) +PROCESS(x86_sse2_pmulh_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_pmulh_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_pmulh_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_pmulhu_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_pmulhu_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_pmulhu_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_pmadd_wd, 4, 32, 8, 16, 8, 16) +PROCESS(x86_avx2_pmadd_wd, 8, 32, 16, 16, 16, 16) +PROCESS(x86_avx512_pmaddw_d_512, 16, 32, 32, 16, 32, 16) +PROCESS(x86_ssse3_pmadd_ub_sw_128, 8, 16, 16, 8, 16, 8) +PROCESS(x86_avx2_pmadd_ub_sw, 16, 16, 32, 8, 32, 8) +PROCESS(x86_avx512_pmaddubs_w_512, 32, 16, 64, 8, 64, 8) +PROCESS(x86_sse2_packsswb_128, 16, 8, 8, 16, 8, 16) +PROCESS(x86_avx2_packsswb, 32, 8, 16, 16, 16, 16) +PROCESS(x86_avx512_packsswb_512, 64, 8, 32, 16, 32, 16) +PROCESS(x86_sse2_packuswb_128, 16, 8, 8, 16, 8, 16) +PROCESS(x86_avx2_packuswb, 32, 8, 16, 16, 16, 16) +PROCESS(x86_avx512_packuswb_512, 64, 8, 32, 16, 32, 16) +PROCESS(x86_sse2_packssdw_128, 8, 16, 4, 32, 4, 32) +PROCESS(x86_avx2_packssdw, 16, 16, 8, 32, 8, 32) +PROCESS(x86_avx512_packssdw_512, 32, 16, 16, 32, 16, 32) +PROCESS(x86_sse41_packusdw, 8, 16, 4, 32, 4, 32) +PROCESS(x86_avx2_packusdw, 16, 16, 8, 32, 8, 32) +PROCESS(x86_avx512_packusdw_512, 32, 16, 16, 32, 16, 32) +PROCESS(x86_sse2_psad_bw, 2, 64, 16, 8, 16, 8) +PROCESS(x86_avx2_psad_bw, 4, 64, 32, 8, 32, 8) +PROCESS(x86_avx512_psad_bw_512, 8, 64, 64, 8, 64, 8) diff --git a/ir/x86_intrinsics_terop.inc b/ir/x86_intrinsics_terop.inc new file mode 100644 index 000000000..de21d539f --- /dev/null +++ b/ir/x86_intrinsics_terop.inc @@ -0,0 +1 @@ +PROCESS(x86_avx2_pblendvb, 32, 8, 32, 8, 32, 8, 32, 8) diff --git a/llvm_util/llvm2alive.cpp b/llvm_util/llvm2alive.cpp index a029025c0..45289ce98 100644 --- a/llvm_util/llvm2alive.cpp +++ b/llvm_util/llvm2alive.cpp @@ -2,6 +2,7 @@ // Distributed under the MIT license that can be found in the LICENSE file. #include "llvm_util/llvm2alive.h" +#include "ir/x86_intrinsics.h" #include "llvm_util/known_fns.h" #include "llvm_util/utils.h" #include "util/sort.h" @@ -15,6 +16,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ModRef.h" #include @@ -1231,6 +1233,45 @@ class llvm2alive_ : public llvm::InstVisitor> { case llvm::Intrinsic::prefetch: return NOP(i); + // Intel X86 intrinsics +#define PROCESS(NAME, A, B, C, D, E, F) case llvm::Intrinsic::NAME: +#include "ir/x86_intrinsics_binop.inc" +#undef PROCESS + { + PARSE_BINOP(); + X86IntrinBinOp::Op op; + switch (i.getIntrinsicID()) { +#define PROCESS(NAME, A, B, C, D, E, F) \ + case llvm::Intrinsic::NAME: \ + op = X86IntrinBinOp::NAME; \ + break; +#include "ir/x86_intrinsics_binop.inc" +#undef PROCESS + default: + UNREACHABLE(); + } + return make_unique(*ty, value_name(i), *a, *b, op); + } + +#define PROCESS(NAME, A, B, C, D, E, F, G, H) case llvm::Intrinsic::NAME: +#include "ir/x86_intrinsics_terop.inc" +#undef PROCESS + { + PARSE_TRIOP(); + X86IntrinTerOp::Op op; + switch (i.getIntrinsicID()) { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) \ + case llvm::Intrinsic::NAME: \ + op = X86IntrinTerOp::NAME; \ + break; +#include "ir/x86_intrinsics_terop.inc" +#undef PROCESS + default: + UNREACHABLE(); + } + return make_unique(*ty, value_name(i), *a, *b, *c, op); + } + default: break; } diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll new file mode 100644 index 000000000..fc0b1aa36 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll new file mode 100644 index 000000000..cc4406a27 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll new file mode 100644 index 000000000..5c28e9b95 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %neg = sub <8 x i32> zeroinitializer, %v + ret <8 x i32> %neg +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll new file mode 100644 index 000000000..246786752 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + %2 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> ) + ret <8 x i32> %2 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %neg = sub <8 x i32> zeroinitializer, %v + ret <8 x i32> %neg +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll new file mode 100644 index 000000000..32ce38d0c --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll new file mode 100644 index 000000000..938f4cb4b --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll new file mode 100644 index 000000000..7365b7355 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll new file mode 100644 index 000000000..4e454a092 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll new file mode 100644 index 000000000..a5d6914b9 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll new file mode 100644 index 000000000..1e9fd5c72 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll new file mode 100644 index 000000000..d220fa17b --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll new file mode 100644 index 000000000..118c86192 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll new file mode 100644 index 000000000..3f6994fba --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> poison +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll new file mode 100644 index 000000000..ea86f827a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll new file mode 100644 index 000000000..4f23a76a3 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll new file mode 100644 index 000000000..28d79772c --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll @@ -0,0 +1,11 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + %tmp = lshr <4 x i64> %v, + ret <4 x i64> %tmp +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll new file mode 100644 index 000000000..00437e136 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + %tmp = lshr <4 x i64> %v, + ret <4 x i64> %tmp +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll new file mode 100644 index 000000000..abb3c758a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll new file mode 100644 index 000000000..a675c22df --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll new file mode 100644 index 000000000..66bec3e8a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll new file mode 100644 index 000000000..c561c94ef --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll new file mode 100644 index 000000000..f6b3a4803 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll new file mode 100644 index 000000000..5636e4de3 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> poison +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll new file mode 100644 index 000000000..236ef23c1 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll new file mode 100644 index 000000000..a299a1b67 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll new file mode 100644 index 000000000..9f15a7aa5 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + %tmp = lshr <16 x i16> %v, + ret <16 x i16> %tmp +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll new file mode 100644 index 000000000..193f599c9 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll @@ -0,0 +1,13 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + %tmp = lshr <16 x i16> %v, + ret <16 x i16> %tmp +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll new file mode 100644 index 000000000..26e70e2cd --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll new file mode 100644 index 000000000..3f1154eb2 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll new file mode 100644 index 000000000..a15297fab --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll new file mode 100644 index 000000000..eb33719ff --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined + diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll new file mode 100644 index 000000000..6098e6510 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll new file mode 100644 index 000000000..79ced610a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> poison +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll new file mode 100644 index 000000000..2d0ac7d6e --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll new file mode 100644 index 000000000..d7e55fa87 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll @@ -0,0 +1,16 @@ + + +define <32 x i8> @tgt() { +entry: + ret <32 x i8> +} +define <32 x i8> @src() { +entry: + %calltmp = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> , <32 x i8> ) + ret <32 x i8> %calltmp +} + + +declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) + + diff --git a/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll new file mode 100644 index 000000000..244451bd2 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll @@ -0,0 +1,11 @@ + +define <64 x i8> @tgt() { +entry: + ret <64 x i8> +} +define <64 x i8> @src() { +entry: + %calltmp = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> , <64 x i8> ) + ret <64 x i8> %calltmp +} +declare <64 x i8> @llvm.x86.avx512.pavg.b.512 (<64 x i8>, <64 x i8>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll new file mode 100644 index 000000000..edb6612e1 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src() { + %calltmp = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> , <8 x i16> ) + ret <16 x i16> %calltmp +} + +define <16 x i16> @tgt() { + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll new file mode 100644 index 000000000..1c59d5756 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src() { + %calltmp = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> , <4 x i32> ) + ret <8 x i32> %calltmp +} + +define <8 x i32> @tgt() { + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll new file mode 100644 index 000000000..210c834e7 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src() { + %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} + +define <16 x i16> @tgt() { + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll new file mode 100644 index 000000000..3cfbe4345 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src() { + %calltmp = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> , <8 x i32> ) + ret <8 x i32> %calltmp +} + +define <8 x i32> @tgt() { + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll new file mode 100644 index 000000000..a3fb533dc --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src() { + %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} + +define <16 x i16> @tgt() { + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll new file mode 100644 index 000000000..4e556ebae --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src() { + %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} + +define <16 x i16> @tgt() { + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll new file mode 100644 index 000000000..5e131ea8d --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src() { + %calltmp = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> , <8 x i32> ) + ret <8 x i32> %calltmp +} + +define <8 x i32> @tgt() { + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll new file mode 100644 index 000000000..ace2a13a5 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src() { + %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} + +define <16 x i16> @tgt() { + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll new file mode 100644 index 000000000..b19b61e9d --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> , <16 x i8> ) + ret <8 x i16> %calltmp +} + +define <8 x i16> @tgt() { + ret <8 x i16> +} + +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll new file mode 100644 index 000000000..23514bf8d --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src() { + %calltmp = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> , <32 x i8> ) + ret <16 x i16> %calltmp +} + +define <16 x i16> @tgt() { + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll new file mode 100644 index 000000000..c960195a6 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll @@ -0,0 +1,10 @@ +define <32 x i16> @src() { + %calltmp = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> , <64 x i8> ) + ret <32 x i16> %calltmp +} + +define <32 x i16> @tgt() { + ret <32 x i16> +} + +declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) diff --git a/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll b/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll new file mode 100644 index 000000000..469f6abae --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll @@ -0,0 +1,12 @@ +; ERROR: Value mismatch + +; test found by Stefan's random tester + +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> , <8 x i16> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { + ret <8 x i16> +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll new file mode 100644 index 000000000..42abb8ef7 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll @@ -0,0 +1,10 @@ +; test found by Stefan's random tester + +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> , <8 x i16> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { + ret <8 x i16> +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll new file mode 100644 index 000000000..8a39bd30e --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll new file mode 100644 index 000000000..837b32860 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll @@ -0,0 +1,11 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll new file mode 100644 index 000000000..4f1ffb526 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll new file mode 100644 index 000000000..074634471 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll new file mode 100644 index 000000000..51300dca4 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll new file mode 100644 index 000000000..da506e2c3 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll new file mode 100644 index 000000000..beaf1b44f --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll new file mode 100644 index 000000000..1a856df06 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll new file mode 100644 index 000000000..5c5449aa1 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> poison +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll new file mode 100644 index 000000000..3152e1440 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll new file mode 100644 index 000000000..9b8319a4d --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll new file mode 100644 index 000000000..c0c0efe6b --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll @@ -0,0 +1,11 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + %tmp = lshr <2 x i64> %v, + ret <2 x i64> %tmp +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll new file mode 100644 index 000000000..7f1b08191 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll @@ -0,0 +1,13 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + %tmp = lshr <2 x i64> %v, + ret <2 x i64> %tmp +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll new file mode 100644 index 000000000..51e949fd8 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll new file mode 100644 index 000000000..3198d8831 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll new file mode 100644 index 000000000..1edd0efeb --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll new file mode 100644 index 000000000..b3cdeeb84 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll new file mode 100644 index 000000000..9bbf111ff --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll new file mode 100644 index 000000000..127443d74 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> poison +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll new file mode 100644 index 000000000..78e237a4c --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll new file mode 100644 index 000000000..51e8ba278 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll new file mode 100644 index 000000000..f577df356 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + %tmp = lshr <8 x i16> %v, + ret <8 x i16> %tmp +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll new file mode 100644 index 000000000..81c57f18f --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + %tmp = lshr <8 x i16> %v, + ret <8 x i16> %tmp +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll new file mode 100644 index 000000000..24c2a90e0 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll new file mode 100644 index 000000000..33a2a9d38 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll new file mode 100644 index 000000000..caa32b663 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll new file mode 100644 index 000000000..a007979be --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined + diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll new file mode 100644 index 000000000..241d24a15 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll new file mode 100644 index 000000000..bc03df188 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> poison +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll new file mode 100644 index 000000000..29e04d604 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>)