From ea9f294ad3fced811f6b97d11570a1da2bc899b5 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 3 Dec 2024 12:34:22 +0000 Subject: [PATCH 01/19] X86: add support for SSE2/AVX2 intrinsics Add X86 vector SSE2/AVX2 intrinsics to the IR, complete with formal semantics for the verification flow. The work was originally authored by Zhengyang Liu , and was motivated by the development of the Minotaur project [https://arxiv.org/abs/2306.00229]. --- CMakeLists.txt | 1 + ir/instr.cpp | 16 - ir/instr.h | 23 + ir/x86_intrinsics.cpp | 740 ++++++++++++++++++ ir/x86_intrinsics.h | 141 ++++ ir/x86_intrinsics_binop.inc | 135 ++++ ir/x86_intrinsics_terop.inc | 1 + llvm_util/known_fns.cpp | 11 +- llvm_util/llvm2alive.cpp | 41 + .../vector/x86/avx2_psign_w-0.srctgt.ll | 10 + .../vector/x86/avx2_psign_w-1.srctgt.ll | 10 + .../vector/x86/avx2_psign_w-2.srctgt.ll | 11 + .../vector/x86/avx2_psign_w-3.srctgt.ll | 12 + .../vector/x86/avx2_psrl_d-0.srctgt.ll | 10 + .../vector/x86/avx2_psrl_d-15.srctgt.ll | 13 + .../vector/x86/avx2_psrl_d-3.srctgt.ll | 13 + .../x86/avx2_psrl_d-overflow1.srctgt.ll | 10 + .../x86/avx2_psrl_d-overflow2.srctgt.ll | 10 + .../x86/avx2_psrl_d-overflow3.srctgt.ll | 10 + .../x86/avx2_psrl_d-overflow4.srctgt.ll | 12 + .../x86/avx2_psrl_d-overflow5.srctgt.ll | 13 + .../vector/x86/avx2_psrl_d-poison1.srctgt.ll | 10 + .../vector/x86/avx2_psrl_d-poison2.srctgt.ll | 10 + .../vector/x86/avx2_psrl_q-0.srctgt.ll | 10 + .../vector/x86/avx2_psrl_q-15.srctgt.ll | 13 + .../vector/x86/avx2_psrl_q-3.srctgt.ll | 13 + .../x86/avx2_psrl_q-overflow1.srctgt.ll | 10 + .../x86/avx2_psrl_q-overflow2.srctgt.ll | 10 + .../x86/avx2_psrl_q-overflow3.srctgt.ll | 10 + .../x86/avx2_psrl_q-overflow4.srctgt.ll | 12 + .../x86/avx2_psrl_q-overflow5.srctgt.ll | 12 + .../vector/x86/avx2_psrl_q-poison1.srctgt.ll | 10 + .../vector/x86/avx2_psrl_q-poison2.srctgt.ll | 10 + .../vector/x86/avx2_psrl_w-0.srctgt.ll | 10 + .../vector/x86/avx2_psrl_w-15.srctgt.ll | 13 + 
.../vector/x86/avx2_psrl_w-3.srctgt.ll | 13 + .../x86/avx2_psrl_w-overflow1.srctgt.ll | 10 + .../x86/avx2_psrl_w-overflow2.srctgt.ll | 10 + .../x86/avx2_psrl_w-overflow3.srctgt.ll | 10 + .../x86/avx2_psrl_w-overflow4.srctgt.ll | 13 + .../x86/avx2_psrl_w-overflow5.srctgt.ll | 12 + .../vector/x86/avx2_psrl_w-poison1.srctgt.ll | 10 + .../vector/x86/avx2_psrl_w-poison2.srctgt.ll | 10 + .../vector/x86/regression-feb-8-01.srctgt.ll | 16 + .../vector/x86/regression-feb-8-02.srctgt.ll | 11 + .../vector/x86/regression-feb-9-01.srctgt.ll | 11 + .../vector/x86/regression-feb-9-02.srctgt.ll | 11 + .../vector/x86/regression-feb-9-03.srctgt.ll | 11 + .../vector/x86/regression-feb-9-04.srctgt.ll | 11 + .../vector/x86/regression-feb-9-05.srctgt.ll | 11 + .../vector/x86/regression-feb-9-06.srctgt.ll | 11 + .../vector/x86/regression-feb-9-07.srctgt.ll | 11 + .../vector/x86/regression-feb-9-08.srctgt.ll | 11 + .../vector/x86/regression-feb-9-09.srctgt.ll | 11 + .../vector/x86/regression-feb-9-10.srctgt.ll | 12 + .../vector/x86/regression-feb-9-11.srctgt.ll | 11 + .../vector/x86/sse2_pavg_w_0-failed.srctgt.ll | 12 + .../vector/x86/sse2_pavg_w_0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_d-0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_d-15.srctgt.ll | 13 + .../vector/x86/sse2_psrl_d-3.srctgt.ll | 13 + .../x86/sse2_psrl_d-overflow1.srctgt.ll | 10 + .../x86/sse2_psrl_d-overflow2.srctgt.ll | 10 + .../x86/sse2_psrl_d-overflow3.srctgt.ll | 10 + .../x86/sse2_psrl_d-overflow4.srctgt.ll | 12 + .../x86/sse2_psrl_d-overflow5.srctgt.ll | 13 + .../vector/x86/sse2_psrl_d-poison1.srctgt.ll | 10 + .../vector/x86/sse2_psrl_d-poison2.srctgt.ll | 10 + .../vector/x86/sse2_psrl_q-0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_q-15.srctgt.ll | 13 + .../vector/x86/sse2_psrl_q-3.srctgt.ll | 13 + .../x86/sse2_psrl_q-overflow1.srctgt.ll | 10 + .../x86/sse2_psrl_q-overflow2.srctgt.ll | 10 + .../x86/sse2_psrl_q-overflow3.srctgt.ll | 10 + .../x86/sse2_psrl_q-overflow4.srctgt.ll | 12 + 
.../x86/sse2_psrl_q-overflow5.srctgt.ll | 12 + .../vector/x86/sse2_psrl_q-poison1.srctgt.ll | 10 + .../vector/x86/sse2_psrl_q-poison2.srctgt.ll | 10 + .../vector/x86/sse2_psrl_w-0.srctgt.ll | 10 + .../vector/x86/sse2_psrl_w-15.srctgt.ll | 13 + .../vector/x86/sse2_psrl_w-3.srctgt.ll | 13 + .../x86/sse2_psrl_w-overflow1.srctgt.ll | 10 + .../x86/sse2_psrl_w-overflow2.srctgt.ll | 10 + .../x86/sse2_psrl_w-overflow3.srctgt.ll | 10 + .../x86/sse2_psrl_w-overflow4.srctgt.ll | 13 + .../x86/sse2_psrl_w-overflow5.srctgt.ll | 12 + .../vector/x86/sse2_psrl_w-poison1.srctgt.ll | 10 + .../vector/x86/sse2_psrl_w-poison2.srctgt.ll | 10 + 88 files changed, 1970 insertions(+), 17 deletions(-) create mode 100644 ir/x86_intrinsics.cpp create mode 100644 ir/x86_intrinsics.h create mode 100644 ir/x86_intrinsics_binop.inc create mode 100644 ir/x86_intrinsics_terop.inc create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll create mode 100644 
tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll 
create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll create mode 100644 
tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll create mode 100644 tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll diff --git a/CMakeLists.txt b/CMakeLists.txt index 76c6ca657..a8fdd3c75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -104,6 +104,7 @@ set(IR_SRCS ir/state_value.cpp ir/type.cpp ir/value.cpp + ir/x86_intrinsics.cpp ) add_library(ir STATIC ${IR_SRCS}) diff --git a/ir/instr.cpp b/ir/instr.cpp index f1ea89cc7..a21ad0825 100644 --- a/ir/instr.cpp +++ b/ir/instr.cpp @@ -19,22 +19,6 @@ using namespace smt; using namespace util; using namespace std; -#define RAUW(val) \ - if (val == &what) \ - val = &with -#define DEFINE_AS_RETZERO(cls, method) \ - uint64_t cls::method() const { return 0; } -#define DEFINE_AS_RETZEROALIGN(cls, method) \ - pair cls::method() const { return { 0, 1 }; } -#define DEFINE_AS_RETFALSE(cls, method) \ - bool cls::method() const { return false; } -#define DEFINE_AS_EMPTYACCESS(cls) \ - MemInstr::ByteAccessInfo cls::getByteAccessInfo() const \ - { return {}; } - -// log2 of max number of var args per function -#define VARARG_BITS 8 - namespace { struct print_type { IR::Type &ty; diff --git a/ir/instr.h b/ir/instr.h index 146cb97ed..f3c4d41a5 100644 --- a/ir/instr.h +++ b/ir/instr.h @@ -9,6 +9,29 @@ #include #include +#define RAUW(val) \ + if (val == &what) \ + 
val = &with +#define DEFINE_AS_RETZERO(cls, method) \ + uint64_t cls::method() const { \ + return 0; \ + } +#define DEFINE_AS_RETZEROALIGN(cls, method) \ + pair cls::method() const { \ + return {0, 1}; \ + } +#define DEFINE_AS_RETFALSE(cls, method) \ + bool cls::method() const { \ + return false; \ + } +#define DEFINE_AS_EMPTYACCESS(cls) \ + MemInstr::ByteAccessInfo cls::getByteAccessInfo() const { \ + return {}; \ + } + +// log2 of max number of var args per function +#define VARARG_BITS 8 + namespace IR { class Function; diff --git a/ir/x86_intrinsics.cpp b/ir/x86_intrinsics.cpp new file mode 100644 index 000000000..ba0bd1e60 --- /dev/null +++ b/ir/x86_intrinsics.cpp @@ -0,0 +1,740 @@ +#include "ir/x86_intrinsics.h" +#include "smt/expr.h" + +using namespace smt; +using namespace std; + +namespace IR { +vector FakeShuffle::operands() const { + return {v1, v2, mask}; +} + +bool FakeShuffle::propagatesPoison() const { + return false; +} + +bool FakeShuffle::hasSideEffects() const { + return false; +} + +void FakeShuffle::rauw(const Value &what, Value &with) { + RAUW(v1); + RAUW(v2); + RAUW(mask); +} + +void FakeShuffle::print(ostream &os) const { + os << getName() << " = fakesv " << *v1 << ", " << *v2 << ", " << *mask; +} + +StateValue FakeShuffle::toSMT(State &s) const { + auto vty = + static_cast(v1->getType().getAsAggregateType()); + auto mty = mask->getType().getAsAggregateType(); + auto sz = vty->numElementsConst(); + vector vals; + + for (unsigned i = 0, e = mty->numElementsConst(); i != e; ++i) { + auto [m_v, m_p] = mty->extract(s[*mask], i); + expr bound = expr::mkUInt(sz, m_v); + expr idx = m_v.urem(bound); + auto [v1v, v1p] = vty->extract(s[*v1], idx); + auto [v2v, v2p] = vty->extract(s[*v2], idx); + expr v = expr::mkIf(m_v.ult(bound), v1v, v2v); + expr np = expr::mkIf(m_v.ult(bound), v1p, v2p); + expr inbounds = m_v.ult(expr::mkUInt(vty->numElementsConst() * 2, m_v)); + + vals.emplace_back(std::move(v), inbounds && np); + } + + return 
getType().getAsAggregateType()->aggregateVals(vals); +} + +expr FakeShuffle::getTypeConstraints(const Function &f) const { + return Value::getTypeConstraints() && + getType().enforceVectorTypeSameChildTy(v1->getType()) && + getType().getAsAggregateType()->numElements() == + mask->getType().getAsAggregateType()->numElements() && + v1->getType().enforceVectorType() && v1->getType() == v2->getType() && + mask->getType().enforceVectorType(); +} + +unique_ptr FakeShuffle::dup(Function &f, const string &suffix) const { + return make_unique(getType(), getName() + suffix, *v1, *v2, + *mask); +} + +vector X86IntrinBinOp::operands() const { + return {a, b}; +} + +bool X86IntrinBinOp::propagatesPoison() const { + return true; +} + +bool X86IntrinBinOp::hasSideEffects() const { + return false; +} + +void X86IntrinBinOp::rauw(const Value &what, Value &with) { + RAUW(a); + RAUW(b); +} + +string X86IntrinBinOp::getOpName(Op op) { + switch (op) { +#define PROCESS(NAME, A, B, C, D, E, F) \ + case NAME: \ + return #NAME; +#include "x86_intrinsics_binop.inc" +#undef PROCESS + } + UNREACHABLE(); +} + +void X86IntrinBinOp::print(ostream &os) const { + os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b; +} + +StateValue X86IntrinBinOp::toSMT(State &s) const { + auto rty = getType().getAsAggregateType(); + auto aty = a->getType().getAsAggregateType(); + auto bty = b->getType().getAsAggregateType(); + auto &av = s[*a]; + auto &bv = s[*b]; + + switch (op) { + // shift by one variable + case x86_sse2_psrl_w: + case x86_sse2_psrl_d: + case x86_sse2_psrl_q: + case x86_avx2_psrl_w: + case x86_avx2_psrl_d: + case x86_avx2_psrl_q: + case x86_avx512_psrl_w_512: + case x86_avx512_psrl_d_512: + case x86_avx512_psrl_q_512: + case x86_sse2_psra_w: + case x86_sse2_psra_d: + case x86_avx2_psra_w: + case x86_avx2_psra_d: + case x86_avx512_psra_q_128: + case x86_avx512_psra_q_256: + case x86_avx512_psra_w_512: + case x86_avx512_psra_d_512: + case x86_avx512_psra_q_512: + case 
x86_sse2_psll_w: + case x86_sse2_psll_d: + case x86_sse2_psll_q: + case x86_avx2_psll_w: + case x86_avx2_psll_d: + case x86_avx2_psll_q: + case x86_avx512_psll_w_512: + case x86_avx512_psll_d_512: + case x86_avx512_psll_q_512: { + vector vals; + unsigned elem_bw = bty->getChild(0).bits(); + + expr shift_np = true; + expr shift_v; + // extract lower 64 bits from b + for (unsigned i = 0, e = 64 / elem_bw; i != e; ++i) { + StateValue vv = bty->extract(bv, i); + shift_v = (i == 0) ? vv.value : vv.value.concat(shift_v); + // if any elements in lower 64 bits is poison, the result is poison + shift_np &= vv.non_poison; + } + function fn; + switch (op) { + case x86_sse2_psrl_w: + case x86_sse2_psrl_d: + case x86_sse2_psrl_q: + case x86_avx2_psrl_w: + case x86_avx2_psrl_d: + case x86_avx2_psrl_q: + case x86_avx512_psrl_w_512: + case x86_avx512_psrl_d_512: + case x86_avx512_psrl_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkUInt(0, elem_bw), a.lshr(b)); + }; + break; + case x86_sse2_psra_w: + case x86_sse2_psra_d: + case x86_avx2_psra_w: + case x86_avx2_psra_d: + case x86_avx512_psra_q_128: + case x86_avx512_psra_q_256: + case x86_avx512_psra_w_512: + case x86_avx512_psra_d_512: + case x86_avx512_psra_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkIf(a.isNegative(), expr::mkUInt(-1, elem_bw), + expr::mkUInt(0, elem_bw)), + a.ashr(b)); + }; + break; + case x86_sse2_psll_w: + case x86_sse2_psll_d: + case x86_sse2_psll_q: + case x86_avx2_psll_w: + case x86_avx2_psll_d: + case x86_avx2_psll_q: + case x86_avx512_psll_w_512: + case x86_avx512_psll_d_512: + case x86_avx512_psll_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkUInt(0, elem_bw), a << b); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = aty->numElementsConst(); i != e; ++i) { + auto ai = 
aty->extract(av, i); + expr shift = fn(ai.value, shift_v.trunc(elem_bw)); + vals.emplace_back(std::move(shift), shift_np && ai.non_poison); + } + return rty->aggregateVals(vals); + } + // vertical + case x86_sse2_pavg_w: + case x86_sse2_pavg_b: + case x86_avx2_pavg_w: + case x86_avx2_pavg_b: + case x86_avx512_pavg_w_512: + case x86_avx512_pavg_b_512: + case x86_ssse3_psign_b_128: + case x86_ssse3_psign_w_128: + case x86_ssse3_psign_d_128: + case x86_avx2_psign_b: + case x86_avx2_psign_w: + case x86_avx2_psign_d: + case x86_avx2_psrlv_d: + case x86_avx2_psrlv_d_256: + case x86_avx2_psrlv_q: + case x86_avx2_psrlv_q_256: + case x86_avx512_psrlv_d_512: + case x86_avx512_psrlv_q_512: + case x86_avx512_psrlv_w_128: + case x86_avx512_psrlv_w_256: + case x86_avx512_psrlv_w_512: + case x86_avx2_psrav_d: + case x86_avx2_psrav_d_256: + case x86_avx512_psrav_d_512: + case x86_avx512_psrav_q_128: + case x86_avx512_psrav_q_256: + case x86_avx512_psrav_q_512: + case x86_avx512_psrav_w_128: + case x86_avx512_psrav_w_256: + case x86_avx512_psrav_w_512: + case x86_avx2_psllv_d: + case x86_avx2_psllv_d_256: + case x86_avx2_psllv_q: + case x86_avx2_psllv_q_256: + case x86_avx512_psllv_d_512: + case x86_avx512_psllv_q_512: + case x86_avx512_psllv_w_128: + case x86_avx512_psllv_w_256: + case x86_avx512_psllv_w_512: + case x86_sse2_pmulh_w: + case x86_avx2_pmulh_w: + case x86_avx512_pmulh_w_512: + case x86_sse2_pmulhu_w: + case x86_avx2_pmulhu_w: + case x86_avx512_pmulhu_w_512: { + vector vals; + function fn; + switch (op) { + case x86_sse2_pavg_w: + case x86_sse2_pavg_b: + case x86_avx2_pavg_w: + case x86_avx2_pavg_b: + case x86_avx512_pavg_w_512: + case x86_avx512_pavg_b_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return (a.zext(1) + b.zext(1) + expr::mkUInt(1, bw + 1)) + .lshr(expr::mkUInt(1, bw + 1)) + .trunc(bw); + }; + break; + case x86_ssse3_psign_b_128: + case x86_ssse3_psign_w_128: + case x86_ssse3_psign_d_128: + case x86_avx2_psign_b: + case 
x86_avx2_psign_w: + case x86_avx2_psign_d: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf( + b.isZero(), b, + expr::mkIf(b.isNegative(), expr::mkUInt(0, a.bits()) - a, a)); + }; + break; + case x86_avx2_psrlv_d: + case x86_avx2_psrlv_d_256: + case x86_avx2_psrlv_q: + case x86_avx2_psrlv_q_256: + case x86_avx512_psrlv_d_512: + case x86_avx512_psrlv_q_512: + case x86_avx512_psrlv_w_128: + case x86_avx512_psrlv_w_256: + case x86_avx512_psrlv_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), expr::mkUInt(0, bw), + a.lshr(b)); + }; + break; + case x86_avx2_psrav_d: + case x86_avx2_psrav_d_256: + case x86_avx512_psrav_d_512: + case x86_avx512_psrav_q_128: + case x86_avx512_psrav_q_256: + case x86_avx512_psrav_q_512: + case x86_avx512_psrav_w_128: + case x86_avx512_psrav_w_256: + case x86_avx512_psrav_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), + expr::mkIf(a.isNegative(), expr::mkUInt(-1, bw), + expr::mkUInt(0, bw)), + a.ashr(b)); + }; + break; + case x86_avx2_psllv_d: + case x86_avx2_psllv_d_256: + case x86_avx2_psllv_q: + case x86_avx2_psllv_q_256: + case x86_avx512_psllv_d_512: + case x86_avx512_psllv_q_512: + case x86_avx512_psllv_w_128: + case x86_avx512_psllv_w_256: + case x86_avx512_psllv_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), expr::mkUInt(0, bw), + a << b); + }; + break; + case x86_sse2_pmulh_w: + case x86_avx2_pmulh_w: + case x86_avx512_pmulh_w_512: + fn = [&](auto a, auto b) -> expr { + expr mul = a.sext(16) * b.sext(16); + return mul.extract(31, 16); + }; + break; + case x86_sse2_pmulhu_w: + case x86_avx2_pmulhu_w: + case x86_avx512_pmulhu_w_512: + fn = [&](auto a, auto b) -> expr { + expr mul = a.zext(16) * b.zext(16); + return mul.extract(31, 16); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = 
rty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + auto bi = bty->extract(bv, i); + vals.emplace_back(fn(ai.value, bi.value), ai.non_poison && bi.non_poison); + } + return rty->aggregateVals(vals); + } + // pshuf.b + case x86_ssse3_pshuf_b_128: + case x86_avx2_pshuf_b: + case x86_avx512_pshuf_b_512: { + auto avty = static_cast(aty); + vector vals; + unsigned laneCount = shape_ret[op].first; + for (unsigned i = 0; i != laneCount; ++i) { + auto [b, bp] = bty->extract(bv, i); + expr id = (b & expr::mkUInt(0x0F, 8)) + (expr::mkUInt(i & 0x30, 8)); + auto [r, rp] = avty->extract(av, id); + auto ai = expr::mkIf(b.extract(7, 7) == expr::mkUInt(0, 1), r, + expr::mkUInt(0, 8)); + + vals.emplace_back(std::move(ai), bp && rp); + } + return rty->aggregateVals(vals); + } + // horizontal + case x86_ssse3_phadd_w_128: + case x86_ssse3_phadd_d_128: + case x86_ssse3_phadd_sw_128: + case x86_avx2_phadd_w: + case x86_avx2_phadd_d: + case x86_avx2_phadd_sw: + case x86_ssse3_phsub_w_128: + case x86_ssse3_phsub_d_128: + case x86_ssse3_phsub_sw_128: + case x86_avx2_phsub_w: + case x86_avx2_phsub_d: + case x86_avx2_phsub_sw: { + vector vals; + unsigned laneCount = shape_ret[op].first; + unsigned groupsize = 128 / shape_ret[op].second; + function fn; + switch (op) { + case x86_ssse3_phadd_w_128: + case x86_ssse3_phadd_d_128: + case x86_avx2_phadd_w: + case x86_avx2_phadd_d: + fn = [&](auto a, auto b) -> expr { return a + b; }; + break; + case x86_ssse3_phadd_sw_128: + case x86_avx2_phadd_sw: + fn = [&](auto a, auto b) -> expr { return a.sadd_sat(b); }; + break; + case x86_ssse3_phsub_w_128: + case x86_ssse3_phsub_d_128: + case x86_avx2_phsub_w: + case x86_avx2_phsub_d: + fn = [&](auto a, auto b) -> expr { return a - b; }; + break; + case x86_ssse3_phsub_sw_128: + case x86_avx2_phsub_sw: + fn = [&](auto a, auto b) -> expr { return a.ssub_sat(b); }; + break; + default: + UNREACHABLE(); + } + for (unsigned j = 0; j != laneCount / groupsize; j++) { + for (unsigned i = 0; 
i != groupsize; i += 2) { + auto [a1, p1] = aty->extract(av, j * groupsize + i); + auto [a2, p2] = aty->extract(av, j * groupsize + i + 1); + vals.emplace_back(fn(a1, a2), p1 && p2); + } + for (unsigned i = 0; i != groupsize; i += 2) { + auto [b1, p1] = aty->extract(bv, j * groupsize + i); + auto [b2, p2] = aty->extract(bv, j * groupsize + i + 1); + vals.emplace_back(fn(b1, b2), p1 && p2); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_psrli_w: + case x86_sse2_psrli_d: + case x86_sse2_psrli_q: + case x86_avx2_psrli_w: + case x86_avx2_psrli_d: + case x86_avx2_psrli_q: + case x86_avx512_psrli_w_512: + case x86_avx512_psrli_d_512: + case x86_avx512_psrli_q_512: + case x86_sse2_psrai_w: + case x86_sse2_psrai_d: + case x86_avx2_psrai_w: + case x86_avx2_psrai_d: + case x86_avx512_psrai_w_512: + case x86_avx512_psrai_d_512: + case x86_avx512_psrai_q_128: + case x86_avx512_psrai_q_256: + case x86_avx512_psrai_q_512: + case x86_sse2_pslli_w: + case x86_sse2_pslli_d: + case x86_sse2_pslli_q: + case x86_avx2_pslli_w: + case x86_avx2_pslli_d: + case x86_avx2_pslli_q: + case x86_avx512_pslli_w_512: + case x86_avx512_pslli_d_512: + case x86_avx512_pslli_q_512: { + vector vals; + function fn; + switch (op) { + case x86_sse2_psrai_w: + case x86_sse2_psrai_d: + case x86_avx2_psrai_w: + case x86_avx2_psrai_d: + case x86_avx512_psrai_w_512: + case x86_avx512_psrai_d_512: + case x86_avx512_psrai_q_128: + case x86_avx512_psrai_q_256: + case x86_avx512_psrai_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkIf(a.isNegative(), expr::mkInt(-1, sz_a), + expr::mkUInt(0, sz_a)); + expr inbounds = a.ashr(b.zextOrTrunc(sz_a)); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + case x86_sse2_psrli_w: + case x86_sse2_psrli_d: + case x86_sse2_psrli_q: + case x86_avx2_psrli_w: + case x86_avx2_psrli_d: + case x86_avx2_psrli_q: + case 
x86_avx512_psrli_w_512: + case x86_avx512_psrli_d_512: + case x86_avx512_psrli_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkUInt(0, sz_a); + expr inbounds = a.lshr(b.zextOrTrunc(sz_a)); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + case x86_sse2_pslli_w: + case x86_sse2_pslli_d: + case x86_sse2_pslli_q: + case x86_avx2_pslli_w: + case x86_avx2_pslli_d: + case x86_avx2_pslli_q: + case x86_avx512_pslli_w_512: + case x86_avx512_pslli_d_512: + case x86_avx512_pslli_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkUInt(0, sz_a); + expr inbounds = a << b.zextOrTrunc(sz_a); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = rty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + vals.emplace_back(fn(ai.value, bv.value), ai.non_poison && bv.non_poison); + } + return rty->aggregateVals(vals); + } + case x86_sse2_pmadd_wd: + case x86_avx2_pmadd_wd: + case x86_avx512_pmaddw_d_512: + case x86_ssse3_pmadd_ub_sw_128: + case x86_avx2_pmadd_ub_sw: + case x86_avx512_pmaddubs_w_512: { + vector vals; + for (unsigned i = 0, e = shape_ret[op].first; i != e; ++i) { + auto [a1, a1p] = aty->extract(av, i * 2); + auto [a2, a2p] = aty->extract(av, i * 2 + 1); + auto [b1, b1p] = bty->extract(bv, i * 2); + auto [b2, b2p] = bty->extract(bv, i * 2 + 1); + + auto np = a1p && a2p && b1p && b2p; + + if (op == x86_sse2_pmadd_wd || op == x86_avx2_pmadd_wd || + op == x86_avx512_pmaddw_d_512) { + expr v = a1.sext(16) * b1.sext(16) + a2.sext(16) * b2.sext(16); + vals.emplace_back(std::move(v), std::move(np)); + } else { + expr v = (a1.zext(8) * b1.sext(8)).sadd_sat(a2.zext(8) * b2.sext(8)); + vals.emplace_back(std::move(v), 
std::move(np)); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_packsswb_128: + case x86_avx2_packsswb: + case x86_avx512_packsswb_512: + case x86_sse2_packuswb_128: + case x86_avx2_packuswb: + case x86_avx512_packuswb_512: + case x86_sse2_packssdw_128: + case x86_avx2_packssdw: + case x86_avx512_packssdw_512: + case x86_sse41_packusdw: + case x86_avx2_packusdw: + case x86_avx512_packusdw_512: { + vector vals; + function fn; + if (op == x86_sse2_packsswb_128 || op == x86_avx2_packsswb || + op == x86_avx512_packsswb_512 || op == x86_sse2_packssdw_128 || + op == x86_avx2_packssdw || op == x86_avx512_packssdw_512) { + fn = [&](auto a) -> expr { + unsigned bw = a.bits() / 2; + auto min = expr::IntSMin(bw); + auto max = expr::IntSMax(bw); + return expr::mkIf(a.sle(min.sext(bw)), min, + expr::mkIf(a.sge(max.sext(bw)), max, a.trunc(bw))); + }; + } else { + fn = [&](auto a) -> expr { + unsigned bw = a.bits() / 2; + auto max = expr::IntUMax(bw); + auto zero = expr::mkUInt(0, bw); + return expr::mkIf(a.sle(zero.zext(bw)), zero, + expr::mkIf(a.sge(max.zext(bw)), max, a.trunc(bw))); + }; + } + + unsigned groupsize = 128 / shape_op1[op].second; + unsigned laneCount = shape_op1[op].first; + for (unsigned j = 0; j != laneCount / groupsize; j++) { + for (unsigned i = 0; i != groupsize; i++) { + auto [a1, p1] = aty->extract(av, j * groupsize + i); + vals.emplace_back(fn(std::move(a1)), std::move(p1)); + } + for (unsigned i = 0; i != groupsize; i++) { + auto [b1, p1] = aty->extract(bv, j * groupsize + i); + vals.emplace_back(fn(std::move(b1)), std::move(p1)); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_psad_bw: + case x86_avx2_psad_bw: + case x86_avx512_psad_bw_512: { + unsigned ngroup = shape_ret[op].first; + vector vals; + for (unsigned j = 0; j < ngroup; ++j) { + expr np = true; + expr v; + for (unsigned i = 0; i < 8; ++i) { + auto [a, ap] = aty->extract(av, 8 * j + i); + auto [b, bp] = bty->extract(bv, 8 * j + i); + np = np && ap && bp; + if (i 
== 0)
+          v = (a.zext(8) - b.zext(8)).abs();
+        else
+          v = v + (a.zext(8) - b.zext(8)).abs();
+      }
+      vals.emplace_back(v.zext(48), std::move(np));
+    }
+    return rty->aggregateVals(vals);
+  }
+  }
+  UNREACHABLE();
+}
+
+// Type constraints for a binary X86 intrinsic: operand a, operand b and the
+// result must each match the <# of lanes, element bits> shape recorded for
+// `op` in shape_op0 / shape_op1 / shape_ret. A lane count of 1 is treated as
+// a plain integer of the given width instead of a vector.
+expr X86IntrinBinOp::getTypeConstraints(const Function &f) const {
+  return Value::getTypeConstraints() &&
+         (shape_op0[op].first != 1
+              ? a->getType().enforceVectorType([this](auto &ty) {
+                  return ty.enforceIntType(shape_op0[op].second);
+                }) &&
+                    a->getType().getAsAggregateType()->numElements() ==
+                        shape_op0[op].first
+              : a->getType().enforceIntType(shape_op0[op].second)) &&
+         (shape_op1[op].first != 1
+              ? b->getType().enforceVectorType([this](auto &ty) {
+                  return ty.enforceIntType(shape_op1[op].second);
+                }) &&
+                    b->getType().getAsAggregateType()->numElements() ==
+                        shape_op1[op].first
+              : b->getType().enforceIntType(shape_op1[op].second)) &&
+         (shape_ret[op].first != 1
+              ? getType().enforceVectorType([this](auto &ty) {
+                  return ty.enforceIntType(shape_ret[op].second);
+                }) &&
+                    getType().getAsAggregateType()->numElements() ==
+                        shape_ret[op].first
+              : getType().enforceIntType(shape_ret[op].second));
+}
+
+// Clones this instruction under the name getName() + suffix, keeping the same
+// result type, operands and opcode.
+unique_ptr X86IntrinBinOp::dup(Function &f, const string &suffix) const {
+  return make_unique(getType(), getName() + suffix, *a, *b, op);
+}
+
+// Returns the enumerator name of `op` as a string. The switch cases are
+// generated from the PROCESS entries in x86_intrinsics_terop.inc.
+string X86IntrinTerOp::getOpName(Op op) {
+  switch (op) {
+#define PROCESS(NAME, A, B, C, D, E, F, G, H)                                  \
+  case NAME:                                                                   \
+    return #NAME;
+#include "x86_intrinsics_terop.inc"
+#undef PROCESS
+  }
+  UNREACHABLE();
+}
+
+// Prints the instruction as "<name> = <opname> <a>, <b>, <c>".
+// All three operands are written out: a ternary intrinsic printed with only
+// a and b would hide the operand that toSMT reads as c.
+void X86IntrinTerOp::print(ostream &os) const {
+  os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b << ", "
+     << *c;
+}
+
+// Encodes the ternary intrinsic; each case builds the result one vector
+// element at a time from the corresponding elements of a, b and c.
+StateValue X86IntrinTerOp::toSMT(State &s) const {
+  auto rty = getType().getAsAggregateType();
+  auto aty = a->getType().getAsAggregateType();
+  auto bty = b->getType().getAsAggregateType();
+  auto cty = c->getType().getAsAggregateType();
+  auto &av = s[*a];
+  auto &bv = s[*b];
+  auto &cv = s[*c];
+
+  switch (op) {
+  case x86_avx2_pblendvb: {
+    vector vals;
+
+    for (int
i = 0; i < 32; ++i) {
+      auto [a, ap] = aty->extract(av, i);
+      auto [b, bp] = bty->extract(bv, i);
+      auto [c, cp] = cty->extract(cv, i);
+      // Bit 7 of the mask element c picks the source: clear -> a, set -> b.
+      auto v = expr::mkIf(c.extract(7, 7) == expr::mkUInt(0, 1), a, b);
+      // An element is non-poison only if all three input elements are.
+      vals.emplace_back(std::move(v), ap && bp && cp);
+    }
+    return rty->aggregateVals(vals);
+  }
+  }
+  UNREACHABLE();
+}
+
+// Type constraints for a ternary X86 intrinsic: operands a, b, c and the
+// result must each match the <# of lanes, element bits> shape recorded for
+// `op` in shape_op0 / shape_op1 / shape_op2 / shape_ret. A lane count of 1 is
+// treated as a plain integer of the given width instead of a vector.
+expr X86IntrinTerOp::getTypeConstraints(const Function &f) const {
+  return Value::getTypeConstraints() &&
+         (shape_op0[op].first != 1
+              ? a->getType().enforceVectorType([this](auto &ty) {
+                  return ty.enforceIntType(shape_op0[op].second);
+                }) &&
+                    a->getType().getAsAggregateType()->numElements() ==
+                        shape_op0[op].first
+              : a->getType().enforceIntType(shape_op0[op].second)) &&
+         (shape_op1[op].first != 1
+              ? b->getType().enforceVectorType([this](auto &ty) {
+                  return ty.enforceIntType(shape_op1[op].second);
+                }) &&
+                    b->getType().getAsAggregateType()->numElements() ==
+                        shape_op1[op].first
+              : b->getType().enforceIntType(shape_op1[op].second)) &&
+         // shape_op2 describes the third operand, so it must constrain c
+         // (not b); otherwise c's type is left unconstrained.
+         (shape_op2[op].first != 1
+              ? c->getType().enforceVectorType([this](auto &ty) {
+                  return ty.enforceIntType(shape_op2[op].second);
+                }) &&
+                    c->getType().getAsAggregateType()->numElements() ==
+                        shape_op2[op].first
+              : c->getType().enforceIntType(shape_op2[op].second)) &&
+         (shape_ret[op].first != 1
+              ?
getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_ret[op].second); + }) && + getType().getAsAggregateType()->numElements() == + shape_ret[op].first + : getType().enforceIntType(shape_ret[op].second)); +} +// Clones the instruction under a suffixed name with the same operands and op. +unique_ptr<Instr> X86IntrinTerOp::dup(Function &f, const string &suffix) const { + return make_unique<X86IntrinTerOp>(getType(), getName() + suffix, *a, *b, *c, + op); +} +// Operands in declaration order: a, b, c. +vector<Value*> X86IntrinTerOp::operands() const { + return {a, b, c}; +} + +bool X86IntrinTerOp::propagatesPoison() const { + return true; +} + +bool X86IntrinTerOp::hasSideEffects() const { + return false; +} + +void X86IntrinTerOp::rauw(const Value &what, Value &with) { + RAUW(a); + RAUW(b); + RAUW(c); +} +} // namespace IR diff --git a/ir/x86_intrinsics.h b/ir/x86_intrinsics.h new file mode 100644 index 000000000..eee448d58 --- /dev/null +++ b/ir/x86_intrinsics.h @@ -0,0 +1,141 @@ +#pragma once + +#include "ir/instr.h" + +namespace IR { +class FakeShuffle final : public Instr { + Value *v1, *v2, *mask; + +public: + FakeShuffle(Type &type, std::string &&name, Value &v1, Value &v2, Value &mask) + : Instr(type, std::move(name)), v1(&v1), v2(&v2), mask(&mask) {} + std::vector<Value*> operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr<Instr> dup(Function &f, + const std::string &suffix) const override; +}; +// Two-operand X86 vector intrinsic; ops and shapes come from the .inc table. +class X86IntrinBinOp final : public Instr { +public: + static constexpr unsigned numOfX86Intrinsics = 135; + enum Op { +#define PROCESS(NAME, A, B, C, D, E, F) NAME, +#include "x86_intrinsics_binop.inc" +#undef PROCESS + }; + // PROCESS(NAME, A, B, C, D, E, F): ret = (A, B), op0 = (C, D), op1 = (E, F) + // the shape of a vector is stored as <# of lanes, element bits> + static constexpr std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_op0 = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(C, D),
+#include "x86_intrinsics_binop.inc" +#undef PROCESS + }; + static constexpr std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_op1 = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(E, F), +#include "x86_intrinsics_binop.inc" +#undef PROCESS + }; + static constexpr std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_ret = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(A, B), +#include "x86_intrinsics_binop.inc" +#undef PROCESS + }; + static constexpr std::array<unsigned, numOfX86Intrinsics> ret_width = { +#define PROCESS(NAME, A, B, C, D, E, F) A *B, +#include "x86_intrinsics_binop.inc" +#undef PROCESS + }; + +private: + Value *a, *b; + Op op; + +public: + static unsigned getRetWidth(Op op) { + return ret_width[op]; + } + X86IntrinBinOp(Type &type, std::string &&name, Value &a, Value &b, Op op) + : Instr(type, std::move(name)), a(&a), b(&b), op(op) {} + std::vector<Value*> operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + static std::string getOpName(Op op); + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr<Instr> dup(Function &f, + const std::string &suffix) const override; +}; +// Three-operand X86 vector intrinsic; ops and shapes come from the .inc table. +class X86IntrinTerOp final : public Instr { +public: + static constexpr unsigned numOfX86Intrinsics = 1; + enum Op { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) NAME, +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + // PROCESS(NAME, A..H): ret (A, B), op0 (C, D), op1 (E, F), op2 (G, H) + // the shape of a vector is stored as <# of lanes, element bits> + static constexpr std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_op0 = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(C, D), +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + static constexpr std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_op1 = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(E, F), +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + static constexpr
std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_op2 = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(G, H), +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + static constexpr std::array<std::pair<unsigned, unsigned>, numOfX86Intrinsics> + shape_ret = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(A, B), +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + static constexpr std::array<unsigned, numOfX86Intrinsics> ret_width = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) A *B, +#include "x86_intrinsics_terop.inc" +#undef PROCESS + }; + // ret_width[op] is lanes * element bits of the result (A * B per table row) +private: + Value *a, *b, *c; + Op op; + +public: + static unsigned getRetWidth(Op op) { + return ret_width[op]; + } + X86IntrinTerOp(Type &type, std::string &&name, Value &a, Value &b, Value &c, + Op op) + : Instr(type, std::move(name)), a(&a), b(&b), c(&c), op(op) {} + std::vector<Value*> operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + static std::string getOpName(Op op); + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr<Instr> dup(Function &f, + const std::string &suffix) const override; +}; +} // namespace IR diff --git a/ir/x86_intrinsics_binop.inc b/ir/x86_intrinsics_binop.inc new file mode 100644 index 000000000..d44fbfef6 --- /dev/null +++ b/ir/x86_intrinsics_binop.inc @@ -0,0 +1,135 @@ +PROCESS(x86_sse2_pavg_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_pavg_b, 16, 8, 16, 8, 16, 8) +PROCESS(x86_avx2_pavg_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_pavg_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_avx512_pavg_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_avx512_pavg_b_512, 64, 8, 64, 8, 64, 8) +PROCESS(x86_avx2_pshuf_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_ssse3_pshuf_b_128, 16, 8, 16, 8, 16, 8) +PROCESS(x86_avx512_pshuf_b_512, 64, 8, 64, 8, 64, 8) +PROCESS(x86_sse2_psrl_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psrl_d, 4, 32, 4, 32,
4, 32) +PROCESS(x86_sse2_psrl_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psrl_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psrl_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx2_psrl_q, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psrl_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psrl_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psrl_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_psrli_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_psrli_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_sse2_psrli_q, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx2_psrli_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_psrli_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx2_psrli_q, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_psrli_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_psrli_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_psrli_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psrlv_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psrlv_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_psrlv_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psrlv_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psrlv_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psrlv_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psrlv_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psrlv_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psrlv_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_psra_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psra_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psra_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psra_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx512_psra_q_128, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx512_psra_q_256, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psra_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psra_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psra_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_psrai_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_psrai_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_avx2_psrai_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_psrai_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx512_psrai_w_512, 32, 16, 32, 16, 1, 
32) +PROCESS(x86_avx512_psrai_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_psrai_q_128, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx512_psrai_q_256, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_psrai_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psrav_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psrav_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx512_psrav_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psrav_q_128, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx512_psrav_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psrav_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psrav_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psrav_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psrav_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_psll_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psll_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_sse2_psll_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psll_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psll_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx2_psll_q, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psll_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psll_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psll_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_pslli_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_pslli_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_sse2_pslli_q, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx2_pslli_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_pslli_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx2_pslli_q, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_pslli_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_pslli_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_pslli_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psllv_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psllv_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_psllv_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psllv_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psllv_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psllv_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psllv_w_128, 8, 16, 8, 16, 8, 16) 
+PROCESS(x86_avx512_psllv_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psllv_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_ssse3_psign_b_128, 16, 8, 16, 8, 16, 8) +PROCESS(x86_ssse3_psign_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_psign_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psign_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_avx2_psign_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_psign_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_ssse3_phadd_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_phadd_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_ssse3_phadd_sw_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_phadd_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_phadd_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_phadd_sw, 16, 16, 16, 16, 16, 16) +PROCESS(x86_ssse3_phsub_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_phsub_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_ssse3_phsub_sw_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_phsub_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_phsub_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_phsub_sw, 16, 16, 16, 16, 16, 16) +PROCESS(x86_sse2_pmulh_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_pmulh_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_pmulh_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_pmulhu_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_pmulhu_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_pmulhu_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_pmadd_wd, 4, 32, 8, 16, 8, 16) +PROCESS(x86_avx2_pmadd_wd, 8, 32, 16, 16, 16, 16) +PROCESS(x86_avx512_pmaddw_d_512, 16, 32, 32, 16, 32, 16) +PROCESS(x86_ssse3_pmadd_ub_sw_128, 8, 16, 16, 8, 16, 8) +PROCESS(x86_avx2_pmadd_ub_sw, 16, 16, 32, 8, 32, 8) +PROCESS(x86_avx512_pmaddubs_w_512, 32, 16, 64, 8, 64, 8) +PROCESS(x86_sse2_packsswb_128, 16, 8, 8, 16, 8, 16) +PROCESS(x86_avx2_packsswb, 32, 8, 16, 16, 16, 16) +PROCESS(x86_avx512_packsswb_512, 64, 8, 32, 16, 32, 16) +PROCESS(x86_sse2_packuswb_128, 16, 8, 8, 16, 8, 16) +PROCESS(x86_avx2_packuswb, 32, 8, 16, 16, 16, 16) +PROCESS(x86_avx512_packuswb_512, 64, 8, 32, 16, 
32, 16) +PROCESS(x86_sse2_packssdw_128, 8, 16, 4, 32, 4, 32) +PROCESS(x86_avx2_packssdw, 16, 16, 8, 32, 8, 32) +PROCESS(x86_avx512_packssdw_512, 32, 16, 16, 32, 16, 32) +PROCESS(x86_sse41_packusdw, 8, 16, 4, 32, 4, 32) +PROCESS(x86_avx2_packusdw, 16, 16, 8, 32, 8, 32) +PROCESS(x86_avx512_packusdw_512, 32, 16, 16, 32, 16, 32) +PROCESS(x86_sse2_psad_bw, 2, 64, 16, 8, 16, 8) +PROCESS(x86_avx2_psad_bw, 4, 64, 32, 8, 32, 8) +PROCESS(x86_avx512_psad_bw_512, 8, 64, 64, 8, 64, 8) diff --git a/ir/x86_intrinsics_terop.inc b/ir/x86_intrinsics_terop.inc new file mode 100644 index 000000000..9d0265d02 --- /dev/null +++ b/ir/x86_intrinsics_terop.inc @@ -0,0 +1 @@ +PROCESS(x86_avx2_pblendvb, 32, 8, 32, 8, 32, 8, 32, 8) \ No newline at end of file diff --git a/llvm_util/known_fns.cpp b/llvm_util/known_fns.cpp index 440dcb384..120450e64 100644 --- a/llvm_util/known_fns.cpp +++ b/llvm_util/known_fns.cpp @@ -5,6 +5,7 @@ #include "llvm_util/utils.h" #include "ir/function.h" #include "ir/instr.h" +#include "ir/x86_intrinsics.h" #include "llvm/IR/Constants.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -516,8 +517,16 @@ known_call(llvm::CallInst &i, const llvm::TargetLibraryInfo &TLI, RETURN_EXACT(); auto decl = i.getCalledFunction(); + if (!decl) + RETURN_EXACT(); + + // FakeShuffle pseudo-call: callee named __fksv*, operands (v1, v2, mask) + if (decl->getName().starts_with("__fksv") && i.arg_size() == 3) + RETURN_VAL(make_unique<FakeShuffle>(*ty, value_name(i), *args[0], *args[1], + *args[2])); + llvm::LibFunc libfn; - if (!decl || !TLI.getLibFunc(*decl, libfn)) + if (!TLI.getLibFunc(*decl, libfn)) RETURN_EXACT(); auto tci = parse_fn_tailcall(i); diff --git a/llvm_util/llvm2alive.cpp b/llvm_util/llvm2alive.cpp index ec9ebe6f0..b655fd371 100644 --- a/llvm_util/llvm2alive.cpp +++ b/llvm_util/llvm2alive.cpp @@ -2,6 +2,7 @@ // Distributed under the MIT license that can be found in the LICENSE file.
#include "llvm_util/llvm2alive.h" +#include "ir/x86_intrinsics.h" #include "llvm_util/known_fns.h" #include "llvm_util/utils.h" #include "util/sort.h" @@ -15,6 +16,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ModRef.h" #include @@ -1227,6 +1229,45 @@ class llvm2alive_ : public llvm::InstVisitor> { case llvm::Intrinsic::prefetch: return NOP(i); + // Intel X86 intrinsics with two operands (one case label per table row) +#define PROCESS(NAME, A, B, C, D, E, F) case llvm::Intrinsic::NAME: +#include "ir/x86_intrinsics_binop.inc" +#undef PROCESS + { + PARSE_BINOP(); + X86IntrinBinOp::Op op; + switch (i.getIntrinsicID()) { +#define PROCESS(NAME, A, B, C, D, E, F) \ + case llvm::Intrinsic::NAME: \ + op = X86IntrinBinOp::NAME; \ + break; +#include "ir/x86_intrinsics_binop.inc" +#undef PROCESS + default: + UNREACHABLE(); + } + return make_unique<X86IntrinBinOp>(*ty, value_name(i), *a, *b, op); + } + // Intel X86 intrinsics with three operands +#define PROCESS(NAME, A, B, C, D, E, F, G, H) case llvm::Intrinsic::NAME: +#include "ir/x86_intrinsics_terop.inc" +#undef PROCESS + { + PARSE_TRIOP(); + X86IntrinTerOp::Op op; + switch (i.getIntrinsicID()) { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) \ + case llvm::Intrinsic::NAME: \ + op = X86IntrinTerOp::NAME; \ + break; +#include "ir/x86_intrinsics_terop.inc" +#undef PROCESS + default: + UNREACHABLE(); + } + return make_unique<X86IntrinTerOp>(*ty, value_name(i), *a, *b, *c, op); + } + default: break; } diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll new file mode 100644 index 000000000..fc0b1aa36 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32>
@llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll new file mode 100644 index 000000000..cc4406a27 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll new file mode 100644 index 000000000..5c28e9b95 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %neg = sub <8 x i32> zeroinitializer, %v + ret <8 x i32> %neg +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll new file mode 100644 index 000000000..246786752 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + %2 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> ) + ret <8 x i32> %2 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %neg = sub <8 x i32> zeroinitializer, %v + ret <8 x i32> %neg +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll new file mode 100644 index 000000000..32ce38d0c --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll @@ -0,0 +1,10 @@ 
+define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll new file mode 100644 index 000000000..7ff2b04cb --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -disable-undef-input + +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll new file mode 100644 index 000000000..7365b7355 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll new file mode 100644 index 000000000..4e454a092 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git 
a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll new file mode 100644 index 000000000..a5d6914b9 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll new file mode 100644 index 000000000..1e9fd5c72 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll new file mode 100644 index 000000000..d220fa17b --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll new file mode 100644 index 000000000..118c86192 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x 
i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll new file mode 100644 index 000000000..3f6994fba --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> poison +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll new file mode 100644 index 000000000..ea86f827a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll new file mode 100644 index 000000000..4f23a76a3 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll new file mode 
100644 index 000000000..20d0a355a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -disable-undef-input + +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + %tmp = lshr <4 x i64> %v, + ret <4 x i64> %tmp +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll new file mode 100644 index 000000000..00437e136 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + %tmp = lshr <4 x i64> %v, + ret <4 x i64> %tmp +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll new file mode 100644 index 000000000..abb3c758a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll new file mode 100644 index 000000000..a675c22df --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> 
%v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll new file mode 100644 index 000000000..66bec3e8a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll new file mode 100644 index 000000000..c561c94ef --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll new file mode 100644 index 000000000..f6b3a4803 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll new file mode 100644 index 000000000..5636e4de3 
--- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> poison +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll new file mode 100644 index 000000000..236ef23c1 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll new file mode 100644 index 000000000..a299a1b67 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll new file mode 100644 index 000000000..8ca311c02 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -disable-undef-input + +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + %tmp = lshr <16 x i16> %v, + ret <16 x i16> %tmp +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 
x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll new file mode 100644 index 000000000..193f599c9 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll @@ -0,0 +1,13 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + %tmp = lshr <16 x i16> %v, + ret <16 x i16> %tmp +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll new file mode 100644 index 000000000..26e70e2cd --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll new file mode 100644 index 000000000..3f1154eb2 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll new file mode 100644 index 000000000..a15297fab --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 
x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll new file mode 100644 index 000000000..eb33719ff --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined + diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll new file mode 100644 index 000000000..6098e6510 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll new file mode 100644 index 000000000..79ced610a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> poison +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git 
a/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll new file mode 100644 index 000000000..2d0ac7d6e --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll new file mode 100644 index 000000000..d7e55fa87 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll @@ -0,0 +1,16 @@ + + +define <32 x i8> @tgt() { +entry: + ret <32 x i8> +} +define <32 x i8> @src() { +entry: + %calltmp = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> , <32 x i8> ) + ret <32 x i8> %calltmp +} + + +declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) + + diff --git a/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll new file mode 100644 index 000000000..244451bd2 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll @@ -0,0 +1,11 @@ + +define <64 x i8> @tgt() { +entry: + ret <64 x i8> +} +define <64 x i8> @src() { +entry: + %calltmp = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> , <64 x i8> ) + ret <64 x i8> %calltmp +} +declare <64 x i8> @llvm.x86.avx512.pavg.b.512 (<64 x i8>, <64 x i8>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll new file mode 100644 index 000000000..825752f25 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> , <8 x i16> ) + ret <16 x i16> 
%calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll new file mode 100644 index 000000000..736243a6c --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src() { +entry: + %calltmp = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> , <4 x i32> ) + ret <8 x i32> %calltmp +} +define <8 x i32> @tgt() { +entry: + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll new file mode 100644 index 000000000..890add5b3 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll new file mode 100644 index 000000000..ca2da0440 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src() { +entry: + %calltmp = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> , <8 x i32> ) + ret <8 x i32> %calltmp +} +define <8 x i32> @tgt() { +entry: + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll new file mode 100644 index 
000000000..b9ca1e0e3 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll new file mode 100644 index 000000000..827d20c56 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll new file mode 100644 index 000000000..9951d2b1b --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src() { +entry: + %calltmp = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> , <8 x i32> ) + ret <8 x i32> %calltmp +} +define <8 x i32> @tgt() { +entry: + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll new file mode 100644 index 000000000..7c98d69d3 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + 
+declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll new file mode 100644 index 000000000..4cadbdd64 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i16> @src() { +entry: + %calltmp = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> , <16 x i8> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { +entry: + ret <8 x i16> +} + +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll new file mode 100644 index 000000000..465e03347 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll @@ -0,0 +1,12 @@ + +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> , <32 x i8> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll new file mode 100644 index 000000000..3f6def4bc --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll @@ -0,0 +1,11 @@ +define <32 x i16> @src() { +entry: + %calltmp = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> , <64 x i8> ) + ret <32 x i16> %calltmp +} +define <32 x i16> @tgt() { +entry: + ret <32 x i16> +} + +declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll b/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll new file mode 100644 index 000000000..469f6abae --- 
/dev/null +++ b/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll @@ -0,0 +1,12 @@ +; ERROR: Value mismatch + +; test found by Stefan's random tester + +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> , <8 x i16> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { + ret <8 x i16> +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll new file mode 100644 index 000000000..42abb8ef7 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll @@ -0,0 +1,10 @@ +; test found by Stefan's random tester + +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> , <8 x i16> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { + ret <8 x i16> +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll new file mode 100644 index 000000000..8a39bd30e --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll new file mode 100644 index 000000000..ce0275f93 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -disable-undef-input + +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> 
@llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll new file mode 100644 index 000000000..4f1ffb526 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll new file mode 100644 index 000000000..074634471 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll new file mode 100644 index 000000000..51300dca4 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll new file mode 100644 index 000000000..da506e2c3 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> 
@src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll new file mode 100644 index 000000000..beaf1b44f --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll new file mode 100644 index 000000000..1a856df06 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll new file mode 100644 index 000000000..5c5449aa1 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> poison +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git 
a/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll new file mode 100644 index 000000000..3152e1440 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll new file mode 100644 index 000000000..9b8319a4d --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll new file mode 100644 index 000000000..e76148671 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -disable-undef-input + +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + %tmp = lshr <2 x i64> %v, + ret <2 x i64> %tmp +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll new file mode 100644 index 000000000..7f1b08191 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll @@ -0,0 +1,13 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} 
+ +define <2 x i64> @tgt(<2 x i64> %v) { + %tmp = lshr <2 x i64> %v, + ret <2 x i64> %tmp +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll new file mode 100644 index 000000000..51e949fd8 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll new file mode 100644 index 000000000..3198d8831 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll new file mode 100644 index 000000000..1edd0efeb --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll new file mode 100644 index 
000000000..b3cdeeb84 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll @@ -0,0 +1,12 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll new file mode 100644 index 000000000..9bbf111ff --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll new file mode 100644 index 000000000..127443d74 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> poison +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll new file mode 100644 index 000000000..78e237a4c --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> 
%v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll new file mode 100644 index 000000000..51e8ba278 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll new file mode 100644 index 000000000..b5b1f57d4 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll @@ -0,0 +1,13 @@ +; TEST-ARGS: -disable-undef-input + +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + %tmp = lshr <8 x i16> %v, + ret <8 x i16> %tmp +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll new file mode 100644 index 000000000..81c57f18f --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + %tmp = lshr <8 x i16> %v, + ret <8 x i16> %tmp +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll new file mode 100644 index 000000000..24c2a90e0 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll @@ 
-0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll new file mode 100644 index 000000000..33a2a9d38 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll new file mode 100644 index 000000000..caa32b663 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll new file mode 100644 index 000000000..a007979be --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined + diff --git 
a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll new file mode 100644 index 000000000..241d24a15 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll new file mode 100644 index 000000000..bc03df188 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> poison +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll new file mode 100644 index 000000000..29e04d604 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) From efb39d2901422d8ec4a3686a335e79b20b95eea8 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:13:42 +0000 Subject: [PATCH 02/19] Update regression-feb-9-11.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll index 3f6def4bc..c960195a6 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll @@ -1,11 +1,10 @@ define <32 x i16> @src() { -entry: %calltmp = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> , <64 x i8> ) ret <32 x i16> %calltmp } + define <32 x i16> @tgt() { -entry: ret <32 x i16> } -declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) \ No newline at end of file +declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) From bec34d691c451ccd9bd4ef45c56804c0eeaed6e0 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:14:13 +0000 Subject: [PATCH 03/19] Update regression-feb-9-10.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll index 465e03347..23514bf8d 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll @@ -1,12 +1,10 @@ - define <16 x i16> @src() { -entry: %calltmp = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> , <32 x i8> ) ret <16 x i16> %calltmp } + define <16 x i16> @tgt() { -entry: ret <16 x i16> } -declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) \ No newline at end of file +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) From e62a8852c25ce315e2e12c1a2ace5584311e3967 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:14:40 +0000 Subject: [PATCH 04/19] Update regression-feb-9-09.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git 
a/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll index 4cadbdd64..b19b61e9d 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll @@ -1,11 +1,10 @@ define <8 x i16> @src() { -entry: %calltmp = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> , <16 x i8> ) ret <8 x i16> %calltmp } + define <8 x i16> @tgt() { -entry: ret <8 x i16> } -declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) \ No newline at end of file +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) From a23458ce49fd10070e50db9b2c3b0a770a958b46 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:16:18 +0000 Subject: [PATCH 05/19] Update regression-feb-9-08.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll index 7c98d69d3..ace2a13a5 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll @@ -1,11 +1,10 @@ define <16 x i16> @src() { -entry: %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> , <16 x i16> ) ret <16 x i16> %calltmp } + define <16 x i16> @tgt() { -entry: ret <16 x i16> } -declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) \ No newline at end of file +declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) From e3685154bca67e3c96857a8548b6058ef4ca3f2d Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:17:55 +0000 Subject: [PATCH 06/19] Update regression-feb-9-07.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll 
b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll index 9951d2b1b..5e131ea8d 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll @@ -1,11 +1,10 @@ define <8 x i32> @src() { -entry: %calltmp = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> , <8 x i32> ) ret <8 x i32> %calltmp } + define <8 x i32> @tgt() { -entry: ret <8 x i32> } -declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) \ No newline at end of file +declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) From 9594be555e31f91f94a416fe1fbeafe56ca87908 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:18:57 +0000 Subject: [PATCH 07/19] Update regression-feb-9-06.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll index 827d20c56..4e556ebae 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll @@ -1,11 +1,10 @@ define <16 x i16> @src() { -entry: %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> , <16 x i16> ) ret <16 x i16> %calltmp } + define <16 x i16> @tgt() { -entry: ret <16 x i16> } -declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) \ No newline at end of file +declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) From 51a8f1bbe9402293bc7636295a46952784ffa7fe Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:20:12 +0000 Subject: [PATCH 08/19] Update regression-feb-9-05.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll index b9ca1e0e3..a3fb533dc 100644 
--- a/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll @@ -1,11 +1,10 @@ define <16 x i16> @src() { -entry: %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> , <16 x i16> ) ret <16 x i16> %calltmp } + define <16 x i16> @tgt() { -entry: ret <16 x i16> } -declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) \ No newline at end of file +declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) From 8b1d677bbff4ec5c606e501728b202f357273e98 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:22:16 +0000 Subject: [PATCH 09/19] Update regression-feb-9-04.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll index ca2da0440..3cfbe4345 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll @@ -1,11 +1,10 @@ define <8 x i32> @src() { -entry: %calltmp = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> , <8 x i32> ) ret <8 x i32> %calltmp } + define <8 x i32> @tgt() { -entry: ret <8 x i32> } -declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) \ No newline at end of file +declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) From 231af0aa090d93c4c19a679159cf055c6ef91986 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:23:25 +0000 Subject: [PATCH 10/19] Update regression-feb-9-03.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll index 890add5b3..210c834e7 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll +++ 
b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll @@ -1,11 +1,10 @@ define <16 x i16> @src() { -entry: %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> , <16 x i16> ) ret <16 x i16> %calltmp } + define <16 x i16> @tgt() { -entry: ret <16 x i16> } -declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) \ No newline at end of file +declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) From 622f169ff8e8b5612cc867d8b978e3beba03c76f Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:24:36 +0000 Subject: [PATCH 11/19] Update regression-feb-9-02.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll index 736243a6c..1c59d5756 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll @@ -1,11 +1,10 @@ define <8 x i32> @src() { -entry: %calltmp = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> , <4 x i32> ) ret <8 x i32> %calltmp } + define <8 x i32> @tgt() { -entry: ret <8 x i32> } -declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) \ No newline at end of file +declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) From c2d7c2570fc56ba7b4f980c657825e614cb32641 Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:25:41 +0000 Subject: [PATCH 12/19] Update regression-feb-9-01.srctgt.ll --- tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll index 825752f25..edb6612e1 100644 --- a/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll +++ b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll @@ -1,11 +1,10 @@ define <16 x i16> 
@src() { -entry: %calltmp = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> , <8 x i16> ) ret <16 x i16> %calltmp } + define <16 x i16> @tgt() { -entry: ret <16 x i16> } -declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) \ No newline at end of file +declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) From 391c860767848adbf5d3c78f1dc4f17bc98e943f Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Tue, 3 Dec 2024 16:27:19 +0000 Subject: [PATCH 13/19] Update x86_intrinsics_terop.inc --- ir/x86_intrinsics_terop.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ir/x86_intrinsics_terop.inc b/ir/x86_intrinsics_terop.inc index 9d0265d02..de21d539f 100644 --- a/ir/x86_intrinsics_terop.inc +++ b/ir/x86_intrinsics_terop.inc @@ -1 +1 @@ -PROCESS(x86_avx2_pblendvb, 32, 8, 32, 8, 32, 8, 32, 8) \ No newline at end of file +PROCESS(x86_avx2_pblendvb, 32, 8, 32, 8, 32, 8, 32, 8) From eff1821104d6b939edb68f8e9ec04491c09bf79a Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 3 Dec 2024 17:03:24 +0000 Subject: [PATCH 14/19] tests/vector/x86: strip -disable-undef-input --- tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll | 2 -- tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll | 2 -- tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll | 2 -- tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll | 2 -- tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll | 2 -- tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll | 2 -- 6 files changed, 12 deletions(-) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll index 7ff2b04cb..938f4cb4b 100644 --- a/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll @@ -1,5 +1,3 @@ -; TEST-ARGS: -disable-undef-input - define <8 x i32> @src(<8 x i32> %v) { %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) ret <8 x i32> %1 diff --git 
a/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll index 20d0a355a..28d79772c 100644 --- a/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll @@ -1,5 +1,3 @@ -; TEST-ARGS: -disable-undef-input - define <4 x i64> @src(<4 x i64> %v) { %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) ret <4 x i64> %1 diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll index 8ca311c02..9f15a7aa5 100644 --- a/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll @@ -1,5 +1,3 @@ -; TEST-ARGS: -disable-undef-input - define <16 x i16> @src(<16 x i16> %v) { %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) ret <16 x i16> %1 diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll index ce0275f93..837b32860 100644 --- a/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll @@ -1,5 +1,3 @@ -; TEST-ARGS: -disable-undef-input - define <4 x i32> @src(<4 x i32> %v) { %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) ret <4 x i32> %1 diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll index e76148671..c0c0efe6b 100644 --- a/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll @@ -1,5 +1,3 @@ -; TEST-ARGS: -disable-undef-input - define <2 x i64> @src(<2 x i64> %v) { %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) ret <2 x i64> %1 diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll index b5b1f57d4..f577df356 100644 --- a/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll +++ 
b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll @@ -1,5 +1,3 @@ -; TEST-ARGS: -disable-undef-input - define <8 x i16> @src(<8 x i16> %v) { %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) ret <8 x i16> %1 From ba936fd4c58b6640bab2312121b49ac1a7873e3d Mon Sep 17 00:00:00 2001 From: Nuno Lopes Date: Fri, 6 Dec 2024 19:41:29 +0000 Subject: [PATCH 15/19] Update x86_intrinsics.cpp --- ir/x86_intrinsics.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ir/x86_intrinsics.cpp b/ir/x86_intrinsics.cpp index ba0bd1e60..439990b87 100644 --- a/ir/x86_intrinsics.cpp +++ b/ir/x86_intrinsics.cpp @@ -358,9 +358,7 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { auto [b, bp] = bty->extract(bv, i); expr id = (b & expr::mkUInt(0x0F, 8)) + (expr::mkUInt(i & 0x30, 8)); auto [r, rp] = avty->extract(av, id); - auto ai = expr::mkIf(b.extract(7, 7) == expr::mkUInt(0, 1), r, - expr::mkUInt(0, 8)); - + auto ai = expr::mkIf(b.extract(7, 7) == 0, r, expr::mkUInt(0, 8)); vals.emplace_back(std::move(ai), bp && rp); } return rty->aggregateVals(vals); @@ -674,7 +672,7 @@ StateValue X86IntrinTerOp::toSMT(State &s) const { auto [a, ap] = aty->extract(av, i); auto [b, bp] = bty->extract(bv, i); auto [c, cp] = cty->extract(cv, i); - auto v = expr::mkIf(c.extract(7, 7) == expr::mkUInt(0, 1), a, b); + auto v = expr::mkIf(c.extract(7, 7) == 0, a, b); vals.emplace_back(std::move(v), ap && bp && cp); } return rty->aggregateVals(vals); From 7f35c7e1e305d38e73882c2bb84671bb0fab2634 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 11 Dec 2024 10:55:50 +0000 Subject: [PATCH 16/19] x86_intrinsics: move statics to cpp; strip size --- ir/x86_intrinsics.cpp | 54 +++++++++++++++++++++++++++++++++++++ ir/x86_intrinsics.h | 63 +++++++------------------------------------ 2 files changed, 63 insertions(+), 54 deletions(-) diff --git a/ir/x86_intrinsics.cpp b/ir/x86_intrinsics.cpp index 439990b87..bd08497ed 100644 --- a/ir/x86_intrinsics.cpp +++ 
b/ir/x86_intrinsics.cpp @@ -68,6 +68,30 @@ vector X86IntrinBinOp::operands() const { return {a, b}; } +std::pair X86IntrinBinOp::shape_op0[] = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(C, D), +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +std::pair X86IntrinBinOp::shape_op1[] = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(E, F), +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +std::pair X86IntrinBinOp::shape_ret[] = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(A, B), +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + +unsigned X86IntrinBinOp::ret_width[] = { +#define PROCESS(NAME, A, B, C, D, E, F) A *B, +#include "x86_intrinsics_binop.inc" +#undef PROCESS +}; + bool X86IntrinBinOp::propagatesPoison() const { return true; } @@ -640,6 +664,36 @@ unique_ptr X86IntrinBinOp::dup(Function &f, const string &suffix) const { return make_unique(getType(), getName() + suffix, *a, *b, op); } +std::pair X86IntrinTerOp::shape_op0[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(C, D), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +std::pair X86IntrinTerOp::shape_op1[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(E, F), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +std::pair X86IntrinTerOp::shape_op2[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(G, H), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +std::pair X86IntrinTerOp::shape_ret[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(A, B), +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + +unsigned X86IntrinTerOp::ret_width[] = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) A *B, +#include "x86_intrinsics_terop.inc" +#undef PROCESS +}; + string X86IntrinTerOp::getOpName(Op op) { switch (op) { #define PROCESS(NAME, A, B, C, D, E, F, G, H) \ diff --git a/ir/x86_intrinsics.h b/ir/x86_intrinsics.h index eee448d58..42ae40259 100644 
--- a/ir/x86_intrinsics.h +++ b/ir/x86_intrinsics.h @@ -22,7 +22,6 @@ class FakeShuffle final : public Instr { class X86IntrinBinOp final : public Instr { public: - static constexpr unsigned numOfX86Intrinsics = 135; enum Op { #define PROCESS(NAME, A, B, C, D, E, F) NAME, #include "x86_intrinsics_binop.inc" @@ -30,29 +29,10 @@ class X86IntrinBinOp final : public Instr { }; // the shape of a vector is stored as <# of lanes, element bits> - static constexpr std::array, numOfX86Intrinsics> - shape_op0 = { -#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(C, D), -#include "x86_intrinsics_binop.inc" -#undef PROCESS - }; - static constexpr std::array, numOfX86Intrinsics> - shape_op1 = { -#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(E, F), -#include "x86_intrinsics_binop.inc" -#undef PROCESS - }; - static constexpr std::array, numOfX86Intrinsics> - shape_ret = { -#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(A, B), -#include "x86_intrinsics_binop.inc" -#undef PROCESS - }; - static constexpr std::array ret_width = { -#define PROCESS(NAME, A, B, C, D, E, F) A *B, -#include "x86_intrinsics_binop.inc" -#undef PROCESS - }; + static std::pair shape_op0[]; + static std::pair shape_op1[]; + static std::pair shape_ret[]; + static unsigned ret_width[]; private: Value *a, *b; @@ -78,7 +58,6 @@ class X86IntrinBinOp final : public Instr { class X86IntrinTerOp final : public Instr { public: - static constexpr unsigned numOfX86Intrinsics = 1; enum Op { #define PROCESS(NAME, A, B, C, D, E, F, G, H) NAME, #include "x86_intrinsics_terop.inc" @@ -86,35 +65,11 @@ class X86IntrinTerOp final : public Instr { }; // the shape of a vector is stored as <# of lanes, element bits> - static constexpr std::array, numOfX86Intrinsics> - shape_op0 = { -#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(C, D), -#include "x86_intrinsics_terop.inc" -#undef PROCESS - }; - static constexpr std::array, numOfX86Intrinsics> - shape_op1 = { -#define PROCESS(NAME, A, B, C, D, E, 
F, G, H) std::make_pair(E, F), -#include "x86_intrinsics_terop.inc" -#undef PROCESS - }; - static constexpr std::array, numOfX86Intrinsics> - shape_op2 = { -#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(G, H), -#include "x86_intrinsics_terop.inc" -#undef PROCESS - }; - static constexpr std::array, numOfX86Intrinsics> - shape_ret = { -#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(A, B), -#include "x86_intrinsics_terop.inc" -#undef PROCESS - }; - static constexpr std::array ret_width = { -#define PROCESS(NAME, A, B, C, D, E, F, G, H) A *B, -#include "x86_intrinsics_terop.inc" -#undef PROCESS - }; + static std::pair shape_op0[]; + static std::pair shape_op1[]; + static std::pair shape_op2[]; + static std::pair shape_ret[]; + static unsigned ret_width[]; private: Value *a, *b, *c; From cb9ab71c32783c3428615c5f28463e812924dbbb Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 11 Dec 2024 11:03:20 +0000 Subject: [PATCH 17/19] known_fns: strip cryptic fksv --- llvm_util/known_fns.cpp | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/llvm_util/known_fns.cpp b/llvm_util/known_fns.cpp index 120450e64..440dcb384 100644 --- a/llvm_util/known_fns.cpp +++ b/llvm_util/known_fns.cpp @@ -5,7 +5,6 @@ #include "llvm_util/utils.h" #include "ir/function.h" #include "ir/instr.h" -#include "ir/x86_intrinsics.h" #include "llvm/IR/Constants.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/TargetLibraryInfo.h" @@ -517,16 +516,8 @@ known_call(llvm::CallInst &i, const llvm::TargetLibraryInfo &TLI, RETURN_EXACT(); auto decl = i.getCalledFunction(); - if (!decl) - RETURN_EXACT(); - - // Intel X86 intrinsics - if (decl->hasName() && decl->getName().starts_with("__fksv")) - RETURN_VAL(make_unique(*ty, value_name(i), *args[0], *args[1], - *args[2])); - llvm::LibFunc libfn; - if (!TLI.getLibFunc(*decl, libfn)) + if (!decl || !TLI.getLibFunc(*decl, libfn)) RETURN_EXACT(); auto tci = parse_fn_tailcall(i); From 
7c8b5993a1bbff894efeaee863d82efd32979006 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 11 Dec 2024 11:12:54 +0000 Subject: [PATCH 18/19] x86_intrinsics: strip dead FakeShuffle --- ir/x86_intrinsics.cpp | 59 ------------------------------------------- ir/x86_intrinsics.h | 17 ------------- 2 files changed, 76 deletions(-) diff --git a/ir/x86_intrinsics.cpp b/ir/x86_intrinsics.cpp index bd08497ed..ce13fd544 100644 --- a/ir/x86_intrinsics.cpp +++ b/ir/x86_intrinsics.cpp @@ -5,65 +5,6 @@ using namespace smt; using namespace std; namespace IR { -vector FakeShuffle::operands() const { - return {v1, v2, mask}; -} - -bool FakeShuffle::propagatesPoison() const { - return false; -} - -bool FakeShuffle::hasSideEffects() const { - return false; -} - -void FakeShuffle::rauw(const Value &what, Value &with) { - RAUW(v1); - RAUW(v2); - RAUW(mask); -} - -void FakeShuffle::print(ostream &os) const { - os << getName() << " = fakesv " << *v1 << ", " << *v2 << ", " << *mask; -} - -StateValue FakeShuffle::toSMT(State &s) const { - auto vty = - static_cast(v1->getType().getAsAggregateType()); - auto mty = mask->getType().getAsAggregateType(); - auto sz = vty->numElementsConst(); - vector vals; - - for (unsigned i = 0, e = mty->numElementsConst(); i != e; ++i) { - auto [m_v, m_p] = mty->extract(s[*mask], i); - expr bound = expr::mkUInt(sz, m_v); - expr idx = m_v.urem(bound); - auto [v1v, v1p] = vty->extract(s[*v1], idx); - auto [v2v, v2p] = vty->extract(s[*v2], idx); - expr v = expr::mkIf(m_v.ult(bound), v1v, v2v); - expr np = expr::mkIf(m_v.ult(bound), v1p, v2p); - expr inbounds = m_v.ult(expr::mkUInt(vty->numElementsConst() * 2, m_v)); - - vals.emplace_back(std::move(v), inbounds && np); - } - - return getType().getAsAggregateType()->aggregateVals(vals); -} - -expr FakeShuffle::getTypeConstraints(const Function &f) const { - return Value::getTypeConstraints() && - getType().enforceVectorTypeSameChildTy(v1->getType()) && - getType().getAsAggregateType()->numElements() == 
- mask->getType().getAsAggregateType()->numElements() && - v1->getType().enforceVectorType() && v1->getType() == v2->getType() && - mask->getType().enforceVectorType(); -} - -unique_ptr FakeShuffle::dup(Function &f, const string &suffix) const { - return make_unique(getType(), getName() + suffix, *v1, *v2, - *mask); -} - vector X86IntrinBinOp::operands() const { return {a, b}; } diff --git a/ir/x86_intrinsics.h b/ir/x86_intrinsics.h index 42ae40259..566885f92 100644 --- a/ir/x86_intrinsics.h +++ b/ir/x86_intrinsics.h @@ -3,23 +3,6 @@ #include "ir/instr.h" namespace IR { -class FakeShuffle final : public Instr { - Value *v1, *v2, *mask; - -public: - FakeShuffle(Type &type, std::string &&name, Value &v1, Value &v2, Value &mask) - : Instr(type, std::move(name)), v1(&v1), v2(&v2), mask(&mask) {} - std::vector operands() const override; - bool propagatesPoison() const override; - bool hasSideEffects() const override; - void rauw(const Value &what, Value &with) override; - void print(std::ostream &os) const override; - StateValue toSMT(State &s) const override; - smt::expr getTypeConstraints(const Function &f) const override; - std::unique_ptr dup(Function &f, - const std::string &suffix) const override; -}; - class X86IntrinBinOp final : public Instr { public: enum Op { From 3c299a55abc47e10f9ab8662bf88698eef0da9f1 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 11 Dec 2024 12:25:37 +0000 Subject: [PATCH 19/19] x86_intrinsics: move static globals from h to cpp --- ir/x86_intrinsics.cpp | 106 +++++++++++++++++++++++------------------- ir/x86_intrinsics.h | 21 +-------- 2 files changed, 60 insertions(+), 67 deletions(-) diff --git a/ir/x86_intrinsics.cpp b/ir/x86_intrinsics.cpp index ce13fd544..22d2ec398 100644 --- a/ir/x86_intrinsics.cpp +++ b/ir/x86_intrinsics.cpp @@ -5,34 +5,39 @@ using namespace smt; using namespace std; namespace IR { -vector X86IntrinBinOp::operands() const { - return {a, b}; -} - -std::pair X86IntrinBinOp::shape_op0[] = { +// the 
shape of a vector is stored as <# of lanes, element bits> +static constexpr std::pair binop_shape_op0[] = { #define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(C, D), #include "x86_intrinsics_binop.inc" #undef PROCESS }; -std::pair X86IntrinBinOp::shape_op1[] = { +static constexpr std::pair binop_shape_op1[] = { #define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(E, F), #include "x86_intrinsics_binop.inc" #undef PROCESS }; -std::pair X86IntrinBinOp::shape_ret[] = { +static constexpr std::pair binop_shape_ret[] = { #define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(A, B), #include "x86_intrinsics_binop.inc" #undef PROCESS }; -unsigned X86IntrinBinOp::ret_width[] = { +static constexpr unsigned binop_ret_width[] = { #define PROCESS(NAME, A, B, C, D, E, F) A *B, #include "x86_intrinsics_binop.inc" #undef PROCESS }; +unsigned X86IntrinBinOp::getRetWidth(Op op) { + return binop_ret_width[op]; +} + +vector X86IntrinBinOp::operands() const { + return {a, b}; +} + bool X86IntrinBinOp::propagatesPoison() const { return true; } @@ -318,7 +323,7 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { case x86_avx512_pshuf_b_512: { auto avty = static_cast(aty); vector vals; - unsigned laneCount = shape_ret[op].first; + unsigned laneCount = binop_shape_ret[op].first; for (unsigned i = 0; i != laneCount; ++i) { auto [b, bp] = bty->extract(bv, i); expr id = (b & expr::mkUInt(0x0F, 8)) + (expr::mkUInt(i & 0x30, 8)); @@ -342,8 +347,8 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { case x86_avx2_phsub_d: case x86_avx2_phsub_sw: { vector vals; - unsigned laneCount = shape_ret[op].first; - unsigned groupsize = 128 / shape_ret[op].second; + unsigned laneCount = binop_shape_ret[op].first; + unsigned groupsize = 128 / binop_shape_ret[op].second; function fn; switch (op) { case x86_ssse3_phadd_w_128: @@ -484,7 +489,7 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { case x86_avx2_pmadd_ub_sw: case x86_avx512_pmaddubs_w_512: { vector vals; - for (unsigned i = 0, e = 
shape_ret[op].first; i != e; ++i) { + for (unsigned i = 0, e = binop_shape_ret[op].first; i != e; ++i) { auto [a1, a1p] = aty->extract(av, i * 2); auto [a2, a2p] = aty->extract(av, i * 2 + 1); auto [b1, b1p] = bty->extract(bv, i * 2); @@ -537,8 +542,8 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { }; } - unsigned groupsize = 128 / shape_op1[op].second; - unsigned laneCount = shape_op1[op].first; + unsigned groupsize = 128 / binop_shape_op1[op].second; + unsigned laneCount = binop_shape_op1[op].first; for (unsigned j = 0; j != laneCount / groupsize; j++) { for (unsigned i = 0; i != groupsize; i++) { auto [a1, p1] = aty->extract(av, j * groupsize + i); @@ -554,7 +559,7 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { case x86_sse2_psad_bw: case x86_avx2_psad_bw: case x86_avx512_psad_bw_512: { - unsigned ngroup = shape_ret[op].first; + unsigned ngroup = binop_shape_ret[op].first; vector vals; for (unsigned j = 0; j < ngroup; ++j) { expr np = true; @@ -578,58 +583,59 @@ StateValue X86IntrinBinOp::toSMT(State &s) const { expr X86IntrinBinOp::getTypeConstraints(const Function &f) const { return Value::getTypeConstraints() && - (shape_op0[op].first != 1 + (binop_shape_op0[op].first != 1 ? a->getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_op0[op].second); + return ty.enforceIntType(binop_shape_op0[op].second); }) && a->getType().getAsAggregateType()->numElements() == - shape_op0[op].first - : a->getType().enforceIntType(shape_op0[op].second)) && - (shape_op1[op].first != 1 + binop_shape_op0[op].first + : a->getType().enforceIntType(binop_shape_op0[op].second)) && + (binop_shape_op1[op].first != 1 ? 
b->getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_op1[op].second); + return ty.enforceIntType(binop_shape_op1[op].second); }) && b->getType().getAsAggregateType()->numElements() == - shape_op1[op].first - : b->getType().enforceIntType(shape_op1[op].second)) && - (shape_ret[op].first != 1 + binop_shape_op1[op].first + : b->getType().enforceIntType(binop_shape_op1[op].second)) && + (binop_shape_ret[op].first != 1 ? getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_ret[op].second); + return ty.enforceIntType(binop_shape_ret[op].second); }) && getType().getAsAggregateType()->numElements() == - shape_ret[op].first - : getType().enforceIntType(shape_ret[op].second)); + binop_shape_ret[op].first + : getType().enforceIntType(binop_shape_ret[op].second)); } unique_ptr X86IntrinBinOp::dup(Function &f, const string &suffix) const { return make_unique(getType(), getName() + suffix, *a, *b, op); } -std::pair X86IntrinTerOp::shape_op0[] = { +// the shape of a vector is stored as <# of lanes, element bits> +static constexpr std::pair terop_shape_op0[] = { #define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(C, D), #include "x86_intrinsics_terop.inc" #undef PROCESS }; -std::pair X86IntrinTerOp::shape_op1[] = { +static constexpr std::pair terop_shape_op1[] = { #define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(E, F), #include "x86_intrinsics_terop.inc" #undef PROCESS }; -std::pair X86IntrinTerOp::shape_op2[] = { +static constexpr std::pair terop_shape_op2[] = { #define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(G, H), #include "x86_intrinsics_terop.inc" #undef PROCESS }; -std::pair X86IntrinTerOp::shape_ret[] = { +static constexpr std::pair terop_shape_ret[] = { #define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(A, B), #include "x86_intrinsics_terop.inc" #undef PROCESS }; -unsigned X86IntrinTerOp::ret_width[] = { +static constexpr unsigned terop_ret_width[] = { #define PROCESS(NAME, 
A, B, C, D, E, F, G, H) A *B, #include "x86_intrinsics_terop.inc" #undef PROCESS @@ -646,6 +652,10 @@ string X86IntrinTerOp::getOpName(Op op) { UNREACHABLE(); } +unsigned X86IntrinTerOp::getRetWidth(Op op) { + return terop_ret_width[op]; +} + void X86IntrinTerOp::print(ostream &os) const { os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b; } @@ -678,34 +688,35 @@ StateValue X86IntrinTerOp::toSMT(State &s) const { expr X86IntrinTerOp::getTypeConstraints(const Function &f) const { return Value::getTypeConstraints() && - (shape_op0[op].first != 1 + (terop_shape_op0[op].first != 1 ? a->getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_op0[op].second); + return ty.enforceIntType(terop_shape_op0[op].second); }) && a->getType().getAsAggregateType()->numElements() == - shape_op0[op].first - : a->getType().enforceIntType(shape_op0[op].second)) && - (shape_op1[op].first != 1 + terop_shape_op0[op].first + : a->getType().enforceIntType(terop_shape_op0[op].second)) && + (terop_shape_op1[op].first != 1 ? b->getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_op1[op].second); + return ty.enforceIntType(terop_shape_op1[op].second); }) && b->getType().getAsAggregateType()->numElements() == - shape_op1[op].first - : b->getType().enforceIntType(shape_op1[op].second)) && - (shape_op2[op].first != 1 + terop_shape_op1[op].first + : b->getType().enforceIntType(terop_shape_op1[op].second)) && + // the third operand (c) is checked against its own shape table entry + (terop_shape_op2[op].first != 1 - ? b->getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_op2[op].second); + ? c->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(terop_shape_op2[op].second); }) && - b->getType().getAsAggregateType()->numElements() == - shape_op2[op].first - : b->getType().enforceIntType(shape_op2[op].second)) && - (shape_ret[op].first != 1 + c->getType().getAsAggregateType()->numElements() == + terop_shape_op2[op].first + : c->getType().enforceIntType(terop_shape_op2[op].second)) && + (terop_shape_ret[op].first != 1 ?
getType().enforceVectorType([this](auto &ty) { - return ty.enforceIntType(shape_ret[op].second); + return ty.enforceIntType(terop_shape_ret[op].second); }) && getType().getAsAggregateType()->numElements() == - shape_ret[op].first - : getType().enforceIntType(shape_ret[op].second)); + terop_shape_ret[op].first + : getType().enforceIntType(terop_shape_ret[op].second)); } unique_ptr X86IntrinTerOp::dup(Function &f, const string &suffix) const { diff --git a/ir/x86_intrinsics.h b/ir/x86_intrinsics.h index 566885f92..aaa09c989 100644 --- a/ir/x86_intrinsics.h +++ b/ir/x86_intrinsics.h @@ -11,20 +11,12 @@ class X86IntrinBinOp final : public Instr { #undef PROCESS }; - // the shape of a vector is stored as <# of lanes, element bits> - static std::pair shape_op0[]; - static std::pair shape_op1[]; - static std::pair shape_ret[]; - static unsigned ret_width[]; - private: Value *a, *b; Op op; public: - static unsigned getRetWidth(Op op) { - return ret_width[op]; - } + static unsigned getRetWidth(Op op); X86IntrinBinOp(Type &type, std::string &&name, Value &a, Value &b, Op op) : Instr(type, std::move(name)), a(&a), b(&b), op(op) {} std::vector operands() const override; @@ -47,21 +39,12 @@ class X86IntrinTerOp final : public Instr { #undef PROCESS }; - // the shape of a vector is stored as <# of lanes, element bits> - static std::pair shape_op0[]; - static std::pair shape_op1[]; - static std::pair shape_op2[]; - static std::pair shape_ret[]; - static unsigned ret_width[]; - private: Value *a, *b, *c; Op op; public: - static unsigned getRetWidth(Op op) { - return ret_width[op]; - } + static unsigned getRetWidth(Op op); X86IntrinTerOp(Type &type, std::string &&name, Value &a, Value &b, Value &c, Op op) : Instr(type, std::move(name)), a(&a), b(&b), c(&c), op(op) {}