diff --git a/ir/instr.cpp b/ir/instr.cpp index 7402326ba..c21903acd 100644 --- a/ir/instr.cpp +++ b/ir/instr.cpp @@ -19,6 +19,8 @@ using namespace smt; using namespace util; using namespace std; +bool useApprox = false; + #define RAUW(val) \ if (val == &what) \ val = &with @@ -846,26 +848,50 @@ StateValue FpBinOp::toSMT(State &s) const { switch (op) { case FAdd: - fn = [](const expr &a, const expr &b, const expr &rm) { - return a.fadd(b, rm); + fn = [&s](const expr &a, const expr &b, const expr &rm) { + if (useApprox) { + expr fadd = expr::mkUF("fadd_appprox", {a, b}, a); + s.doesApproximation("fadd_approx", fadd); + return fadd; + } else { + return a.fadd(b, rm); + } }; break; case FSub: - fn = [](const expr &a, const expr &b, const expr &rm) { - return a.fsub(b, rm); + fn = [&s](const expr &a, const expr &b, const expr &rm) { + if (useApprox) { + expr fsub = expr::mkUF("fsub_appprox", {a, b}, a); + s.doesApproximation("fsub_approx", fsub); + return fsub; + } else { + return a.fsub(b, rm); + } }; break; case FMul: - fn = [](const expr &a, const expr &b, const expr &rm) { - return a.fmul(b, rm); + fn = [&s](const expr &a, const expr &b, const expr &rm) { + if (useApprox) { + expr fmul = expr::mkUF("fmul_appprox", {a, b}, a); + s.doesApproximation("fsub_approx", fmul); + return fmul; + } else { + return a.fmul(b, rm); + } }; break; case FDiv: - fn = [](const expr &a, const expr &b, const expr &rm) { - return a.fdiv(b, rm); + fn = [&s](const expr &a, const expr &b, const expr &rm) { + if (useApprox) { + expr fdiv = expr::mkUF("fdiv_approx", {a, b}, a); + s.doesApproximation("fdiv_approx", fdiv); + return fdiv; + } else { + return a.fdiv(b, rm); + } }; break; @@ -881,28 +907,53 @@ StateValue FpBinOp::toSMT(State &s) const { case FMin: case FMax: fn = [&](const expr &a, const expr &b, const expr &rm) { - expr ndet = s.getFreshNondetVar("maxminnondet", true); - expr cmp = op == FMin ? a.fole(b) : a.foge(b); - return expr::mkIf(a.isNaN(), b, - expr::mkIf(b.isNaN(), a, - expr::mkIf(a.foeq(b), - expr::mkIf(ndet, a, b), - expr::mkIf(cmp, a, b)))); + if (useApprox) { + expr fm = op == FMin ? expr::mkUF("fmin_approx", {a, b}, a) + : expr::mkUF("fmax_approx", {a, b}, a); + if (op == FMin) + s.doesApproximation("fmin_approx", fm); + else + s.doesApproximation("fmax_approx", fm); + + return fm; + } else { + expr ndet = s.getFreshNondetVar("maxminnondet", true); + auto ndz = + expr::mkIf(ndet, expr::mkNumber("0", a), expr::mkNumber("-0", a)); + + expr z = a.isFPZero() && b.isFPZero(); + expr cmp = op == FMin ? a.fole(b) : a.foge(b); + return expr::mkIf( + a.isNaN(), b, + expr::mkIf(b.isNaN(), a, + expr::mkIf(z, ndz, expr::mkIf(cmp, a, b)))); + } }; break; case FMinimum: case FMaximum: fn = [&](const expr &a, const expr &b, const expr &rm) { - expr zpos = expr::mkNumber("0", a), zneg = expr::mkNumber("-0", a); - expr cmp = (op == FMinimum) ? a.fole(b) : a.foge(b); - expr neg_cond = op == FMinimum ? (a.isFPNegative() || b.isFPNegative()) - : (a.isFPNegative() && b.isFPNegative()); - expr e = expr::mkIf(a.isFPZero() && b.isFPZero(), - expr::mkIf(neg_cond, zneg, zpos), - expr::mkIf(cmp, a, b)); - - return expr::mkIf(a.isNaN(), a, expr::mkIf(b.isNaN(), b, e)); + if (useApprox) { + expr fm = op == FMinimum ? expr::mkUF("fminimum_approx", {a, b}, a) + : expr::mkUF("fmaximum_approx", {a, b}, a); + if (op == FMinimum) + s.doesApproximation("fminimum_approx", fm); + else + s.doesApproximation("fmaximum_approx", fm); + + return fm; + } else { + expr zpos = expr::mkNumber("0", a), zneg = expr::mkNumber("-0", a); + expr cmp = (op == FMinimum) ? a.fole(b) : a.foge(b); + expr neg_cond = op == FMinimum ? (a.isFPNegative() || b.isFPNegative()) + : (a.isFPNegative() && b.isFPNegative()); + expr e = + expr::mkIf(a.isFPZero() && b.isFPZero(), + expr::mkIf(neg_cond, zneg, zpos), expr::mkIf(cmp, a, b)); + + return expr::mkIf(a.isNaN(), a, expr::mkIf(b.isNaN(), b, e)); + } }; break; @@ -4932,6 +4983,742 @@ unique_ptr ShuffleVector::dup(Function &f, const string &suffix) const { *v1, *v2, mask); } +vector FakeShuffle::operands() const { + return {v1, v2, mask}; +} + +bool FakeShuffle::propagatesPoison() const { + return false; +} + +bool FakeShuffle::hasSideEffects() const { + return false; +} + +void FakeShuffle::rauw(const Value &what, Value &with) { + RAUW(v1); + RAUW(v2); + RAUW(mask); +} + +void FakeShuffle::print(ostream &os) const { + os << getName() << " = fakesv " << *v1 << ", " << *v2 << ", " << *mask; +} + +StateValue FakeShuffle::toSMT(State &s) const { + auto vty = + static_cast(v1->getType().getAsAggregateType()); + auto mty = mask->getType().getAsAggregateType(); + auto sz = vty->numElementsConst(); + vector vals; + + for (unsigned i = 0, e = mty->numElementsConst(); i != e; ++i) { + auto [m_v, m_p] = mty->extract(s[*mask], i); + expr bound = expr::mkUInt(sz, m_v); + expr idx = m_v.urem(bound); + auto [v1v, v1p] = vty->extract(s[*v1], idx); + auto [v2v, v2p] = vty->extract(s[*v2], idx); + expr v = expr::mkIf(m_v.ult(bound), v1v, v2v); + expr np = expr::mkIf(m_v.ult(bound), v1p, v2p); + expr inbounds = m_v.ult(expr::mkUInt(vty->numElementsConst() * 2, m_v)); + + vals.emplace_back(std::move(v), inbounds && np); + } + + return getType().getAsAggregateType()->aggregateVals(vals); +} + +expr FakeShuffle::getTypeConstraints(const Function &f) const { + return Value::getTypeConstraints() && + getType().enforceVectorTypeSameChildTy(v1->getType()) && + getType().getAsAggregateType()->numElements() == + mask->getType().getAsAggregateType()->numElements() && + v1->getType().enforceVectorType() && v1->getType() == v2->getType() && + mask->getType().enforceVectorType(); +} + +unique_ptr FakeShuffle::dup(Function &f, const string &suffix) const { + return make_unique(getType(), getName() + suffix, *v1, *v2, + *mask); +} + +vector X86IntrinBinOp::operands() const { + return {a, b}; +} + +bool X86IntrinBinOp::propagatesPoison() const { + return true; +} + +bool X86IntrinBinOp::hasSideEffects() const { + return false; +} + +void X86IntrinBinOp::rauw(const Value &what, Value &with) { + RAUW(a); + RAUW(b); +} + +string X86IntrinBinOp::getOpName(Op op) { + switch (op) { +#define PROCESS(NAME, A, B, C, D, E, F) \ + case NAME: \ + return #NAME; +#include "intrinsics_binop.h" +#undef PROCESS + } + UNREACHABLE(); +} + +void X86IntrinBinOp::print(ostream &os) const { + os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b; +} + +StateValue X86IntrinBinOp::toSMT(State &s) const { + auto rty = getType().getAsAggregateType(); + auto aty = a->getType().getAsAggregateType(); + auto bty = b->getType().getAsAggregateType(); + auto &av = s[*a]; + auto &bv = s[*b]; + + switch (op) { + // shift by one variable + case x86_sse2_psrl_w: + case x86_sse2_psrl_d: + case x86_sse2_psrl_q: + case x86_avx2_psrl_w: + case x86_avx2_psrl_d: + case x86_avx2_psrl_q: + case x86_avx512_psrl_w_512: + case x86_avx512_psrl_d_512: + case x86_avx512_psrl_q_512: + case x86_sse2_psra_w: + case x86_sse2_psra_d: + case x86_avx2_psra_w: + case x86_avx2_psra_d: + case x86_avx512_psra_q_128: + case x86_avx512_psra_q_256: + case x86_avx512_psra_w_512: + case x86_avx512_psra_d_512: + case x86_avx512_psra_q_512: + case x86_sse2_psll_w: + case x86_sse2_psll_d: + case x86_sse2_psll_q: + case x86_avx2_psll_w: + case x86_avx2_psll_d: + case x86_avx2_psll_q: + case x86_avx512_psll_w_512: + case x86_avx512_psll_d_512: + case x86_avx512_psll_q_512: { + vector vals; + unsigned elem_bw = bty->getChild(0).bits(); + + expr shift_np = true; + expr shift_v; + // extract lower 64 bits from b + for (unsigned i = 0, e = 64 / elem_bw; i != e; ++i) { + StateValue vv = bty->extract(bv, i); + shift_v = (i == 0) ? vv.value : vv.value.concat(shift_v); + // if any elements in lower 64 bits is poison, the result is poison + shift_np &= vv.non_poison; + } + function fn; + switch (op) { + case x86_sse2_psrl_w: + case x86_sse2_psrl_d: + case x86_sse2_psrl_q: + case x86_avx2_psrl_w: + case x86_avx2_psrl_d: + case x86_avx2_psrl_q: + case x86_avx512_psrl_w_512: + case x86_avx512_psrl_d_512: + case x86_avx512_psrl_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkUInt(0, elem_bw), a.lshr(b)); + }; + break; + case x86_sse2_psra_w: + case x86_sse2_psra_d: + case x86_avx2_psra_w: + case x86_avx2_psra_d: + case x86_avx512_psra_q_128: + case x86_avx512_psra_q_256: + case x86_avx512_psra_w_512: + case x86_avx512_psra_d_512: + case x86_avx512_psra_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkIf(a.isNegative(), expr::mkUInt(-1, elem_bw), + expr::mkUInt(0, elem_bw)), + a.ashr(b)); + }; + break; + case x86_sse2_psll_w: + case x86_sse2_psll_d: + case x86_sse2_psll_q: + case x86_avx2_psll_w: + case x86_avx2_psll_d: + case x86_avx2_psll_q: + case x86_avx512_psll_w_512: + case x86_avx512_psll_d_512: + case x86_avx512_psll_q_512: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf(shift_v.uge(expr::mkUInt(elem_bw, 64)), + expr::mkUInt(0, elem_bw), a << b); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = aty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + expr shift = fn(ai.value, shift_v.trunc(elem_bw)); + vals.emplace_back(std::move(shift), shift_np && ai.non_poison); + } + return rty->aggregateVals(vals); + } + // vertical + case x86_sse2_pavg_w: + case x86_sse2_pavg_b: + case x86_avx2_pavg_w: + case x86_avx2_pavg_b: + case x86_avx512_pavg_w_512: + case x86_avx512_pavg_b_512: + case x86_ssse3_psign_b_128: + case x86_ssse3_psign_w_128: + case x86_ssse3_psign_d_128: + case x86_avx2_psign_b: + case x86_avx2_psign_w: + case x86_avx2_psign_d: + case x86_avx2_psrlv_d: + case x86_avx2_psrlv_d_256: + case x86_avx2_psrlv_q: + case x86_avx2_psrlv_q_256: + case x86_avx512_psrlv_d_512: + case x86_avx512_psrlv_q_512: + case x86_avx512_psrlv_w_128: + case x86_avx512_psrlv_w_256: + case x86_avx512_psrlv_w_512: + case x86_avx2_psrav_d: + case x86_avx2_psrav_d_256: + case x86_avx512_psrav_d_512: + case x86_avx512_psrav_q_128: + case x86_avx512_psrav_q_256: + case x86_avx512_psrav_q_512: + case x86_avx512_psrav_w_128: + case x86_avx512_psrav_w_256: + case x86_avx512_psrav_w_512: + case x86_avx2_psllv_d: + case x86_avx2_psllv_d_256: + case x86_avx2_psllv_q: + case x86_avx2_psllv_q_256: + case x86_avx512_psllv_d_512: + case x86_avx512_psllv_q_512: + case x86_avx512_psllv_w_128: + case x86_avx512_psllv_w_256: + case x86_avx512_psllv_w_512: + case x86_sse2_pmulh_w: + case x86_avx2_pmulh_w: + case x86_avx512_pmulh_w_512: + case x86_sse2_pmulhu_w: + case x86_avx2_pmulhu_w: + case x86_avx512_pmulhu_w_512: { + vector vals; + function fn; + switch (op) { + case x86_sse2_pavg_w: + case x86_sse2_pavg_b: + case x86_avx2_pavg_w: + case x86_avx2_pavg_b: + case x86_avx512_pavg_w_512: + case x86_avx512_pavg_b_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return (a.zext(1) + b.zext(1) + expr::mkUInt(1, bw + 1)) + .lshr(expr::mkUInt(1, bw + 1)) + .trunc(bw); + }; + break; + case x86_ssse3_psign_b_128: + case x86_ssse3_psign_w_128: + case x86_ssse3_psign_d_128: + case x86_avx2_psign_b: + case x86_avx2_psign_w: + case x86_avx2_psign_d: + fn = [&](auto a, auto b) -> expr { + return expr::mkIf( + b.isZero(), b, + expr::mkIf(b.isNegative(), expr::mkUInt(0, a.bits()) - a, a)); + }; + break; + case x86_avx2_psrlv_d: + case x86_avx2_psrlv_d_256: + case x86_avx2_psrlv_q: + case x86_avx2_psrlv_q_256: + case x86_avx512_psrlv_d_512: + case x86_avx512_psrlv_q_512: + case x86_avx512_psrlv_w_128: + case x86_avx512_psrlv_w_256: + case x86_avx512_psrlv_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), expr::mkUInt(0, bw), + a.lshr(b)); + }; + break; + case x86_avx2_psrav_d: + case x86_avx2_psrav_d_256: + case x86_avx512_psrav_d_512: + case x86_avx512_psrav_q_128: + case x86_avx512_psrav_q_256: + case x86_avx512_psrav_q_512: + case x86_avx512_psrav_w_128: + case x86_avx512_psrav_w_256: + case x86_avx512_psrav_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), + expr::mkIf(a.isNegative(), expr::mkUInt(-1, bw), + expr::mkUInt(0, bw)), + a.ashr(b)); + }; + break; + case x86_avx2_psllv_d: + case x86_avx2_psllv_d_256: + case x86_avx2_psllv_q: + case x86_avx2_psllv_q_256: + case x86_avx512_psllv_d_512: + case x86_avx512_psllv_q_512: + case x86_avx512_psllv_w_128: + case x86_avx512_psllv_w_256: + case x86_avx512_psllv_w_512: + fn = [&](auto a, auto b) -> expr { + unsigned bw = a.bits(); + return expr::mkIf(b.uge(expr::mkUInt(bw, bw)), expr::mkUInt(0, bw), + a << b); + }; + break; + case x86_sse2_pmulh_w: + case x86_avx2_pmulh_w: + case x86_avx512_pmulh_w_512: + fn = [&](auto a, auto b) -> expr { + expr mul = a.sext(16) * b.sext(16); + return mul.extract(31, 16); + }; + break; + case x86_sse2_pmulhu_w: + case x86_avx2_pmulhu_w: + case x86_avx512_pmulhu_w_512: + fn = [&](auto a, auto b) -> expr { + expr mul = a.zext(16) * b.zext(16); + return mul.extract(31, 16); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = rty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + auto bi = bty->extract(bv, i); + vals.emplace_back(fn(ai.value, bi.value), ai.non_poison && bi.non_poison); + } + return rty->aggregateVals(vals); + } + // pshuf.b + case x86_ssse3_pshuf_b_128: + case x86_avx2_pshuf_b: + case x86_avx512_pshuf_b_512: { + auto avty = static_cast(aty); + vector vals; + unsigned laneCount = shape_ret[op].first; + for (unsigned i = 0; i != laneCount; ++i) { + auto [b, bp] = bty->extract(bv, i); + expr id = (b & expr::mkUInt(0x0F, 8)) + (expr::mkUInt(i & 0x30, 8)); + auto [r, rp] = avty->extract(av, id); + auto ai = expr::mkIf(b.extract(7, 7) == expr::mkUInt(0, 1), r, + expr::mkUInt(0, 8)); + + vals.emplace_back(std::move(ai), bp && rp); + } + return rty->aggregateVals(vals); + } + // horizontal + case x86_ssse3_phadd_w_128: + case x86_ssse3_phadd_d_128: + case x86_ssse3_phadd_sw_128: + case x86_avx2_phadd_w: + case x86_avx2_phadd_d: + case x86_avx2_phadd_sw: + case x86_ssse3_phsub_w_128: + case x86_ssse3_phsub_d_128: + case x86_ssse3_phsub_sw_128: + case x86_avx2_phsub_w: + case x86_avx2_phsub_d: + case x86_avx2_phsub_sw: { + vector vals; + unsigned laneCount = shape_ret[op].first; + unsigned groupsize = 128 / shape_ret[op].second; + function fn; + switch (op) { + case x86_ssse3_phadd_w_128: + case x86_ssse3_phadd_d_128: + case x86_avx2_phadd_w: + case x86_avx2_phadd_d: + fn = [&](auto a, auto b) -> expr { return a + b; }; + break; + case x86_ssse3_phadd_sw_128: + case x86_avx2_phadd_sw: + fn = [&](auto a, auto b) -> expr { return a.sadd_sat(b); }; + break; + case x86_ssse3_phsub_w_128: + case x86_ssse3_phsub_d_128: + case x86_avx2_phsub_w: + case x86_avx2_phsub_d: + fn = [&](auto a, auto b) -> expr { return a - b; }; + break; + case x86_ssse3_phsub_sw_128: + case x86_avx2_phsub_sw: + fn = [&](auto a, auto b) -> expr { return a.ssub_sat(b); }; + break; + default: + UNREACHABLE(); + } + for (unsigned j = 0; j != laneCount / groupsize; j++) { + for (unsigned i = 0; i != groupsize; i += 2) { + auto [a1, p1] = aty->extract(av, j * groupsize + i); + auto [a2, p2] = aty->extract(av, j * groupsize + i + 1); + vals.emplace_back(fn(a1, a2), p1 && p2); + } + for (unsigned i = 0; i != groupsize; i += 2) { + auto [b1, p1] = aty->extract(bv, j * groupsize + i); + auto [b2, p2] = aty->extract(bv, j * groupsize + i + 1); + vals.emplace_back(fn(b1, b2), p1 && p2); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_psrli_w: + case x86_sse2_psrli_d: + case x86_sse2_psrli_q: + case x86_avx2_psrli_w: + case x86_avx2_psrli_d: + case x86_avx2_psrli_q: + case x86_avx512_psrli_w_512: + case x86_avx512_psrli_d_512: + case x86_avx512_psrli_q_512: + case x86_sse2_psrai_w: + case x86_sse2_psrai_d: + case x86_avx2_psrai_w: + case x86_avx2_psrai_d: + case x86_avx512_psrai_w_512: + case x86_avx512_psrai_d_512: + case x86_avx512_psrai_q_128: + case x86_avx512_psrai_q_256: + case x86_avx512_psrai_q_512: + case x86_sse2_pslli_w: + case x86_sse2_pslli_d: + case x86_sse2_pslli_q: + case x86_avx2_pslli_w: + case x86_avx2_pslli_d: + case x86_avx2_pslli_q: + case x86_avx512_pslli_w_512: + case x86_avx512_pslli_d_512: + case x86_avx512_pslli_q_512: { + vector vals; + function fn; + switch (op) { + case x86_sse2_psrai_w: + case x86_sse2_psrai_d: + case x86_avx2_psrai_w: + case x86_avx2_psrai_d: + case x86_avx512_psrai_w_512: + case x86_avx512_psrai_d_512: + case x86_avx512_psrai_q_128: + case x86_avx512_psrai_q_256: + case x86_avx512_psrai_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkIf(a.isNegative(), expr::mkInt(-1, sz_a), + expr::mkUInt(0, sz_a)); + expr inbounds = a.ashr(b.zextOrTrunc(sz_a)); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + case x86_sse2_psrli_w: + case x86_sse2_psrli_d: + case x86_sse2_psrli_q: + case x86_avx2_psrli_w: + case x86_avx2_psrli_d: + case x86_avx2_psrli_q: + case x86_avx512_psrli_w_512: + case x86_avx512_psrli_d_512: + case x86_avx512_psrli_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkUInt(0, sz_a); + expr inbounds = a.lshr(b.zextOrTrunc(sz_a)); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + case x86_sse2_pslli_w: + case x86_sse2_pslli_d: + case x86_sse2_pslli_q: + case x86_avx2_pslli_w: + case x86_avx2_pslli_d: + case x86_avx2_pslli_q: + case x86_avx512_pslli_w_512: + case x86_avx512_pslli_d_512: + case x86_avx512_pslli_q_512: + fn = [&](auto a, auto b) -> expr { + unsigned sz_a = a.bits(); + expr check = b.uge(expr::mkUInt(sz_a, 32)); + expr outbounds = expr::mkUInt(0, sz_a); + expr inbounds = a << b.zextOrTrunc(sz_a); + return expr::mkIf(std::move(check), std::move(outbounds), + std::move(inbounds)); + }; + break; + default: + UNREACHABLE(); + } + for (unsigned i = 0, e = rty->numElementsConst(); i != e; ++i) { + auto ai = aty->extract(av, i); + vals.emplace_back(fn(ai.value, bv.value), ai.non_poison && bv.non_poison); + } + return rty->aggregateVals(vals); + } + case x86_sse2_pmadd_wd: + case x86_avx2_pmadd_wd: + case x86_avx512_pmaddw_d_512: + case x86_ssse3_pmadd_ub_sw_128: + case x86_avx2_pmadd_ub_sw: + case x86_avx512_pmaddubs_w_512: { + vector vals; + for (unsigned i = 0, e = shape_ret[op].first; i != e; ++i) { + auto [a1, a1p] = aty->extract(av, i * 2); + auto [a2, a2p] = aty->extract(av, i * 2 + 1); + auto [b1, b1p] = bty->extract(bv, i * 2); + auto [b2, b2p] = bty->extract(bv, i * 2 + 1); + + auto np = a1p && a2p && b1p && b2p; + + if (op == x86_sse2_pmadd_wd || op == x86_avx2_pmadd_wd || + op == x86_avx512_pmaddw_d_512) { + expr v = a1.sext(16) * b1.sext(16) + a2.sext(16) * b2.sext(16); + vals.emplace_back(std::move(v), std::move(np)); + } else { + expr v = (a1.zext(8) * b1.sext(8)).sadd_sat(a2.zext(8) * b2.sext(8)); + vals.emplace_back(std::move(v), std::move(np)); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_packsswb_128: + case x86_avx2_packsswb: + case x86_avx512_packsswb_512: + case x86_sse2_packuswb_128: + case x86_avx2_packuswb: + case x86_avx512_packuswb_512: + case x86_sse2_packssdw_128: + case x86_avx2_packssdw: + case x86_avx512_packssdw_512: + case x86_sse41_packusdw: + case x86_avx2_packusdw: + case x86_avx512_packusdw_512: { + vector vals; + function fn; + if (op == x86_sse2_packsswb_128 || op == x86_avx2_packsswb || + op == x86_avx512_packsswb_512 || op == x86_sse2_packssdw_128 || + op == x86_avx2_packssdw || op == x86_avx512_packssdw_512) { + fn = [&](auto a) -> expr { + unsigned bw = a.bits() / 2; + auto min = expr::IntSMin(bw); + auto max = expr::IntSMax(bw); + return expr::mkIf(a.sle(min.sext(bw)), min, + expr::mkIf(a.sge(max.sext(bw)), max, a.trunc(bw))); + }; + } else { + fn = [&](auto a) -> expr { + unsigned bw = a.bits() / 2; + auto max = expr::IntUMax(bw); + auto zero = expr::mkUInt(0, bw); + return expr::mkIf(a.sle(zero.zext(bw)), zero, + expr::mkIf(a.sge(max.zext(bw)), max, a.trunc(bw))); + }; + } + + unsigned groupsize = 128 / shape_op1[op].second; + unsigned laneCount = shape_op1[op].first; + for (unsigned j = 0; j != laneCount / groupsize; j++) { + for (unsigned i = 0; i != groupsize; i++) { + auto [a1, p1] = aty->extract(av, j * groupsize + i); + vals.emplace_back(fn(std::move(a1)), std::move(p1)); + } + for (unsigned i = 0; i != groupsize; i++) { + auto [b1, p1] = aty->extract(bv, j * groupsize + i); + vals.emplace_back(fn(std::move(b1)), std::move(p1)); + } + } + return rty->aggregateVals(vals); + } + case x86_sse2_psad_bw: + case x86_avx2_psad_bw: + case x86_avx512_psad_bw_512: { + unsigned ngroup = shape_ret[op].first; + vector vals; + for (unsigned j = 0; j < ngroup; ++j) { + expr np = true; + expr v; + for (unsigned i = 0; i < 8; ++i) { + auto [a, ap] = aty->extract(av, 8 * j + i); + auto [b, bp] = bty->extract(bv, 8 * j + i); + np = np && ap && bp; + if (i == 0) + v = (a.zext(8) - b.zext(8)).abs(); + else + v = v + (a.zext(8) - b.zext(8)).abs(); + } + vals.emplace_back(v.zext(48), std::move(np)); + } + return rty->aggregateVals(vals); + } + // TODO: add semantic for other intrinsics + default: + UNREACHABLE(); + } +} + +expr X86IntrinBinOp::getTypeConstraints(const Function &f) const { + return Value::getTypeConstraints() && + (shape_op0[op].first != 1 + ? a->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_op0[op].second); + }) && + a->getType().getAsAggregateType()->numElements() == + shape_op0[op].first + : a->getType().enforceIntType(shape_op0[op].second)) && + (shape_op1[op].first != 1 + ? b->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_op1[op].second); + }) && + b->getType().getAsAggregateType()->numElements() == + shape_op1[op].first + : b->getType().enforceIntType(shape_op1[op].second)) && + (shape_ret[op].first != 1 + ? getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_ret[op].second); + }) && + getType().getAsAggregateType()->numElements() == + shape_ret[op].first + : getType().enforceIntType(shape_ret[op].second)); +} + +unique_ptr X86IntrinBinOp::dup(Function &f, const string &suffix) const { + return make_unique(getType(), getName() + suffix, *a, *b, op); +} + +string X86IntrinTerOp::getOpName(Op op) { + switch (op) { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) \ + case NAME: \ + return #NAME; +#include "intrinsics_terop.h" +#undef PROCESS + } + UNREACHABLE(); +} + +void X86IntrinTerOp::print(ostream &os) const { + os << getName() << " = " << getOpName(op) << " " << *a << ", " << *b; +} + +StateValue X86IntrinTerOp::toSMT(State &s) const { + auto rty = getType().getAsAggregateType(); + auto aty = a->getType().getAsAggregateType(); + auto bty = b->getType().getAsAggregateType(); + auto cty = c->getType().getAsAggregateType(); + auto &av = s[*a]; + auto &bv = s[*b]; + auto &cv = s[*c]; + + switch (op) { + case x86_avx2_pblendvb: { + vector vals; + + for (int i = 0; i < 32; ++i) { + auto [a, ap] = aty->extract(av, i); + auto [b, bp] = bty->extract(bv, i); + auto [c, cp] = cty->extract(cv, i); + auto v = expr::mkIf(c.extract(7, 7) == expr::mkUInt(0, 1), a, b); + vals.emplace_back(std::move(v), ap && bp && cp); + } + return rty->aggregateVals(vals); + } + // TODO: add semantic for other intrinsics + default: + UNREACHABLE(); + } +} + +expr X86IntrinTerOp::getTypeConstraints(const Function &f) const { + return Value::getTypeConstraints() && + (shape_op0[op].first != 1 + ? a->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_op0[op].second); + }) && + a->getType().getAsAggregateType()->numElements() == + shape_op0[op].first + : a->getType().enforceIntType(shape_op0[op].second)) && + (shape_op1[op].first != 1 + ? b->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_op1[op].second); + }) && + b->getType().getAsAggregateType()->numElements() == + shape_op1[op].first + : b->getType().enforceIntType(shape_op1[op].second)) && + (shape_op2[op].first != 1 + ? b->getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_op2[op].second); + }) && + b->getType().getAsAggregateType()->numElements() == + shape_op2[op].first + : b->getType().enforceIntType(shape_op2[op].second)) && + (shape_ret[op].first != 1 + ? getType().enforceVectorType([this](auto &ty) { + return ty.enforceIntType(shape_ret[op].second); + }) && + getType().getAsAggregateType()->numElements() == + shape_ret[op].first + : getType().enforceIntType(shape_ret[op].second)); +} + +unique_ptr X86IntrinTerOp::dup(Function &f, const string &suffix) const { + return make_unique(getType(), getName() + suffix, *a, *b, *c, + op); +} + +vector X86IntrinTerOp::operands() const { + return {a, b, c}; +} + +bool X86IntrinTerOp::propagatesPoison() const { + return true; +} + +bool X86IntrinTerOp::hasSideEffects() const { + return false; +} + +void X86IntrinTerOp::rauw(const Value &what, Value &with) { + RAUW(a); + RAUW(b); + RAUW(c); +} const ConversionOp* isCast(ConversionOp::Op op, const Value &v) { auto c = dynamic_cast(&v); diff --git a/ir/instr.h b/ir/instr.h index d77ad38c1..473fb4d86 100644 --- a/ir/instr.h +++ b/ir/instr.h @@ -532,8 +532,8 @@ class Phi final : public Instr { void print(std::ostream &os) const override; StateValue toSMT(State &s) const override; smt::expr getTypeConstraints(const Function &f) const override; - std::unique_ptr - dup(Function &f, const std::string &suffix) const override; + std::unique_ptr dup(Function &f, + const std::string &suffix) const override; }; @@ -1286,6 +1286,141 @@ class ShuffleVector final : public Instr { dup(Function &f, const std::string &suffix) const override; }; +class FakeShuffle final : public Instr { + Value *v1, *v2, *mask; + +public: + FakeShuffle(Type &type, std::string &&name, Value &v1, Value &v2, Value &mask) + : Instr(type, std::move(name)), v1(&v1), v2(&v2), mask(&mask) {} + std::vector operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr dup(Function &f, + const std::string &suffix) const override; +}; + +class X86IntrinBinOp final : public Instr { +public: + static constexpr unsigned numOfX86Intrinsics = 135; + enum Op { +#define PROCESS(NAME, A, B, C, D, E, F) NAME, +#include "intrinsics_binop.h" +#undef PROCESS + }; + + // the shape of a vector is stored as <# of lanes, element bits> + static constexpr std::array, numOfX86Intrinsics> + shape_op0 = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(C, D), +#include "intrinsics_binop.h" +#undef PROCESS + }; + static constexpr std::array, numOfX86Intrinsics> + shape_op1 = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(E, F), +#include "intrinsics_binop.h" +#undef PROCESS + }; + static constexpr std::array, numOfX86Intrinsics> + shape_ret = { +#define PROCESS(NAME, A, B, C, D, E, F) std::make_pair(A, B), +#include "intrinsics_binop.h" +#undef PROCESS + }; + static constexpr std::array ret_width = { +#define PROCESS(NAME, A, B, C, D, E, F) A *B, +#include "intrinsics_binop.h" +#undef PROCESS + }; + +private: + Value *a, *b; + Op op; + +public: + static unsigned getRetWidth(Op op) { + return ret_width[op]; + } + X86IntrinBinOp(Type &type, std::string &&name, Value &a, Value &b, Op op) + : Instr(type, std::move(name)), a(&a), b(&b), op(op) {} + std::vector operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + static std::string getOpName(Op op); + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr dup(Function &f, + const std::string &suffix) const override; +}; + +class X86IntrinTerOp final : public Instr { +public: + static constexpr unsigned numOfX86Intrinsics = 1; + enum Op { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) NAME, +#include "intrinsics_terop.h" +#undef PROCESS + }; + + // the shape of a vector is stored as <# of lanes, element bits> + static constexpr std::array, numOfX86Intrinsics> + shape_op0 = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(C, D), +#include "intrinsics_terop.h" +#undef PROCESS + }; + static constexpr std::array, numOfX86Intrinsics> + shape_op1 = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(E, F), +#include "intrinsics_terop.h" +#undef PROCESS + }; + static constexpr std::array, numOfX86Intrinsics> + shape_op2 = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(G, H), +#include "intrinsics_terop.h" +#undef PROCESS + }; + static constexpr std::array, numOfX86Intrinsics> + shape_ret = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) std::make_pair(A, B), +#include "intrinsics_terop.h" +#undef PROCESS + }; + static constexpr std::array ret_width = { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) A *B, +#include "intrinsics_terop.h" +#undef PROCESS + }; + +private: + Value *a, *b, *c; + Op op; + +public: + static unsigned getRetWidth(Op op) { + return ret_width[op]; + } + X86IntrinTerOp(Type &type, std::string &&name, Value &a, Value &b, Value &c, + Op op) + : Instr(type, std::move(name)), a(&a), b(&b), c(&c), op(op) {} + std::vector operands() const override; + bool propagatesPoison() const override; + bool hasSideEffects() const override; + void rauw(const Value &what, Value &with) override; + static std::string getOpName(Op op); + void print(std::ostream &os) const override; + StateValue toSMT(State &s) const override; + smt::expr getTypeConstraints(const Function &f) const override; + std::unique_ptr dup(Function &f, + const std::string &suffix) const override; +}; const ConversionOp *isCast(ConversionOp::Op op, const Value &v); Value *isNoOp(const Value &v); diff --git a/ir/intrinsics_binop.h b/ir/intrinsics_binop.h new file mode 100644 index 000000000..ab0483932 --- /dev/null +++ b/ir/intrinsics_binop.h @@ -0,0 +1,135 @@ +PROCESS(x86_sse2_pavg_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_pavg_b, 16, 8, 16, 8, 16, 8) +PROCESS(x86_avx2_pavg_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_pavg_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_avx512_pavg_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_avx512_pavg_b_512, 64, 8, 64, 8, 64, 8) +PROCESS(x86_avx2_pshuf_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_ssse3_pshuf_b_128, 16, 8, 16, 8, 16, 8) +PROCESS(x86_avx512_pshuf_b_512, 64, 8, 64, 8, 64, 8) +PROCESS(x86_sse2_psrl_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psrl_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_sse2_psrl_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psrl_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psrl_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx2_psrl_q, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psrl_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psrl_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psrl_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_psrli_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_psrli_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_sse2_psrli_q, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx2_psrli_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_psrli_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx2_psrli_q, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_psrli_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_psrli_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_psrli_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psrlv_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psrlv_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_psrlv_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psrlv_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psrlv_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psrlv_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psrlv_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psrlv_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psrlv_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_psra_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psra_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psra_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psra_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx512_psra_q_128, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx512_psra_q_256, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psra_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psra_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psra_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_psrai_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_psrai_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_avx2_psrai_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_psrai_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx512_psrai_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_psrai_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_psrai_q_128, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx512_psrai_q_256, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_psrai_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psrav_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psrav_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx512_psrav_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psrav_q_128, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx512_psrav_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psrav_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psrav_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psrav_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psrav_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_psll_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_sse2_psll_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_sse2_psll_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psll_w, 16, 16, 16, 16, 8, 16) +PROCESS(x86_avx2_psll_d, 8, 32, 8, 32, 4, 32) +PROCESS(x86_avx2_psll_q, 4, 64, 4, 64, 2, 64) +PROCESS(x86_avx512_psll_w_512, 32, 16, 32, 16, 8, 16) +PROCESS(x86_avx512_psll_d_512, 16, 32, 16, 32, 4, 32) +PROCESS(x86_avx512_psll_q_512, 8, 64, 8, 64, 2, 64) +PROCESS(x86_sse2_pslli_w, 8, 16, 8, 16, 1, 32) +PROCESS(x86_sse2_pslli_d, 4, 32, 4, 32, 1, 32) +PROCESS(x86_sse2_pslli_q, 2, 64, 2, 64, 1, 32) +PROCESS(x86_avx2_pslli_w, 16, 16, 16, 16, 1, 32) +PROCESS(x86_avx2_pslli_d, 8, 32, 8, 32, 1, 32) +PROCESS(x86_avx2_pslli_q, 4, 64, 4, 64, 1, 32) +PROCESS(x86_avx512_pslli_w_512, 32, 16, 32, 16, 1, 32) +PROCESS(x86_avx512_pslli_d_512, 16, 32, 16, 32, 1, 32) +PROCESS(x86_avx512_pslli_q_512, 8, 64, 8, 64, 1, 32) +PROCESS(x86_avx2_psllv_d, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psllv_d_256, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_psllv_q, 2, 64, 2, 64, 2, 64) +PROCESS(x86_avx2_psllv_q_256, 4, 64, 4, 64, 4, 64) +PROCESS(x86_avx512_psllv_d_512, 16, 32, 16, 32, 16, 32) +PROCESS(x86_avx512_psllv_q_512, 8, 64, 8, 64, 8, 64) +PROCESS(x86_avx512_psllv_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx512_psllv_w_256, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_psllv_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_ssse3_psign_b_128, 16, 8, 16, 8, 16, 8) +PROCESS(x86_ssse3_psign_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_psign_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_avx2_psign_b, 32, 8, 32, 8, 32, 8) +PROCESS(x86_avx2_psign_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_psign_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_ssse3_phadd_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_phadd_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_ssse3_phadd_sw_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_phadd_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_phadd_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_phadd_sw, 16, 16, 16, 16, 16, 16) +PROCESS(x86_ssse3_phsub_w_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_ssse3_phsub_d_128, 4, 32, 4, 32, 4, 32) +PROCESS(x86_ssse3_phsub_sw_128, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_phsub_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx2_phsub_d, 8, 32, 8, 32, 8, 32) +PROCESS(x86_avx2_phsub_sw, 16, 16, 16, 16, 16, 16) +PROCESS(x86_sse2_pmulh_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_pmulh_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_pmulh_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_pmulhu_w, 8, 16, 8, 16, 8, 16) +PROCESS(x86_avx2_pmulhu_w, 16, 16, 16, 16, 16, 16) +PROCESS(x86_avx512_pmulhu_w_512, 32, 16, 32, 16, 32, 16) +PROCESS(x86_sse2_pmadd_wd, 4, 32, 8, 16, 8, 16) +PROCESS(x86_avx2_pmadd_wd, 8, 32, 16, 16, 16, 16) +PROCESS(x86_avx512_pmaddw_d_512, 16, 32, 32, 16, 32, 16) +PROCESS(x86_ssse3_pmadd_ub_sw_128, 8, 16, 16, 8, 16, 8) +PROCESS(x86_avx2_pmadd_ub_sw, 16, 16, 32, 8, 32, 8) +PROCESS(x86_avx512_pmaddubs_w_512, 32, 16, 64, 8, 64, 8) +PROCESS(x86_sse2_packsswb_128, 16, 8, 8, 16, 8, 16) +PROCESS(x86_avx2_packsswb, 32, 8, 16, 16, 16, 16) +PROCESS(x86_avx512_packsswb_512, 64, 8, 32, 16, 32, 16) +PROCESS(x86_sse2_packuswb_128, 16, 8, 8, 16, 8, 16) +PROCESS(x86_avx2_packuswb, 32, 8, 16, 16, 16, 16) +PROCESS(x86_avx512_packuswb_512, 64, 8, 32, 16, 32, 16) +PROCESS(x86_sse2_packssdw_128, 8, 16, 4, 32, 4, 32) +PROCESS(x86_avx2_packssdw, 16, 16, 8, 32, 8, 32) +PROCESS(x86_avx512_packssdw_512, 32, 16, 16, 32, 16, 32) +PROCESS(x86_sse41_packusdw, 8, 16, 4, 32, 4, 32) +PROCESS(x86_avx2_packusdw, 16, 16, 8, 32, 8, 32) +PROCESS(x86_avx512_packusdw_512, 32, 16, 16, 32, 16, 32) +PROCESS(x86_sse2_psad_bw, 2, 64, 16, 8, 16, 8) +PROCESS(x86_avx2_psad_bw, 4, 64, 32, 8, 32, 8) +PROCESS(x86_avx512_psad_bw_512, 8, 64, 64, 8, 64, 8) \ No newline at end of file diff --git a/ir/intrinsics_terop.h b/ir/intrinsics_terop.h new file mode 100644 index 000000000..9d0265d02 --- /dev/null +++ b/ir/intrinsics_terop.h @@ -0,0 +1 @@ +PROCESS(x86_avx2_pblendvb, 32, 8, 32, 8, 32, 8, 32, 8) \ No newline at end of file diff --git a/ir/type.cpp b/ir/type.cpp index ee7c1e805..ad3d530d1 100644 --- a/ir/type.cpp +++ b/ir/type.cpp @@ -6,6 +6,7 @@ #include "ir/state.h" #include "smt/solver.h" #include "util/compiler.h" +#include "util/config.h" #include #include #include @@ -447,6 +448,9 @@ expr FloatType::getFloat(const expr &v) const { expr FloatType::fromFloat(State &s, const expr &fp, const Type &from_type0, unsigned nary, const expr &a, const expr &b, const expr &c) const { + if (config::use_exact_fp) + return fp.float2BV(); + expr isnan = fp.isNaN(); expr val = fp.float2BV(); diff --git a/llvm_util/known_fns.cpp b/llvm_util/known_fns.cpp index 440dcb384..bd74109fd 100644 --- a/llvm_util/known_fns.cpp +++ b/llvm_util/known_fns.cpp @@ -516,6 +516,12 @@ known_call(llvm::CallInst &i, const llvm::TargetLibraryInfo &TLI, RETURN_EXACT(); auto decl = i.getCalledFunction(); + + if (decl && decl->hasName() && decl->getName().starts_with("__fksv")) { + RETURN_VAL(make_unique(*ty, value_name(i), *args[0], *args[1], + *args[2])); + } + llvm::LibFunc libfn; if (!decl || !TLI.getLibFunc(*decl, libfn)) RETURN_EXACT(); diff --git a/llvm_util/llvm2alive.cpp b/llvm_util/llvm2alive.cpp index 928e47a26..eb6905777 100644 --- a/llvm_util/llvm2alive.cpp +++ b/llvm_util/llvm2alive.cpp @@ -15,6 +15,7 @@ #include "llvm/IR/InstVisitor.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/Operator.h" #include "llvm/Support/ModRef.h" #include @@ -1222,6 +1223,45 @@ class llvm2alive_ : public llvm::InstVisitor> { case llvm::Intrinsic::prefetch: return NOP(i); + // intel x86 intrinsics +#define PROCESS(NAME, A, B, C, D, E, F) case llvm::Intrinsic::NAME: +#include "ir/intrinsics_binop.h" +#undef PROCESS + { + PARSE_BINOP(); + X86IntrinBinOp::Op op; + switch (i.getIntrinsicID()) { +#define PROCESS(NAME, A, B, C, D, E, F) \ + case llvm::Intrinsic::NAME: \ + op = X86IntrinBinOp::NAME; \ + break; +#include "ir/intrinsics_binop.h" +#undef PROCESS + default: + UNREACHABLE(); + } + return make_unique(*ty, value_name(i), *a, *b, op); + } + +#define PROCESS(NAME, A, B, C, D, E, F, G, H) case llvm::Intrinsic::NAME: +#include "ir/intrinsics_terop.h" +#undef PROCESS + { + PARSE_TRIOP(); + X86IntrinTerOp::Op op; + switch (i.getIntrinsicID()) { +#define PROCESS(NAME, A, B, C, D, E, F, G, H) \ + case llvm::Intrinsic::NAME: \ + op = X86IntrinTerOp::NAME; \ + break; +#include "ir/intrinsics_terop.h" +#undef PROCESS + default: + UNREACHABLE(); + } + return make_unique(*ty, value_name(i), *a, *b, *c, op); + } + default: break; } diff --git a/llvm_util/utils.cpp b/llvm_util/utils.cpp index a359edea0..4012cead3 100644 --- a/llvm_util/utils.cpp +++ b/llvm_util/utils.cpp @@ -205,7 +205,7 @@ Type* llvm_type2alive(const llvm::Type *ty) { auto vty = cast(ty); auto elems = vty->getElementCount().getKnownMinValue(); auto ety = llvm_type2alive(vty->getElementType()); - if (!ety || elems > 1024) + if (!ety || elems > 2048) return nullptr; cache = make_unique("ty_" + to_string(type_id_counter++), elems, *ety); diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll new file mode 100644 index 000000000..fc0b1aa36 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll new file mode 100644 index 000000000..cc4406a27 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll new file mode 100644 index 000000000..5c28e9b95 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-2.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %neg = sub <8 x i32> zeroinitializer, %v + ret <8 x i32> %neg +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll new file mode 100644 index 000000000..246786752 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psign_w-3.srctgt.ll @@ -0,0 +1,12 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %v, <8 x i32> ) + %2 = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %1, <8 x i32> ) + ret <8 x i32> %2 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %neg = sub <8 x i32> zeroinitializer, %v + ret <8 x i32> %neg +} + +declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll new file mode 100644 index 000000000..32ce38d0c --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll new file mode 100644 index 000000000..dc39ecc52 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-15.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -disable-undef-input +; disabled undef inputs to accelerate the verification + +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll new file mode 100644 index 000000000..7365b7355 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-3.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll new file mode 100644 index 000000000..4e454a092 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll new file mode 100644 index 000000000..a5d6914b9 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll new file mode 100644 index 000000000..1e9fd5c72 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> zeroinitializer +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll new file mode 100644 index 000000000..dcae58ef7 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll new file mode 100644 index 000000000..83e9468ee --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-overflow5.srctgt.ll @@ -0,0 +1,14 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + %tmp = lshr <8 x i32> %v, + ret <8 x i32> %tmp +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll new file mode 100644 index 000000000..3f6994fba --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> poison +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll new file mode 100644 index 000000000..ea86f827a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_d-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i32> @src(<8 x i32> %v) { + %1 = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> ) + ret <8 x i32> %1 +} + +define <8 x i32> @tgt(<8 x i32> %v) { + ret <8 x i32> %v +} + +declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll new file mode 100644 index 000000000..4f23a76a3 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-0.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll new file mode 100644 index 000000000..e56cf907f --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-15.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -disable-undef-input +; disabled undef inputs to accelerate the verification + +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + %tmp = lshr <4 x i64> %v, + ret <4 x i64> %tmp +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll new file mode 100644 index 000000000..00437e136 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-3.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + %tmp = lshr <4 x i64> %v, + ret <4 x i64> %tmp +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll new file mode 100644 index 000000000..abb3c758a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll new file mode 100644 index 000000000..a675c22df --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll new file mode 100644 index 000000000..66bec3e8a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> zeroinitializer +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll new file mode 100644 index 000000000..10d8981e4 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll new file mode 100644 index 000000000..807017d24 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll new file mode 100644 index 000000000..5636e4de3 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> poison +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll new file mode 100644 index 000000000..236ef23c1 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_q-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i64> @src(<4 x i64> %v) { + %1 = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> ) + ret <4 x i64> %1 +} + +define <4 x i64> @tgt(<4 x i64> %v) { + ret <4 x i64> %v +} + +declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll new file mode 100644 index 000000000..a299a1b67 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll new file mode 100644 index 000000000..7f2e6a60b --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-15.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -disable-undef-input +; disabled undef inputs to accelerate the verification + +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + %tmp = lshr <16 x i16> %v, + ret <16 x i16> %tmp +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll new file mode 100644 index 000000000..193f599c9 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-3.srctgt.ll @@ -0,0 +1,13 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + %tmp = lshr <16 x i16> %v, + ret <16 x i16> %tmp +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll new file mode 100644 index 000000000..26e70e2cd --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll new file mode 100644 index 000000000..3f1154eb2 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll new file mode 100644 index 000000000..a15297fab --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> zeroinitializer +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll new file mode 100644 index 000000000..0b1a429b0 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow4.srctgt.ll @@ -0,0 +1,14 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined + diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll new file mode 100644 index 000000000..9a8d45ad5 --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll new file mode 100644 index 000000000..79ced610a --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> poison +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll b/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll new file mode 100644 index 000000000..2d0ac7d6e --- /dev/null +++ b/tests/alive-tv/vector/x86/avx2_psrl_w-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <16 x i16> @src(<16 x i16> %v) { + %1 = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> ) + ret <16 x i16> %1 +} + +define <16 x i16> @tgt(<16 x i16> %v) { + ret <16 x i16> %v +} + +declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll new file mode 100644 index 000000000..d7e55fa87 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-8-01.srctgt.ll @@ -0,0 +1,16 @@ + + +define <32 x i8> @tgt() { +entry: + ret <32 x i8> +} +define <32 x i8> @src() { +entry: + %calltmp = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> , <32 x i8> ) + ret <32 x i8> %calltmp +} + + +declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) + + diff --git a/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll new file mode 100644 index 000000000..244451bd2 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-8-02.srctgt.ll @@ -0,0 +1,11 @@ + +define <64 x i8> @tgt() { +entry: + ret <64 x i8> +} +define <64 x i8> @src() { +entry: + %calltmp = call <64 x i8> @llvm.x86.avx512.pavg.b.512(<64 x i8> , <64 x i8> ) + ret <64 x i8> %calltmp +} +declare <64 x i8> @llvm.x86.avx512.pavg.b.512 (<64 x i8>, <64 x i8>) diff --git a/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll new file mode 100644 index 000000000..825752f25 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-01.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> , <8 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll new file mode 100644 index 000000000..736243a6c --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-02.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src() { +entry: + %calltmp = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> , <4 x i32> ) + ret <8 x i32> %calltmp +} +define <8 x i32> @tgt() { +entry: + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll new file mode 100644 index 000000000..890add5b3 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-03.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll new file mode 100644 index 000000000..ca2da0440 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-04.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src() { +entry: + %calltmp = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> , <8 x i32> ) + ret <8 x i32> %calltmp +} +define <8 x i32> @tgt() { +entry: + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll new file mode 100644 index 000000000..b9ca1e0e3 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-05.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll new file mode 100644 index 000000000..827d20c56 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-06.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll new file mode 100644 index 000000000..9951d2b1b --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-07.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i32> @src() { +entry: + %calltmp = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> , <8 x i32> ) + ret <8 x i32> %calltmp +} +define <8 x i32> @tgt() { +entry: + ret <8 x i32> +} + +declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll new file mode 100644 index 000000000..7c98d69d3 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-08.srctgt.ll @@ -0,0 +1,11 @@ +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> , <16 x i16> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll new file mode 100644 index 000000000..4cadbdd64 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-09.srctgt.ll @@ -0,0 +1,11 @@ +define <8 x i16> @src() { +entry: + %calltmp = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> , <16 x i8> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { +entry: + ret <8 x i16> +} + +declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll new file mode 100644 index 000000000..465e03347 --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-10.srctgt.ll @@ -0,0 +1,12 @@ + +define <16 x i16> @src() { +entry: + %calltmp = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> , <32 x i8> ) + ret <16 x i16> %calltmp +} +define <16 x i16> @tgt() { +entry: + ret <16 x i16> +} + +declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll new file mode 100644 index 000000000..3f6def4bc --- /dev/null +++ b/tests/alive-tv/vector/x86/regression-feb-9-11.srctgt.ll @@ -0,0 +1,11 @@ +define <32 x i16> @src() { +entry: + %calltmp = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> , <64 x i8> ) + ret <32 x i16> %calltmp +} +define <32 x i16> @tgt() { +entry: + ret <32 x i16> +} + +declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>) \ No newline at end of file diff --git a/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll b/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll new file mode 100644 index 000000000..469f6abae --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_pavg_w_0-failed.srctgt.ll @@ -0,0 +1,12 @@ +; ERROR: Value mismatch + +; test found by Stefan's random tester + +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> , <8 x i16> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { + ret <8 x i16> +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll new file mode 100644 index 000000000..42abb8ef7 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_pavg_w_0.srctgt.ll @@ -0,0 +1,10 @@ +; test found by Stefan's random tester + +define <8 x i16> @src() { + %calltmp = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> , <8 x i16> ) + ret <8 x i16> %calltmp +} +define <8 x i16> @tgt() { + ret <8 x i16> +} +declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll new file mode 100644 index 000000000..8a39bd30e --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-0.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll new file mode 100644 index 000000000..65e2d153a --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-15.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -disable-undef-input +; disabled undef inputs to accelerate the verification + +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll new file mode 100644 index 000000000..4f1ffb526 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-3.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll new file mode 100644 index 000000000..074634471 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll new file mode 100644 index 000000000..51300dca4 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll new file mode 100644 index 000000000..da506e2c3 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> zeroinitializer +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll new file mode 100644 index 000000000..288736109 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll new file mode 100644 index 000000000..290b11284 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-overflow5.srctgt.ll @@ -0,0 +1,14 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + %tmp = lshr <4 x i32> %v, + ret <4 x i32> %tmp +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll new file mode 100644 index 000000000..5c5449aa1 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> poison +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll new file mode 100644 index 000000000..3152e1440 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_d-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <4 x i32> @src(<4 x i32> %v) { + %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> ) + ret <4 x i32> %1 +} + +define <4 x i32> @tgt(<4 x i32> %v) { + ret <4 x i32> %v +} + +declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll new file mode 100644 index 000000000..9b8319a4d --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-0.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll new file mode 100644 index 000000000..0dff78493 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-15.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -disable-undef-input +; disabled undef inputs to accelerate the verification + +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + %tmp = lshr <2 x i64> %v, + ret <2 x i64> %tmp +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll new file mode 100644 index 000000000..7f1b08191 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-3.srctgt.ll @@ -0,0 +1,13 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + %tmp = lshr <2 x i64> %v, + ret <2 x i64> %tmp +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll new file mode 100644 index 000000000..51e949fd8 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll new file mode 100644 index 000000000..3198d8831 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll new file mode 100644 index 000000000..1edd0efeb --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> zeroinitializer +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll new file mode 100644 index 000000000..c539ea9fb --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow4.srctgt.ll @@ -0,0 +1,13 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll new file mode 100644 index 000000000..8bdf0cc5a --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll new file mode 100644 index 000000000..127443d74 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> poison +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll new file mode 100644 index 000000000..78e237a4c --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_q-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <2 x i64> @src(<2 x i64> %v) { + %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> ) + ret <2 x i64> %1 +} + +define <2 x i64> @tgt(<2 x i64> %v) { + ret <2 x i64> %v +} + +declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll new file mode 100644 index 000000000..51e8ba278 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-0.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll new file mode 100644 index 000000000..893d69653 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-15.srctgt.ll @@ -0,0 +1,14 @@ +; TEST-ARGS: -disable-undef-input +; disabled undef inputs to accelerate the verification + +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + %tmp = lshr <8 x i16> %v, + ret <8 x i16> %tmp +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll new file mode 100644 index 000000000..81c57f18f --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-3.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + %tmp = lshr <8 x i16> %v, + ret <8 x i16> %tmp +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; ERROR: Value mismatch diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll new file mode 100644 index 000000000..24c2a90e0 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll new file mode 100644 index 000000000..33a2a9d38 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll new file mode 100644 index 000000000..caa32b663 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow3.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> zeroinitializer +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll new file mode 100644 index 000000000..afbc266ea --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow4.srctgt.ll @@ -0,0 +1,14 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined + diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll new file mode 100644 index 000000000..6d82e3d49 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-overflow5.srctgt.ll @@ -0,0 +1,13 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) + +; source = <0, 0, 0 ... >, tgt = when %input = +; ERROR: Target's return value is more undefined diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll new file mode 100644 index 000000000..bc03df188 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-poison1.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> poison +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll b/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll new file mode 100644 index 000000000..29e04d604 --- /dev/null +++ b/tests/alive-tv/vector/x86/sse2_psrl_w-poison2.srctgt.ll @@ -0,0 +1,10 @@ +define <8 x i16> @src(<8 x i16> %v) { + %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> ) + ret <8 x i16> %1 +} + +define <8 x i16> @tgt(<8 x i16> %v) { + ret <8 x i16> %v +} + +declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) diff --git a/util/config.cpp b/util/config.cpp index 82ab5f66a..dd11aac72 100644 --- a/util/config.cpp +++ b/util/config.cpp @@ -15,6 +15,7 @@ bool skip_smt = false; string smt_benchmark_dir; bool disable_poison_input = false; bool disable_undef_input = false; +bool use_exact_fp = false; bool tgt_is_asm = false; bool fail_if_src_is_ub = false; bool disallow_ub_exploitation = false; diff --git a/util/config.h b/util/config.h index 57f58c888..397168875 100644 --- a/util/config.h +++ b/util/config.h @@ -19,6 +19,8 @@ extern bool disable_poison_input; extern bool disable_undef_input; +extern bool use_exact_fp; + extern bool tgt_is_asm; extern bool fail_if_src_is_ub;