Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve some issues impacting the jitstress-isas-x86 runs #104935

Merged
merged 3 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1413,8 +1413,23 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize));
retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize);
if (IsBaselineSimdIsaSupported())
{
op1 = gtFoldExpr(gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize));
retNode = gtNewSimdBinOpNode(GT_AND, retType, op1, op2, simdBaseJitType, simdSize);
}
else
{
// We need to ensure we import even if SSE2 is disabled
assert(intrinsic == NI_SSE_AndNot);

op3 = gtNewAllBitsSetConNode(retType);

op1 = gtNewSimdHWIntrinsicNode(retType, op1, op3, NI_SSE_Xor, simdBaseJitType, simdSize);
op1 = gtFoldExpr(op1);

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, NI_SSE_And, simdBaseJitType, simdSize);
}
break;
}

Expand Down
115 changes: 102 additions & 13 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1471,9 +1471,23 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
op2 = userIntrin->Op(1);
}

NamedIntrinsic intrinsic =
GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2, simdBaseType,
simdSize, false);
NamedIntrinsic intrinsic = NI_Illegal;

if (comp->IsBaselineSimdIsaSupported())
{
intrinsic = GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(comp, GT_AND_NOT, op1, op2,
simdBaseType, simdSize, false);
}
else
{
// We need to ensure we optimize even if SSE2 is disabled

assert(simdBaseType == TYP_FLOAT);
assert(simdSize <= 16);

intrinsic = NI_SSE_AndNot;
}

userIntrin->ResetHWIntrinsicId(intrinsic, comp, op1, op2);

return nextNode;
Expand All @@ -1487,24 +1501,55 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
BlockRange().Remove(node);
op3 = userIntrin->Op(2);

// Tracks which two operands get used first
TernaryLogicUseFlags firstOpUseFlags = TernaryLogicUseFlags::AB;

if (op3 == node)
{
op3 = userIntrin->Op(1);
if (userOper == GT_AND_NOT)
{
op3 = op2;
op2 = op1;
op1 = userIntrin->Op(1);

// AND_NOT isn't commutative so we need to shift parameters down
firstOpUseFlags = TernaryLogicUseFlags::BC;
}
else
{
op3 = userIntrin->Op(1);
}
}

uint8_t controlByte = 0x00;

if ((userOper == GT_XOR) && op3->IsVectorAllBitsSet())
{
// We're being used by what is actually GT_NOT, so we
// need to shift parameters down so that A is unused
// We have XOR(OP(A, B), AllBitsSet)
// A: op1
// B: op2
// C: op3 (AllBitsSet)
//
// We want A to be the unused parameter so swap it around
// A: op3 (AllBitsSet)
// B: op1
// C: op2
//
// This gives us NOT(OP(B, C))

assert(firstOpUseFlags == TernaryLogicUseFlags::AB);

std::swap(op2, op3);
std::swap(op1, op2);

if (isOperNot)
{
// We have what is actually a double not, so just return op2
// We have NOT(XOR(B, AllBitsSet))
// A: op3 (AllBitsSet)
// B: op1
// C: op2 (AllBitsSet)
//
// This represents a double not, so so just return op2
// which is the only actual value now that the parameters
// were shifted around

Expand Down Expand Up @@ -1538,20 +1583,64 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
}
else if (isOperNot)
{
// A is unused, so we just want OP(NOT(B), C)
if (firstOpUseFlags == TernaryLogicUseFlags::AB)
{
// We have OP(XOR(A, AllBitsSet), C)
// A: op1
// B: op2 (AllBitsSet)
// C: op3
//
// We want A to be the unused parameter so swap it around
// A: op2 (AllBitsSet)
// B: op1
// C: op3
//
// This gives us OP(NOT(B), C)

assert(op2->IsVectorAllBitsSet());
std::swap(op1, op2);
assert(op2->IsVectorAllBitsSet());
std::swap(op1, op2);

controlByte = static_cast<uint8_t>(~B);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
controlByte = static_cast<uint8_t>(~B);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
}
else
{
// We have OP(A, XOR(B, AllBitsSet))
// A: op1
// B: op2
// C: op3 (AllBitsSet)
//
// We want A to be the unused parameter so swap it around
// A: op3 (AllBitsSet)
// B: op1
// C: op2
//
// This gives us OP(B, NOT(C))

assert(firstOpUseFlags == TernaryLogicUseFlags::BC);

assert(op3->IsVectorAllBitsSet());
std::swap(op2, op3);
std::swap(op1, op2);

controlByte = static_cast<uint8_t>(~C);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, B, controlByte);
}
}
else
else if (firstOpUseFlags == TernaryLogicUseFlags::AB)
{
// We have OP2(OP1(A, B), C)
controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, A, B);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, controlByte, C);
}
else
{
// We have OP2(A, OP1(B, C))
assert(firstOpUseFlags == TernaryLogicUseFlags::BC);

controlByte = TernaryLogicInfo::GetTernaryControlByte(oper, B, C);
controlByte = TernaryLogicInfo::GetTernaryControlByte(userOper, A, controlByte);
}

NamedIntrinsic ternaryLogicId = NI_AVX512F_TernaryLogic;

Expand Down
Loading