Skip to content

Commit

Permalink
Ensure that FMA codegen operand swapping matches the lsra selections (#…
Browse files Browse the repository at this point in the history
…62382)

* Ensure that FMA codegen operand swapping matches the lsra selections

* Ensure operands end up in the right slots
  • Loading branch information
tannergooding authored Dec 6, 2021
1 parent 1872ef5 commit c9f56b8
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 28 deletions.
78 changes: 51 additions & 27 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2034,9 +2034,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
var_types baseType = node->GetSimdBaseType();
emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize()));
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form
instruction _132form = (instruction)(ins - 1);
instruction _231form = (instruction)(ins + 1);
instruction _213form = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form
instruction _132form = (instruction)(_213form - 1);
instruction _231form = (instruction)(_213form + 1);
GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);
GenTree* op3 = node->Op(3);
Expand All @@ -2058,57 +2058,81 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
assert(!copiesUpperBits || !op1->isContained());

// We need to keep this in sync with lsraxarch.cpp
// Ideally we'd actually swap the operands in lsra and simplify codegen
// but its a bit more complicated to do so for many operands as well
// as being complicated to tell codegen how to pick the right instruction

instruction ins = INS_invalid;

if (op1->isContained() || op1->isUsedFromSpillTemp())
{
// targetReg == op3NodeReg or targetReg == ?
// op3 = ([op1] * op2) + op3
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
ins = _231form;
std::swap(emitOp1, emitOp3);

if (targetReg == op2NodeReg)
{
std::swap(emitOp1, emitOp2);
// op2 = ([op1] * op2) + op3
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
ins = _132form;
std::swap(emitOp2, emitOp3);
std::swap(emitOp1, emitOp2);
}
else
}
else if (op3->isContained() || op3->isUsedFromSpillTemp())
{
// targetReg could be op1NodeReg, op2NodeReg, or not equal to any op
// op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3]
// ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
ins = _213form;

if (!copiesUpperBits && (targetReg == op2NodeReg))
{
// targetReg == op3NodeReg or targetReg == ?
// op3 = ([op1] * op2) + op3
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
ins = _231form;
std::swap(emitOp1, emitOp3);
// op2 = (op1 * op2) + [op3]
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
std::swap(emitOp1, emitOp2);
}
}
else if (op2->isContained() || op2->isUsedFromSpillTemp())
{
// targetReg == op1NodeReg or targetReg == ?
// op1 = (op1 * [op2]) + op3
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
ins = _132form;
std::swap(emitOp2, emitOp3);

if (!copiesUpperBits && (targetReg == op3NodeReg))
{
// op3 = (op1 * [op2]) + op3
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1
ins = _231form;
std::swap(emitOp1, emitOp3);
}
else
{
// targetReg == op1NodeReg or targetReg == ?
// op1 = (op1 * [op2]) + op3
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2
ins = _132form;
std::swap(emitOp1, emitOp2);
}
std::swap(emitOp2, emitOp3);
}
else
{
// targetReg could be op1NodeReg, op2NodeReg, or not equal to any op
// op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3]
// ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
if (!copiesUpperBits && (targetReg == op2NodeReg))
// containedOpNum == 0
// no extra work when resultOpNum is 0 or 1
if (targetReg == op2NodeReg)
{
// op2 = (op1 * op2) + [op3]
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3]
ins = _213form;
std::swap(emitOp1, emitOp2);
}
else if (targetReg == op3NodeReg)
{
ins = _231form;
std::swap(emitOp1, emitOp3);
}
else
{
ins = _213form;
}
}

assert(ins != INS_invalid);
genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3);
genProduceReg(node);
}
Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2305,6 +2305,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
// Intrinsics with CopyUpperBits semantics must have op1 as target
assert(containedOpNum != 1 || !copiesUpperBits);

// We need to keep this in sync with hwintrinsiccodegenxarch.cpp
// Ideally we'd actually swap the operands here and simplify codegen
// but its a bit more complicated to do so for many operands as well
// as being complicated to tell codegen how to pick the right instruction

if (containedOpNum == 1)
{
// https://github.com/dotnet/runtime/issues/62215
Expand All @@ -2316,7 +2321,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
if (resultOpNum == 2)
{
// op2 = ([op1] * op2) + op3
std::swap(emitOp2, emitOp3);
std::swap(emitOp1, emitOp2);
}
}
else if (containedOpNum == 3)
Expand Down

0 comments on commit c9f56b8

Please sign in to comment.