-
Notifications
You must be signed in to change notification settings - Fork 4.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Ensure that FMA codegen operand swapping matches the lsra selections #62382
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2034,9 +2034,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) | |
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); | ||
var_types baseType = node->GetSimdBaseType(); | ||
emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); | ||
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form | ||
instruction _132form = (instruction)(ins - 1); | ||
instruction _231form = (instruction)(ins + 1); | ||
instruction _213form = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); // 213 form | ||
instruction _132form = (instruction)(_213form - 1); | ||
instruction _231form = (instruction)(_213form + 1); | ||
GenTree* op1 = node->Op(1); | ||
GenTree* op2 = node->Op(2); | ||
GenTree* op3 = node->Op(3); | ||
|
@@ -2058,57 +2058,81 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) | |
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained | ||
assert(!copiesUpperBits || !op1->isContained()); | ||
|
||
// We need to keep this in sync with lsraxarch.cpp | ||
// Ideally we'd actually swap the operands in lsra and simplify codegen | ||
// but its a bit more complicated to do so for many operands as well | ||
// as being complicated to tell codegen how to pick the right instruction | ||
|
||
instruction ins = INS_invalid; | ||
|
||
if (op1->isContained() || op1->isUsedFromSpillTemp()) | ||
{ | ||
// targetReg == op3NodeReg or targetReg == ? | ||
// op3 = ([op1] * op2) + op3 | ||
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 | ||
ins = _231form; | ||
std::swap(emitOp1, emitOp3); | ||
|
||
if (targetReg == op2NodeReg) | ||
{ | ||
std::swap(emitOp1, emitOp2); | ||
// op2 = ([op1] * op2) + op3 | ||
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 | ||
ins = _132form; | ||
std::swap(emitOp2, emitOp3); | ||
std::swap(emitOp1, emitOp2); | ||
} | ||
else | ||
} | ||
else if (op3->isContained() || op3->isUsedFromSpillTemp()) | ||
{ | ||
// targetReg could be op1NodeReg, op2NodeReg, or not equal to any op | ||
// op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3] | ||
// ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3 | ||
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] | ||
ins = _213form; | ||
|
||
if (!copiesUpperBits && (targetReg == op2NodeReg)) | ||
{ | ||
// targetReg == op3NodeReg or targetReg == ? | ||
// op3 = ([op1] * op2) + op3 | ||
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 | ||
ins = _231form; | ||
std::swap(emitOp1, emitOp3); | ||
// op2 = (op1 * op2) + [op3] | ||
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] | ||
std::swap(emitOp1, emitOp2); | ||
} | ||
} | ||
else if (op2->isContained() || op2->isUsedFromSpillTemp()) | ||
{ | ||
// targetReg == op1NodeReg or targetReg == ? | ||
// op1 = (op1 * [op2]) + op3 | ||
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 | ||
ins = _132form; | ||
std::swap(emitOp2, emitOp3); | ||
|
||
if (!copiesUpperBits && (targetReg == op3NodeReg)) | ||
{ | ||
// op3 = (op1 * [op2]) + op3 | ||
// 231 form: XMM1 = (XMM2 * [XMM3]) + XMM1 | ||
ins = _231form; | ||
std::swap(emitOp1, emitOp3); | ||
} | ||
else | ||
{ | ||
// targetReg == op1NodeReg or targetReg == ? | ||
// op1 = (op1 * [op2]) + op3 | ||
// 132 form: XMM1 = (XMM1 * [XMM3]) + XMM2 | ||
ins = _132form; | ||
std::swap(emitOp1, emitOp2); | ||
} | ||
std::swap(emitOp2, emitOp3); | ||
} | ||
else | ||
{ | ||
// targetReg could be op1NodeReg, op2NodeReg, or not equal to any op | ||
// op1 = (op1 * op2) + [op3] or op2 = (op1 * op2) + [op3] | ||
// ? = (op1 * op2) + [op3] or ? = (op1 * op2) + op3 | ||
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] | ||
if (!copiesUpperBits && (targetReg == op2NodeReg)) | ||
// containedOpNum == 0 | ||
// no extra work when resultOpNum is 0 or 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove the references of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm going to remove this in a follow up PR (will put it up about 5 minutes after merging). This will allow us to get CI passing again without needing to wait on the PR to rerun all tests for a comment change. |
||
if (targetReg == op2NodeReg) | ||
{ | ||
// op2 = (op1 * op2) + [op3] | ||
// 213 form: XMM1 = (XMM2 * XMM1) + [XMM3] | ||
ins = _213form; | ||
std::swap(emitOp1, emitOp2); | ||
} | ||
else if (targetReg == op3NodeReg) | ||
{ | ||
ins = _231form; | ||
std::swap(emitOp1, emitOp3); | ||
} | ||
else | ||
{ | ||
ins = _213form; | ||
} | ||
} | ||
|
||
assert(ins != INS_invalid); | ||
genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3); | ||
genProduceReg(node); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After
std::swap(emitOp1, emitOp3);
andstd::swap(emitOp2, emitOp3);
, we will getemitOp1 = op3; emitOp2 = op1; emitOp3 = op2
. But in my understanding, we needemitOp1 = op2; emitOp2 = op3; emitOp3 = op1
here. Is there anything I'm missing?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tannergooding, in case you haven't seen this one #62343. That was a case missed and captured by pmi. I think you also solved it in this new commit.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're right and that means LSRA has an issue here. Will fix