Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize FMA codegen base on the overwritten #58196

Merged
merged 40 commits into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ee2c0b6
Optimize FMA codegen base on the overwritten
weilinwa Jul 20, 2021
46d0011
Improve function/var names
weilinwa Aug 27, 2021
cce4bda
Add assertions
weilinwa Aug 27, 2021
b825291
Get use of FMA with TryGetUse
weilinwa Sep 7, 2021
f615e39
Decide FMA form with two conditions, OverwrittenOpNum and isContained
weilinwa Sep 8, 2021
b698036
Fix op reg error in codegen
weilinwa Sep 10, 2021
7d9c0d6
Decide form using lastUse and isContained in no overwritten case
weilinwa Sep 15, 2021
1344d92
Clean up code
weilinwa Sep 18, 2021
029a9b5
Separate default case overwrittenOpNum==0
weilinwa Sep 20, 2021
f2a371f
Apply format patch
weilinwa Sep 29, 2021
9955389
Change variable and function names
weilinwa Oct 1, 2021
7c56653
Update regOptional for op1 and resolve some other comments
weilinwa Oct 5, 2021
1d51caa
Optimize FMA codegen base on the overwritten
weilinwa Jul 20, 2021
091133e
Improve function/var names
weilinwa Aug 27, 2021
9a6ae44
Add assertions
weilinwa Aug 27, 2021
ffcff76
Get use of FMA with TryGetUse
weilinwa Sep 7, 2021
5641f8f
Decide FMA form with two conditions, OverwrittenOpNum and isContained
weilinwa Sep 8, 2021
b7312ac
Fix op reg error in codegen
weilinwa Sep 10, 2021
a325fe3
Decide form using lastUse and isContained in no overwritten case
weilinwa Sep 15, 2021
0f950dd
Clean up code
weilinwa Sep 18, 2021
33a596d
Separate default case overwrittenOpNum==0
weilinwa Sep 20, 2021
5da9368
Apply format patch
weilinwa Sep 29, 2021
c3a9f07
Change variable and function names
weilinwa Oct 1, 2021
9e356aa
Update regOptional for op1 and resolve some other comments
weilinwa Oct 5, 2021
f8159bc
Change var names
weilinwa Oct 13, 2021
18bbe4d
Resolve merge conflicts.
weilinwa Oct 13, 2021
2ca2524
Fix jit format
weilinwa Oct 13, 2021
17bd967
Fix build node error for op1 is regOptional
weilinwa Oct 14, 2021
eed5912
Use targetReg instead of GetResultOpNumForFMA in codegen
weilinwa Oct 28, 2021
43c5034
Update variable names
weilinwa Nov 2, 2021
5ef70a5
Refactor lsra to solve lastUse status changed caused assertion failure
weilinwa Nov 7, 2021
bfa6924
Add check to prioritize contained op in lsra
weilinwa Nov 7, 2021
12f260b
Update for jit format
weilinwa Nov 7, 2021
5ca658e
Simplify code
weilinwa Nov 17, 2021
ec4ef66
Resolve comments
weilinwa Nov 17, 2021
aa93a85
Comment out assert because of lastUse change
weilinwa Nov 19, 2021
c66a018
Fix some copiesUpperBits related errors
weilinwa Nov 22, 2021
ff5a433
Merge branch 'main' into fma_opt
weilinwa Nov 22, 2021
a4657c7
Update src/coreclr/jit/lsraxarch.cpp
weilinwa Nov 30, 2021
75d7a37
Add link to the new issue
weilinwa Nov 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19791,6 +19791,32 @@ uint16_t GenTreeLclVarCommon::GetLclOffs() const
}
}

#ifdef FEATURE_HW_INTRINSICS
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
//------------------------------------------------------------------------
//
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
unsigned GenTreeHWIntrinsic::GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3)
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
// only FMA intrinsic node should call into this function
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
GenTreeLclVarCommon* overwritten = this->gtNext->AsLclVarCommon();
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
assert(overwritten->gtOper == GT_STORE_LCL_VAR || overwritten->gtOper == GT_LCL_VAR);
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
unsigned flag = 0; // 1->op1, 2->op2, 3->op3
if (op1->IsLocal() && op1->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum())
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
flag = 1;
}
else if (op2->IsLocal() && op2->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum())
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
flag = 2;
}
else if (op3->IsLocal() && op3->AsLclVarCommon()->GetLclNum() == overwritten->GetLclNum())
{
flag = 3;
}

return flag;
}
#endif

#ifdef TARGET_ARM
//------------------------------------------------------------------------
// IsOffsetMisaligned: check if the field needs a special handling on arm.
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -5187,6 +5187,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or
// MemoryStore semantics, false otherwise

unsigned GetFMAOverwritten(GenTree* op1, GenTree* op2, GenTree* op3);

#if DEBUGGABLE_GENTREE
GenTreeHWIntrinsic() : GenTreeJitIntrinsic()
{
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2133,7 +2133,9 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
assert(!copiesUpperBits || !op1->isContained());

if (op2->isContained() || op2->isUsedFromSpillTemp())
unsigned flag = node->GetFMAOverwritten(op1, op2, op3);
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

if (flag == 1)
{
// 132 form: op1 = (op1 * op3) + [op2]

Expand All @@ -2142,7 +2144,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
op2Reg = op3->GetRegNum();
op3 = op2;
}
else if (op1->isContained() || op1->isUsedFromSpillTemp())
else if (flag == 3)
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
{
// 231 form: op3 = (op2 * op3) + [op1]

Expand Down
79 changes: 54 additions & 25 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6315,37 +6315,66 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
{
bool supportsRegOptional = false;

if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
{
// 213 form: op1 = (op2 * op1) + [op3]
MakeSrcContained(node, op3);
}
else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
{
// 132 form: op1 = (op1 * op3) + [op2]
MakeSrcContained(node, op2);
}
else if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional))
unsigned flag = node->GetFMAOverwritten(op1, op2, op3);

switch (flag)
{
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
case 1:
{
if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
{
// 132 form: op1 = (op1 * op3) + [op2]
MakeSrcContained(node, op2);
}
else
{
assert(supportsRegOptional);

if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId))
// 132 form: op1 = (op1 * op3) + [op2]
op2->SetRegOptional();
}
break;
}
case 3:
{
// 231 form: op3 = (op2 * op3) + [op1]
MakeSrcContained(node, op1);
if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional))
{
// Intrinsics with CopyUpperBits semantics cannot have op1 be contained

if (!HWIntrinsicInfo::CopiesUpperBits(intrinsicId))
{
// 231 form: op1 = (op2 * op3) + [op1]
MakeSrcContained(node, op1);
}
}
else
{
assert(supportsRegOptional);
// 231 form: op1 = (op2 * op3) + [op1]
op1->SetRegOptional();
}
break;
}
}
else
{
assert(supportsRegOptional);
default:
{
if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
{
// 213 form: op1 = (op2 * op1) + [op3]
MakeSrcContained(node, op3);
}
else
{
assert(supportsRegOptional);

// TODO-XArch-CQ: Technically any one of the three operands can
// be reg-optional. With a limitation on op1 where
// it can only be so if CopyUpperBits is off.
// https://github.com/dotnet/runtime/issues/6358
// TODO-XArch-CQ: Technically any one of the three operands can
// be reg-optional. With a limitation on op1 where
// it can only be so if CopyUpperBits is off.
// https://github.com/dotnet/runtime/issues/6358

// 213 form: op1 = (op2 * op1) + op3
op3->SetRegOptional();
// 213 form: op1 = (op2 * op1) + [op3]
op3->SetRegOptional();
}
}
}
}
else
Expand Down
8 changes: 5 additions & 3 deletions src/coreclr/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2334,10 +2334,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)

const bool copiesUpperBits = HWIntrinsicInfo::CopiesUpperBits(intrinsicId);

unsigned flag = intrinsicTree->GetFMAOverwritten(op1, op2, op3);

// Intrinsics with CopyUpperBits semantics cannot have op1 be contained
assert(!copiesUpperBits || !op1->isContained());

if (op2->isContained())
if (flag == 1)
{
// 132 form: op1 = (op1 * op3) + [op2]

Expand All @@ -2347,9 +2349,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
srcCount += BuildOperandUses(op2);
srcCount += BuildDelayFreeUses(op3, op1);
}
else if (op1->isContained())
else if (flag == 3)
{
// 231 form: op3 = (op2 * op3) + [op1]
// 231 form: op1 = (op2 * op3) + [op1]
tannergooding marked this conversation as resolved.
Show resolved Hide resolved

tgtPrefUse = BuildUse(op3);

Expand Down