Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move the [With|Get]Element(SIMD) folding to local morph #76491

Merged
merged 9 commits into from
Nov 25, 2022
2 changes: 0 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5701,8 +5701,6 @@ class Compiler
unsigned* indexOut,
unsigned* simdSizeOut,
bool ignoreUsedInSIMDIntrinsic = false);
GenTree* fgMorphFieldAssignToSimdSetElement(GenTree* tree);
GenTree* fgMorphFieldToSimdGetElement(GenTree* tree);
bool fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt);
void impMarkContiguousSIMDFieldAssignments(Statement* stmt);

Expand Down
234 changes: 199 additions & 35 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
None,
Nop,
BitCast,
GetElement,
WithElement,
LclVar,
LclFld
};
Expand Down Expand Up @@ -909,13 +911,45 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>

case IndirTransform::BitCast:
indir->ChangeOper(GT_BITCAST);
indir->gtGetOp1()->ChangeOper(GT_LCL_VAR);
indir->gtGetOp1()->ChangeType(varDsc->TypeGet());
indir->gtGetOp1()->AsLclVar()->SetLclNum(lclNum);
lclNode = indir->gtGetOp1()->AsLclVarCommon();
lclNode = BashToLclVar(indir->gtGetOp1(), lclNum);
break;

#ifdef FEATURE_HW_INTRINSICS
case IndirTransform::GetElement:
{
var_types elementType = indir->TypeGet();
assert(elementType == TYP_FLOAT);

lclNode = BashToLclVar(indir->gtGetOp1(), lclNum);
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
GenTree* hwiNode = m_compiler->gtNewSimdGetElementNode(elementType, lclNode, indexNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ false);
indir->ReplaceWith(hwiNode, m_compiler);
SingleAccretion marked this conversation as resolved.
Show resolved Hide resolved
}
break;

case IndirTransform::WithElement:
{
assert(user->OperIs(GT_ASG) && (user->gtGetOp1() == indir));
var_types elementType = indir->TypeGet();
assert(elementType == TYP_FLOAT);

lclNode = BashToLclVar(indir, lclNum);
GenTree* simdLclNode = m_compiler->gtNewLclvNode(lclNum, varDsc->TypeGet());
GenTree* indexNode = m_compiler->gtNewIconNode(val.Offset() / genTypeSize(elementType));
GenTree* elementNode = user->gtGetOp2();
user->AsOp()->gtOp2 =
m_compiler->gtNewSimdWithElementNode(varDsc->TypeGet(), simdLclNode, indexNode, elementNode,
CORINFO_TYPE_FLOAT, genTypeSize(varDsc),
/* isSimdAsHWIntrinsic */ false);
user->ChangeType(varDsc->TypeGet());
}
break;
#endif // FEATURE_HW_INTRINSICS

case IndirTransform::LclVar:
// TODO-ADDR: use "BashToLclVar" here.
if (indir->TypeGet() != varDsc->TypeGet())
{
assert(genTypeSize(indir) == genTypeSize(varDsc)); // BOOL <-> UBYTE.
Expand Down Expand Up @@ -996,24 +1030,17 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
return IndirTransform::LclVar;
}

if (varTypeIsSIMD(varDsc))
{
// TODO-ADDR: skip SIMD variables for now, fgMorphFieldAssignToSimdSetElement and
// fgMorphFieldToSimdGetElement need to be updated to recognize LCL_FLDs or moved
// here.
return IndirTransform::None;
}

// Bool and ubyte are the same type.
if ((indir->TypeIs(TYP_BOOL) && (varDsc->TypeGet() == TYP_UBYTE)) ||
(indir->TypeIs(TYP_UBYTE) && (varDsc->TypeGet() == TYP_BOOL)))
{
return IndirTransform::LclVar;
}

bool isDef = user->OperIs(GT_ASG) && (user->gtGetOp1() == indir);

// For small locals on the LHS we can ignore the signed/unsigned diff.
if (user->OperIs(GT_ASG) && (user->gtGetOp1() == indir) &&
(varTypeToSigned(indir) == varTypeToSigned(varDsc)))
if (isDef && (varTypeToSigned(indir) == varTypeToSigned(varDsc)))
{
assert(varTypeIsSmall(indir));
return IndirTransform::LclVar;
Expand All @@ -1024,6 +1051,12 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
return IndirTransform::LclFld;
}

if (varTypeIsSIMD(varDsc) && indir->TypeIs(TYP_FLOAT) && ((val.Offset() % genTypeSize(TYP_FLOAT)) == 0) &&
m_compiler->IsBaselineSimdIsaSupported())
{
return isDef ? IndirTransform::WithElement : IndirTransform::GetElement;
}

// Turn this into a bitcast if we can.
if ((genTypeSize(indir) == genTypeSize(varDsc)) && (varTypeIsFloating(indir) || varTypeIsFloating(varDsc)))
{
Expand Down Expand Up @@ -1139,16 +1172,23 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
// the promoted local would look like "{ int a, B }", while the IR would contain "FIELD"
// nodes for the outer struct "A".
//
if (indir->TypeIs(TYP_STRUCT))
// TODO-1stClassStructs: delete this once "IND<struct>" nodes are no more.
if (indir->OperIs(GT_IND) && indir->TypeIs(TYP_STRUCT))
{
// TODO-1stClassStructs: delete this once "IND<struct>" nodes are no more.
if (indir->OperIs(GT_IND))
{
// We do not have a layout for this node.
return;
}
return;
}

ClassLayout* layout = indir->TypeIs(TYP_STRUCT) ? indir->GetLayout(m_compiler) : nullptr;
unsigned indSize = indir->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(indir);
if (indSize > genTypeSize(fieldType))
{
// Retargeting this indirection to reference the promoted field would make it
// "wide", address-exposing the whole parent struct (with all of its fields).
return;
}

ClassLayout* layout = indir->GetLayout(m_compiler);
if (indir->TypeIs(TYP_STRUCT))
{
indir->SetOper(GT_OBJ);
indir->AsBlk()->SetLayout(layout);
indir->AsBlk()->gtBlkOpKind = GenTreeBlk::BlkOpKindInvalid;
Expand Down Expand Up @@ -1298,6 +1338,27 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
{
return (user == nullptr) || (user->OperIs(GT_COMMA) && (user->AsOp()->gtGetOp1() == node));
}

//------------------------------------------------------------------------
// BashToLclVar: Rewrite an existing node, in place, as a LCL_VAR use of
//    the given local.
//
// Arguments:
//    node   - the node to rewrite
//    lclNum - the number of the local the rewritten node will reference
//
// Return Value:
//    The same node, now viewed as a GenTreeLclVar.
//
// Notes:
//    The node is given the local's declared type when the local is
//    "normalize on load", and the local's actual type otherwise.
//
GenTreeLclVar* BashToLclVar(GenTree* node, unsigned lclNum)
{
    LclVarDsc* const lclDsc = m_compiler->lvaGetDesc(lclNum);

    node->ChangeOper(GT_LCL_VAR);

    if (lclDsc->lvNormalizeOnLoad())
    {
        node->ChangeType(lclDsc->TypeGet());
    }
    else
    {
        node->ChangeType(genActualType(lclDsc));
    }

    GenTreeLclVar* const lclVarNode = node->AsLclVar();
    lclVarNode->SetLclNum(lclNum);

    return lclVarNode;
}
};

//------------------------------------------------------------------------
Expand All @@ -1314,6 +1375,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
//
PhaseStatus Compiler::fgMarkAddressExposedLocals()
{
bool madeChanges = false;
LocalAddressVisitor visitor(this);

for (BasicBlock* const block : Blocks())
Expand All @@ -1323,27 +1385,129 @@ PhaseStatus Compiler::fgMarkAddressExposedLocals()

for (Statement* const stmt : block->Statements())
{
#ifdef FEATURE_SIMD
if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeIs(TYP_FLOAT) &&
stmt->GetRootNode()->OperIs(GT_ASG))
{
madeChanges |= fgMorphCombineSIMDFieldAssignments(block, stmt);
}
#endif

visitor.VisitStmt(stmt);
}
}

return visitor.MadeChanges() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
madeChanges |= visitor.MadeChanges();

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//------------------------------------------------------------------------
// fgMarkAddressExposedLocals: Traverses the specified statement and marks address
// exposed locals.
#ifdef FEATURE_SIMD
//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
// If the RHS of the input stmt is a read of the X field of a SIMD vector, then this
// function keeps reading the next few stmts based on the vector size (2, 3, 4).
// If the LHSs of those stmts are contiguous and their RHSs are also contiguous,
// then we replace those statements with one store.
//
// Arguments:
// stmt - the statement to traverse
// Argument:
// block - BasicBlock*. block which stmt belongs to
// stmt - Statement*. the stmt node we want to check
//
// Notes:
// Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold
// to just LCL_VAR, do not result in the involved local being marked
// address exposed.
// Return Value:
// Whether the assignments were successfully coalesced.
//
void Compiler::fgMarkAddressExposedLocals(Statement* stmt)
bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt)
{
// NOTE(review): the next two lines look like residue of the removed
// "fgMarkAddressExposedLocals(Statement*)" overload that the diff view interleaved
// with this function; "visitor" is not used anywhere else in this body - confirm
// against the merged file.
LocalAddressVisitor visitor(this);
visitor.VisitStmt(stmt);
GenTree* tree = stmt->GetRootNode();
assert(tree->OperGet() == GT_ASG);

// "originalLHS" is kept to build the combined destination; "prevLHS"/"prevRHS" track
// the most recently accepted assignment while scanning the statements that follow.
GenTree* originalLHS = tree->AsOp()->gtOp1;
GenTree* prevLHS = tree->AsOp()->gtOp1;
GenTree* prevRHS = tree->AsOp()->gtOp2;
unsigned index = 0;
CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
unsigned simdSize = 0;
// Ask whether the RHS is a field read from a SIMD struct; on success the base type,
// element index and SIMD size are returned through the out parameters.
// (The trailing "true" is presumably "ignoreUsedInSIMDIntrinsic" - TODO confirm.)
GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &simdBaseJitType, &index, &simdSize, true);

if ((simdStructNode == nullptr) || (index != 0) || (simdBaseJitType != CORINFO_TYPE_FLOAT))
{
// If the RHS is not a read of the first (index 0) float element of a SIMD vector,
// there is no need to check further.
return false;
}

var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdType = getSIMDTypeForSize(simdSize);
// Number of additional assignments (beyond "stmt" itself) needed to cover every
// element of the vector.
int assignmentsCount = simdSize / genTypeSize(simdBaseType) - 1;
int remainingAssignments = assignmentsCount;
Statement* curStmt = stmt->GetNextStmt();
Statement* lastStmt = stmt;

// Walk the following statements for as long as each one is an assignment whose LHS
// and RHS are both contiguous with those of the previously accepted assignment.
while (curStmt != nullptr && remainingAssignments > 0)
{
GenTree* exp = curStmt->GetRootNode();
if (exp->OperGet() != GT_ASG)
{
break;
}
GenTree* curLHS = exp->gtGetOp1();
GenTree* curRHS = exp->gtGetOp2();

if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
{
break;
}

remainingAssignments--;
prevLHS = curLHS;
prevRHS = curRHS;

lastStmt = curStmt;
curStmt = curStmt->GetNextStmt();
}

if (remainingAssignments > 0)
{
// If some assignments are still missing, the statements do not assign
// all elements of the same vector to contiguous memory locations,
// so they cannot be combined.
return false;
}

JITDUMP("\nFound contiguous assignments from a SIMD vector to memory.\n");
JITDUMP("From " FMT_BB ", " FMT_STMT " to " FMT_STMT "\n", block->bbNum, stmt->GetID(), lastStmt->GetID());

// Remove the "assignmentsCount" statements that follow "stmt"; "stmt" itself is
// kept and gets the combined assignment as its new root below.
for (int i = 0; i < assignmentsCount; i++)
{
fgRemoveStmt(block, stmt->GetNextStmt());
}

GenTree* dstNode;

if (originalLHS->OperIs(GT_LCL_FLD))
{
// Reuse the existing LCL_FLD as the destination, retyped in place to the SIMD type.
dstNode = originalLHS;
dstNode->gtType = simdType;
}
else
{
// Otherwise build an address for the original destination and store through
// a SIMD-typed indirection.
GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst);
}

JITDUMP("\n" FMT_BB " " FMT_STMT " (before):\n", block->bbNum, stmt->GetID());
DISPSTMT(stmt);

// The SIMD struct node is expected to be marked "do not CSE" at this point; clear
// that mark now that the node becomes the RHS of the combined assignment.
assert(!simdStructNode->CanCSE() && varTypeIsSIMD(simdStructNode));
simdStructNode->ClearDoNotCSE();

tree = gtNewAssignNode(dstNode, simdStructNode);

stmt->SetRootNode(tree);

JITDUMP("\nReplaced " FMT_BB " " FMT_STMT " (after):\n", block->bbNum, stmt->GetID());
DISPSTMT(stmt);

return true;
}
#endif // FEATURE_SIMD
Loading