Skip to content

Commit

Permalink
Move fgMorphCombineSIMDFieldAssignments to local morph
Browse files Browse the repository at this point in the history
  • Loading branch information
SingleAccretion committed Nov 6, 2022
1 parent 19072fd commit ff2849f
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 204 deletions.
152 changes: 131 additions & 21 deletions src/coreclr/jit/lclmorph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1172,16 +1172,23 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
// the promoted local would look like "{ int a, B }", while the IR would contain "FIELD"
// nodes for the outer struct "A".
//
if (indir->TypeIs(TYP_STRUCT))
// TODO-1stClassStructs: delete this once "IND<struct>" nodes are no more.
if (indir->OperIs(GT_IND) && indir->TypeIs(TYP_STRUCT))
{
// TODO-1stClassStructs: delete this once "IND<struct>" nodes are no more.
if (indir->OperIs(GT_IND))
{
// We do not have a layout for this node.
return;
}
return;
}

ClassLayout* layout = indir->TypeIs(TYP_STRUCT) ? indir->GetLayout(m_compiler) : nullptr;
unsigned indSize = indir->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(indir);
if (indSize > genTypeSize(fieldType))
{
// Retargeting this indirection to reference the promoted field would make it
// "wide", address-exposing the whole parent struct (with all of its fields).
return;
}

ClassLayout* layout = indir->GetLayout(m_compiler);
if (indir->TypeIs(TYP_STRUCT))
{
indir->SetOper(GT_OBJ);
indir->AsBlk()->SetLayout(layout);
indir->AsBlk()->gtBlkOpKind = GenTreeBlk::BlkOpKindInvalid;
Expand Down Expand Up @@ -1368,6 +1375,7 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
//
PhaseStatus Compiler::fgMarkAddressExposedLocals()
{
bool madeChanges = false;
LocalAddressVisitor visitor(this);

for (BasicBlock* const block : Blocks())
Expand All @@ -1377,27 +1385,129 @@ PhaseStatus Compiler::fgMarkAddressExposedLocals()

for (Statement* const stmt : block->Statements())
{
#ifdef FEATURE_SIMD
if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeIs(TYP_FLOAT) &&
stmt->GetRootNode()->OperIs(GT_ASG))
{
madeChanges |= fgMorphCombineSIMDFieldAssignments(block, stmt);
}
#endif

visitor.VisitStmt(stmt);
}
}

return visitor.MadeChanges() ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
madeChanges |= visitor.MadeChanges();

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

//------------------------------------------------------------------------
// fgMarkAddressExposedLocals: Traverses the specified statement and marks address
// exposed locals.
#ifdef FEATURE_SIMD
//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
// If the RHS of the input stmt is a read for simd vector X Field, then this
// function will keep reading next few stmts based on the vector size(2, 3, 4).
// If the next stmts LHS are located contiguous and RHS are also located
// contiguous, then we replace those statements with one store.
//
// Arguments:
// stmt - the statement to traverse
// Argument:
// block - BasicBlock*. block which stmt belongs to
// stmt - Statement*. the stmt node we want to check
//
// Notes:
// Trees such as IND(ADDR(LCL_VAR)), that morph is expected to fold
// to just LCL_VAR, do not result in the involved local being marked
// address exposed.
// Return Value:
// Whether the assignments were successfully coalesced.
//
void Compiler::fgMarkAddressExposedLocals(Statement* stmt)
bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt)
{
LocalAddressVisitor visitor(this);
visitor.VisitStmt(stmt);
GenTree* tree = stmt->GetRootNode();
assert(tree->OperGet() == GT_ASG);

GenTree* originalLHS = tree->AsOp()->gtOp1;
GenTree* prevLHS = tree->AsOp()->gtOp1;
GenTree* prevRHS = tree->AsOp()->gtOp2;
unsigned index = 0;
CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
unsigned simdSize = 0;
GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &simdBaseJitType, &index, &simdSize, true);

if ((simdStructNode == nullptr) || (index != 0) || (simdBaseJitType != CORINFO_TYPE_FLOAT))
{
// if the RHS is not from a SIMD vector field X, then there is no need to check further.
return false;
}

var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdType = getSIMDTypeForSize(simdSize);
int assignmentsCount = simdSize / genTypeSize(simdBaseType) - 1;
int remainingAssignments = assignmentsCount;
Statement* curStmt = stmt->GetNextStmt();
Statement* lastStmt = stmt;

while (curStmt != nullptr && remainingAssignments > 0)
{
GenTree* exp = curStmt->GetRootNode();
if (exp->OperGet() != GT_ASG)
{
break;
}
GenTree* curLHS = exp->gtGetOp1();
GenTree* curRHS = exp->gtGetOp2();

if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
{
break;
}

remainingAssignments--;
prevLHS = curLHS;
prevRHS = curRHS;

lastStmt = curStmt;
curStmt = curStmt->GetNextStmt();
}

if (remainingAssignments > 0)
{
// if the left assignments number is bigger than zero, then this means
// that the assignments are not assigning to the contiguously memory
// locations from same vector.
return false;
}

JITDUMP("\nFound contiguous assignments from a SIMD vector to memory.\n");
JITDUMP("From " FMT_BB ", " FMT_STMT " to " FMT_STMT "\n", block->bbNum, stmt->GetID(), lastStmt->GetID());

for (int i = 0; i < assignmentsCount; i++)
{
fgRemoveStmt(block, stmt->GetNextStmt());
}

GenTree* dstNode;

if (originalLHS->OperIs(GT_LCL_FLD))
{
dstNode = originalLHS;
dstNode->gtType = simdType;
}
else
{
GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst);
}

JITDUMP("\n" FMT_BB " " FMT_STMT " (before):\n", block->bbNum, stmt->GetID());
DISPSTMT(stmt);

assert(!simdStructNode->CanCSE() && varTypeIsSIMD(simdStructNode));
simdStructNode->ClearDoNotCSE();

tree = gtNewAssignNode(dstNode, simdStructNode);

stmt->SetRootNode(tree);

JITDUMP("\nReplaced " FMT_BB " " FMT_STMT " (after):\n", block->bbNum, stmt->GetID());
DISPSTMT(stmt);

return true;
}
#endif // FEATURE_SIMD
182 changes: 0 additions & 182 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14485,13 +14485,6 @@ void Compiler::fgMorphStmts(BasicBlock* block)
fgRemoveStmt(block, stmt);
continue;
}
#ifdef FEATURE_SIMD
if (opts.OptimizationEnabled() && stmt->GetRootNode()->TypeGet() == TYP_FLOAT &&
stmt->GetRootNode()->OperGet() == GT_ASG)
{
fgMorphCombineSIMDFieldAssignments(block, stmt);
}
#endif

fgMorphStmt = stmt;
compCurStmt = stmt;
Expand Down Expand Up @@ -16103,181 +16096,6 @@ void Compiler::fgMarkDemotedImplicitByRefArgs()
#endif // FEATURE_IMPLICIT_BYREFS
}

#ifdef FEATURE_SIMD

//-----------------------------------------------------------------------------------
// fgMorphCombineSIMDFieldAssignments:
// If the RHS of the input stmt is a read for simd vector X Field, then this function
// will keep reading next few stmts based on the vector size(2, 3, 4).
// If the next stmts LHS are located contiguous and RHS are also located
// contiguous, then we replace those statements with a copyblk.
//
// Argument:
// block - BasicBlock*. block which stmt belongs to
// stmt - Statement*. the stmt node we want to check
//
// return value:
// if this function successfully optimized the stmts, then return true. Otherwise
// return false;

bool Compiler::fgMorphCombineSIMDFieldAssignments(BasicBlock* block, Statement* stmt)
{
GenTree* tree = stmt->GetRootNode();
assert(tree->OperGet() == GT_ASG);

GenTree* originalLHS = tree->AsOp()->gtOp1;
GenTree* prevLHS = tree->AsOp()->gtOp1;
GenTree* prevRHS = tree->AsOp()->gtOp2;
unsigned index = 0;
CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
unsigned simdSize = 0;
GenTree* simdStructNode = getSIMDStructFromField(prevRHS, &simdBaseJitType, &index, &simdSize, true);

if (simdStructNode == nullptr || index != 0 || simdBaseJitType != CORINFO_TYPE_FLOAT)
{
// if the RHS is not from a SIMD vector field X, then there is no need to check further.
return false;
}

var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
var_types simdType = getSIMDTypeForSize(simdSize);
int assignmentsCount = simdSize / genTypeSize(simdBaseType) - 1;
int remainingAssignments = assignmentsCount;
Statement* curStmt = stmt->GetNextStmt();
Statement* lastStmt = stmt;

while (curStmt != nullptr && remainingAssignments > 0)
{
GenTree* exp = curStmt->GetRootNode();
if (exp->OperGet() != GT_ASG)
{
break;
}
GenTree* curLHS = exp->gtGetOp1();
GenTree* curRHS = exp->gtGetOp2();

if (!areArgumentsContiguous(prevLHS, curLHS) || !areArgumentsContiguous(prevRHS, curRHS))
{
break;
}

remainingAssignments--;
prevLHS = curLHS;
prevRHS = curRHS;

lastStmt = curStmt;
curStmt = curStmt->GetNextStmt();
}

if (remainingAssignments > 0)
{
// if the left assignments number is bigger than zero, then this means
// that the assignments are not assigning to the contiguously memory
// locations from same vector.
return false;
}
#ifdef DEBUG
if (verbose)
{
printf("\nFound contiguous assignments from a SIMD vector to memory.\n");
printf("From " FMT_BB ", stmt ", block->bbNum);
printStmtID(stmt);
printf(" to stmt");
printStmtID(lastStmt);
printf("\n");
}
#endif

for (int i = 0; i < assignmentsCount; i++)
{
fgRemoveStmt(block, stmt->GetNextStmt());
}

GenTree* dstNode;

if (originalLHS->OperIs(GT_LCL_FLD))
{
dstNode = originalLHS;
dstNode->gtType = simdType;
dstNode->AsLclFld()->SetLayout(nullptr);

// This may have changed a partial local field into full local field
if (dstNode->IsPartialLclFld(this))
{
dstNode->gtFlags |= GTF_VAR_USEASG;
}
else
{
dstNode->gtFlags &= ~GTF_VAR_USEASG;
}
}
else
{
GenTree* copyBlkDst = createAddressNodeForSIMDInit(originalLHS, simdSize);
if (simdStructNode->OperIsLocal())
{
setLclRelatedToSIMDIntrinsic(simdStructNode);
}

GenTreeLclVarCommon* localDst = copyBlkDst->IsLocalAddrExpr();
if (localDst != nullptr)
{
setLclRelatedToSIMDIntrinsic(localDst);
}

if (simdStructNode->TypeGet() == TYP_BYREF)
{
assert(simdStructNode->OperIsLocal());
assert(lvaIsImplicitByRefLocal(simdStructNode->AsLclVarCommon()->GetLclNum()));
simdStructNode = gtNewIndir(simdType, simdStructNode);
}
else
{
assert(varTypeIsSIMD(simdStructNode));
}

dstNode = gtNewOperNode(GT_IND, simdType, copyBlkDst);
}

#ifdef DEBUG
if (verbose)
{
printf("\n" FMT_BB " stmt ", block->bbNum);
printStmtID(stmt);
printf("(before)\n");
gtDispStmt(stmt);
}
#endif

assert(!simdStructNode->CanCSE());
simdStructNode->ClearDoNotCSE();

tree = gtNewAssignNode(dstNode, simdStructNode);

stmt->SetRootNode(tree);

// Since we generated a new address node which didn't exist before,
// we should expose this address manually here.
// TODO-ADDR: Remove this when LocalAddressVisitor transforms all
// local field access into LCL_FLDs, at that point we would be
// combining 2 existing LCL_FLDs or 2 FIELDs that do not reference
// a local and thus cannot result in a new address exposed local.
fgMarkAddressExposedLocals(stmt);

#ifdef DEBUG
if (verbose)
{
printf("\nReplaced " FMT_BB " stmt", block->bbNum);
printStmtID(stmt);
printf("(after)\n");
gtDispStmt(stmt);
}
#endif
return true;
}

#endif // FEATURE_SIMD

//------------------------------------------------------------------------
// fgCheckStmtAfterTailCall: check that statements after the tail call stmt
// candidate are in one of expected forms, that are desctibed below.
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1748,7 +1748,11 @@ GenTree* Compiler::createAddressNodeForSIMDInit(GenTree* tree, unsigned simdSize
byrefNode = gtNewOperNode(GT_COMMA, arrayRef->TypeGet(), arrBndsChk, gtCloneExpr(arrayRef));
}

GenTree* address = gtNewOperNode(GT_ADD, TYP_BYREF, byrefNode, gtNewIconNode(offset, TYP_I_IMPL));
GenTree* address = byrefNode;
if (offset != 0)
{
address = gtNewOperNode(GT_ADD, TYP_BYREF, address, gtNewIconNode(offset, TYP_I_IMPL));
}

return address;
}
Expand Down

0 comments on commit ff2849f

Please sign in to comment.