Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keep precise argument sizes between import and morph. #43130

Merged
merged 1 commit into from
Nov 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/coreclr/src/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4356,11 +4356,10 @@ class Compiler
InlineCandidateInfo** ppInlineCandidateInfo,
InlineResult* inlineResult);

void impInlineRecordArgInfo(InlineInfo* pInlineInfo,
GenTree* curArgVal,
unsigned argNum,
unsigned __int64 bbFlags,
InlineResult* inlineResult);
void impInlineRecordArgInfo(InlineInfo* pInlineInfo,
GenTree* curArgVal,
unsigned argNum,
InlineResult* inlineResult);
AndyAyersMS marked this conversation as resolved.
Show resolved Hide resolved

void impInlineInitVars(InlineInfo* pInlineInfo);

Expand Down Expand Up @@ -4665,6 +4664,8 @@ class Compiler
BasicBlock* canonicalBlock,
flowList* predEdge);

GenTree* fgCheckCallArgUpdate(GenTree* parent, GenTree* child, var_types origType);

#if defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM)
// Sometimes we need to defer updating the BBF_FINALLY_TARGET bit. fgNeedToAddFinallyTargetBits signals
// when this is necessary.
Expand Down Expand Up @@ -10447,6 +10448,7 @@ class GenTreeVisitor
case GT_NULLCHECK:
case GT_PUTARG_REG:
case GT_PUTARG_STK:
case GT_PUTARG_TYPE:
case GT_RETURNTRAP:
case GT_NOP:
case GT_RETURN:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4387,6 +4387,7 @@ void GenTree::VisitOperands(TVisitor visitor)
case GT_NULLCHECK:
case GT_PUTARG_REG:
case GT_PUTARG_STK:
case GT_PUTARG_TYPE:
#if FEATURE_ARG_SPLIT
case GT_PUTARG_SPLIT:
#endif // FEATURE_ARG_SPLIT
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/src/jit/ee_il_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ unsigned Compiler::eeGetArgSize(CORINFO_ARG_LIST_HANDLE list, CORINFO_SIG_INFO*
if (varTypeIsStruct(argType))
{
unsigned structSize = info.compCompHnd->getClassSize(argClass);
return structSize; // TODO: roundUp() needed here?
return roundUp(structSize, TARGET_POINTER_SIZE);
}
#endif // UNIX_AMD64_ABI
return TARGET_POINTER_SIZE;
Expand Down
104 changes: 87 additions & 17 deletions src/coreclr/src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22743,7 +22743,11 @@ void Compiler::fgAttachStructInlineeToAsg(GenTree* tree, GenTree* child, CORINFO
// If the return type is a struct type and we're on a platform
// where structs can be returned in multiple registers, ensure the
// call has a suitable parent.

//
// If the original call type and the substitution type are different
// the functions makes necessary updates. It could happen if there was
// an implicit conversion in the inlinee body.
//
Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTree** pTree, fgWalkData* data)
{
// All the operations here and in the corresponding postorder
Expand Down Expand Up @@ -22798,6 +22802,29 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
}
#endif // DEBUG

var_types newType = inlineCandidate->TypeGet();

// If we end up swapping type we may need to retype the tree:
sandreenko marked this conversation as resolved.
Show resolved Hide resolved
if (retType != newType)
{
if ((retType == TYP_BYREF) && (tree->OperGet() == GT_IND))
{
// - in an RVA static if we've reinterpreted it as a byref;
assert(newType == TYP_I_IMPL);
JITDUMP("Updating type of the return GT_IND expression to TYP_BYREF\n");
inlineCandidate->gtType = TYP_BYREF;
}
else
{
// - under a call if we changed size of the argument.
GenTree* putArgType = comp->fgCheckCallArgUpdate(data->parent, inlineCandidate, retType);
if (putArgType != nullptr)
{
inlineCandidate = putArgType;
}
}
}

tree->ReplaceWith(inlineCandidate, comp);
comp->compCurBB->bbFlags |= (bbFlags & BBF_SPLIT_GAINED);

Expand All @@ -22809,17 +22836,6 @@ Compiler::fgWalkResult Compiler::fgUpdateInlineReturnExpressionPlaceHolder(GenTr
printf("\n");
}
#endif // DEBUG

var_types newType = tree->TypeGet();

// If we end up swapping in an RVA static we may need to retype it here,
// if we've reinterpreted it as a byref.
if ((retType != newType) && (retType == TYP_BYREF) && (tree->OperGet() == GT_IND))
{
assert(newType == TYP_I_IMPL);
JITDUMP("Updating type of the return GT_IND expression to TYP_BYREF\n");
tree->gtType = TYP_BYREF;
}
}

// If an inline was rejected and the call returns a struct, we may
Expand Down Expand Up @@ -23101,8 +23117,16 @@ Compiler::fgWalkResult Compiler::fgLateDevirtualization(GenTree** pTree, fgWalkD
}
else
{
GenTree* foldedTree = comp->gtFoldExpr(tree);
*pTree = foldedTree;
const var_types retType = tree->TypeGet();
GenTree* foldedTree = comp->gtFoldExpr(tree);
const var_types newType = foldedTree->TypeGet();

GenTree* putArgType = comp->fgCheckCallArgUpdate(data->parent, foldedTree, retType);
if (putArgType != nullptr)
{
foldedTree = putArgType;
}
*pTree = foldedTree;
}

return WALK_CONTINUE;
Expand Down Expand Up @@ -23799,8 +23823,12 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
{
const InlArgInfo& argInfo = inlArgInfo[argNum];
const bool argIsSingleDef = !argInfo.argHasLdargaOp && !argInfo.argHasStargOp;
GenTree* const argNode = inlArgInfo[argNum].argNode;
unsigned __int64 bbFlags = inlArgInfo[argNum].bbFlags;
GenTree* argNode = inlArgInfo[argNum].argNode;
const bool argHasPutArg = argNode->OperIs(GT_PUTARG_TYPE);

unsigned __int64 bbFlags = 0;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See note above; I don't believe you can set these to zero like this and pass jit stress.

argNode = argNode->gtSkipPutArgType();
argNode = argNode->gtRetExprVal(&bbFlags);
AndyAyersMS marked this conversation as resolved.
Show resolved Hide resolved

if (argInfo.argHasTmp)
{
Expand All @@ -23820,7 +23848,11 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)

GenTree* argSingleUseNode = argInfo.argBashTmpNode;

if ((argSingleUseNode != nullptr) && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && argIsSingleDef)
// argHasPutArg disqualifies the arg from a direct substitution because we don't have information about
// its user. For example: replace `LCL_VAR short` with `PUTARG_TYPE short->LCL_VAR int`,
// we should keep `PUTARG_TYPE` iff the user is a call that needs `short` and delete it otherwise.
if ((argSingleUseNode != nullptr) && !(argSingleUseNode->gtFlags & GTF_VAR_CLONED) && argIsSingleDef &&
!argHasPutArg)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This new condition allows us to always the argument as a temp with the correct type, we need it because for a correct direct substitution we need to know if the parent is a call or not but we don't have this information.
We create the node when we see an IL opcode like ldlocal and don't have parent information, later, when the node is used, we can't distinguish it from a regular LCL_VAR.
The change causes a small regression on all platforms:

windows x64 crossgen: 5134 (0.02% of base)
windows x64 pmi:  14911 (0.03% of base)
windows arm64 crossgen: 5688 (0.01% of base)
linux x64 crossgen: 9036 (0.02% of base)

, etc. all lower than 0.04%.

What happens there is:

  1. during importation we create a LCL_VAR tempForOurArg, save a pointer to it;
  2. in fgInsertInlineeBlocks we iterate over all such pointers and do in-place replacement if there was only one use of the lclVar, but we don't know the user, so we can't say if it is a call that need a PUTARG or not.

I have not found a simple way to avoid it or compensate for it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to have a comment in the code here explaining why argHasPutArg disqualifies this arg from direct substitution.

{
// Change the temp in-place to the actual argument.
// We currently do not support this for struct arguments, so it must not be a GT_OBJ.
Expand Down Expand Up @@ -26501,6 +26533,44 @@ void Compiler::fgTailMergeThrowsJumpToHelper(BasicBlock* predBlock,
canonicalBlock->bbFlags |= (BBF_JMP_TARGET | BBF_HAS_LABEL);
}

//------------------------------------------------------------------------
// fgCheckCallArgUpdate: check if we are replacing a call argument and add GT_PUTARG_TYPE if necessary.
//
// Arguments:
// parent - the parent that could be a call;
// child - the new child node;
// origType - the original child type;
//
// Returns:
// PUT_ARG_TYPE node if it is needed, nullptr otherwise.
//
GenTree* Compiler::fgCheckCallArgUpdate(GenTree* parent, GenTree* child, var_types origType)
{
if ((parent == nullptr) || !parent->IsCall())
{
return nullptr;
}
const var_types newType = child->TypeGet();
if (newType == origType)
{
return nullptr;
}
if (varTypeIsStruct(origType) || (genTypeSize(origType) == genTypeSize(newType)))
{
assert(!varTypeIsStruct(newType));
return nullptr;
}
GenTree* putArgType = gtNewOperNode(GT_PUTARG_TYPE, origType, child);
#if defined(DEBUG)
if (verbose)
{
printf("For call [%06d] the new argument's type [%06d]", dspTreeID(parent), dspTreeID(child));
printf(" does not match the original type size, add a GT_PUTARG_TYPE [%06d]\n", dspTreeID(parent));
}
#endif
return putArgType;
}

#ifdef DEBUG

//------------------------------------------------------------------------
Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5112,6 +5112,7 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use)
case GT_NULLCHECK:
case GT_PUTARG_REG:
case GT_PUTARG_STK:
case GT_PUTARG_TYPE:
case GT_RETURNTRAP:
case GT_NOP:
case GT_RETURN:
Expand Down Expand Up @@ -9107,6 +9108,7 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node)
case GT_NULLCHECK:
case GT_PUTARG_REG:
case GT_PUTARG_STK:
case GT_PUTARG_TYPE:
case GT_BSWAP:
case GT_BSWAP16:
case GT_KEEPALIVE:
Expand Down Expand Up @@ -12021,7 +12023,7 @@ void Compiler::gtGetArgMsg(GenTreeCall* call, GenTree* arg, unsigned argNum, cha
}
#endif // TARGET_ARM
#if FEATURE_FIXED_OUT_ARGS
sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum, curArgTabEntry->slotNum * TARGET_POINTER_SIZE, 0);
sprintf_s(bufp, bufLength, "arg%d out+%02x%c", argNum, curArgTabEntry->GetByteOffset(), 0);
#else
sprintf_s(bufp, bufLength, "arg%d on STK%c", argNum, 0);
#endif
Expand Down Expand Up @@ -15517,7 +15519,7 @@ GenTree* Compiler::gtNewTempAssign(
// There are 2 special cases:
// 1. we have lost classHandle from a FIELD node because the parent struct has overlapping fields,
// the field was transformed as IND opr GT_LCL_FLD;
// 2. we are propogating `ASG(struct V01, 0)` to `RETURN(struct V01)`, `CNT_INT` doesn't `structHnd`;
// 2. we are propagation `ASG(struct V01, 0)` to `RETURN(struct V01)`, `CNT_INT` doesn't `structHnd`;
// in these cases, we can use the type of the merge return for the assignment.
assert(val->OperIs(GT_IND, GT_LCL_FLD, GT_CNS_INT));
assert(!compDoOldStructRetyping());
Expand Down
45 changes: 37 additions & 8 deletions src/coreclr/src/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1736,7 +1736,9 @@ struct GenTree
inline GenTree* gtEffectiveVal(bool commaOnly = false);

// Tunnel through any GT_RET_EXPRs
inline GenTree* gtRetExprVal(unsigned __int64* pbbFlags);
inline GenTree* gtRetExprVal(unsigned __int64* pbbFlags = nullptr);

inline GenTree* gtSkipPutArgType();

// Return the child of this node if it is a GT_RELOAD or GT_COPY; otherwise simply return the node itself
inline GenTree* gtSkipReloadOrCopy();
Expand Down Expand Up @@ -7111,14 +7113,15 @@ inline GenTree* GenTree::gtGetOp2IfPresent() const
return op2;
}

inline GenTree* GenTree::gtEffectiveVal(bool commaOnly)
inline GenTree* GenTree::gtEffectiveVal(bool commaOnly /* = false */)
{
GenTree* effectiveVal = this;
for (;;)
{
assert(!effectiveVal->OperIs(GT_PUTARG_TYPE));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why you added this here - that is, I can see why we wouldn't expect to encounter these under a COMMA, but I'm not sure why this special assert is needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have several similar functions like gtEffectiveVal, gtRetExprVal and gtSkipPutArgType. When we call each of them we often don't expect nodes that can be skipped by two others to be on top.
For example, when we call gtEffectiveVal we don't expect something like GT_RET_EXPR(GT_COMMA) or GT_PUTARG_TYPE(GT_COMMA), similar for gtRetExprVal it should not see GT_PUTARG_TYPE(GT_PUTARG_TYPE).
If we do it means the caller did not check for these nodes and we don't skip what we hoped to skip, it doesn't cause correctness issues in my understanding but introduces asm regressions when we can't parse something like we did before the change.

if (effectiveVal->gtOper == GT_COMMA)
{
effectiveVal = effectiveVal->AsOp()->gtOp2;
effectiveVal = effectiveVal->AsOp()->gtGetOp2();
}
else if (!commaOnly && (effectiveVal->gtOper == GT_NOP) && (effectiveVal->AsOp()->gtOp1 != nullptr))
{
Expand Down Expand Up @@ -7147,23 +7150,49 @@ inline GenTree* GenTree::gtEffectiveVal(bool commaOnly)
// Multi-level inlines can form chains of GT_RET_EXPRs.
// This method walks back to the root of the chain.

inline GenTree* GenTree::gtRetExprVal(unsigned __int64* pbbFlags)
inline GenTree* GenTree::gtRetExprVal(unsigned __int64* pbbFlags /* = nullptr */)
{
GenTree* retExprVal = this;
unsigned __int64 bbFlags = 0;

assert(pbbFlags != nullptr);
assert(!retExprVal->OperIs(GT_PUTARG_TYPE));

for (; retExprVal->gtOper == GT_RET_EXPR; retExprVal = retExprVal->AsRetExpr()->gtInlineCandidate)
while (retExprVal->OperIs(GT_RET_EXPR))
{
bbFlags = retExprVal->AsRetExpr()->bbFlags;
const GenTreeRetExpr* retExpr = retExprVal->AsRetExpr();
bbFlags = retExpr->bbFlags;
retExprVal = retExpr->gtInlineCandidate;
}

*pbbFlags = bbFlags;
if (pbbFlags != nullptr)
{
*pbbFlags = bbFlags;
}

return retExprVal;
}

//-------------------------------------------------------------------------
// gtSkipPutArgType - skip PUTARG_TYPE if it is presented.
//
// Returns:
// the original tree or its child if it was a PUTARG_TYPE.
//
// Notes:
// PUTARG_TYPE should be skipped when we are doing transformations
// that are not affected by ABI, for example: inlining, implicit byref morphing.
//
inline GenTree* GenTree::gtSkipPutArgType()
{
if (OperIs(GT_PUTARG_TYPE))
{
GenTree* res = AsUnOp()->gtGetOp1();
assert(!res->OperIs(GT_PUTARG_TYPE));
return res;
}
return this;
}

inline GenTree* GenTree::gtSkipReloadOrCopy()
{
// There can be only one reload or copy (we can't have a reload/copy of a reload/copy)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/src/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ GTNODE(PUTARG_REG , GenTreeMultiRegOp ,0,GTK_UNOP)
#else
GTNODE(PUTARG_REG , GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register
#endif
GTNODE(PUTARG_TYPE , GenTreeOp ,0,GTK_UNOP|GTK_NOTLIR) // operator that places saves argument type between importation and morph
GTNODE(PUTARG_STK , GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack
#if FEATURE_ARG_SPLIT
GTNODE(PUTARG_SPLIT , GenTreePutArgSplit ,0,GTK_UNOP) // operator that places outgoing arg in registers with stack (split struct in ARM32)
Expand Down
Loading