Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Lift remaining cmov restrictions by introducing GT_SELECTCC #82235

Merged
merged 9 commits into from
Feb 23, 2023
Merged
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void genCompareInt(GenTree* treeNode);
#ifdef TARGET_XARCH
bool genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opType);
GenTreeCC* genTryFindFlagsConsumer(GenTree* flagsProducer);
GenTree* genTryFindFlagsConsumer(GenTree* flagsProducer, GenCondition** condition);
#endif

#ifdef FEATURE_SIMD
Expand Down
66 changes: 29 additions & 37 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1363,11 +1363,7 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition)
//
void CodeGen::genCodeForSelect(GenTreeOp* select)
{
#ifdef TARGET_X86
assert(select->OperIs(GT_SELECT, GT_SELECT_HI));
#else
assert(select->OperIs(GT_SELECT));
#endif
assert(select->OperIs(GT_SELECT, GT_SELECTCC));
jakobbotsch marked this conversation as resolved.
Show resolved Hide resolved

if (select->OperIs(GT_SELECT))
{
Expand All @@ -1385,25 +1381,13 @@ void CodeGen::genCodeForSelect(GenTreeOp* select)

if (select->OperIs(GT_SELECT))
{
GenTree* cond = select->AsConditional()->gtCond;
if (cond->isContained())
{
assert(cond->OperIsCompare());
genCodeForCompare(cond->AsOp());
cc = GenCondition::FromRelop(cond);

if (cc.PreferSwap())
{
// genCodeForCompare generated the compare with swapped
// operands because this swap requires fewer branches/cmovs.
cc = GenCondition::Swap(cc);
}
}
else
{
regNumber condReg = cond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, EA_4BYTE, condReg, condReg);
}
GenTree* cond = select->AsConditional()->gtCond;
regNumber condReg = cond->GetRegNum();
GetEmitter()->emitIns_R_R(INS_test, emitActualTypeSize(cond), condReg, condReg);
}
else
{
cc = select->AsOpCC()->gtCondition;
}

// The usual codegen will be
Expand Down Expand Up @@ -1875,11 +1859,9 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
genCodeForSelect(treeNode->AsConditional());
break;

#ifdef TARGET_X86
case GT_SELECT_HI:
case GT_SELECTCC:
genCodeForSelect(treeNode->AsOp());
break;
#endif

case GT_RETURNTRAP:
genCodeForReturnTrap(treeNode->AsOp());
Expand Down Expand Up @@ -6809,22 +6791,23 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT
return false;
}

GenTreeCC* cc = nullptr;
GenCondition cond;
GenTree* consumer = nullptr;
GenCondition* mutableCond = nullptr;
GenCondition cond;

if (tree->OperIsCompare())
{
cond = GenCondition::FromIntegralRelop(tree);
}
else
{
cc = genTryFindFlagsConsumer(tree);
if (cc == nullptr)
consumer = genTryFindFlagsConsumer(tree, &mutableCond);
if (consumer == nullptr)
{
return false;
}

cond = cc->gtCondition;
cond = *mutableCond;
}

if (GetEmitter()->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(opType), cond))
Expand All @@ -6833,11 +6816,12 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT
return true;
}

if ((cc != nullptr) && GetEmitter()->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(opType), cond))
if ((mutableCond != nullptr) &&
GetEmitter()->AreFlagsSetForSignJumpOpt(op1->GetRegNum(), emitTypeSize(opType), cond))
{
JITDUMP("Not emitting compare due to sign being already set; modifying [%06u] to check sign flag\n",
Compiler::dspTreeID(cc));
cc->gtCondition =
Compiler::dspTreeID(consumer));
*mutableCond =
(cond.GetCode() == GenCondition::SLT) ? GenCondition(GenCondition::S) : GenCondition(GenCondition::NS);
return true;
}
Expand All @@ -6851,11 +6835,12 @@ bool CodeGen::genCanAvoidEmittingCompareAgainstZero(GenTree* tree, var_types opT
//
// Parameters:
// producer - the node that produces CPU flags
// cond - [out] the pointer to the condition inside that consumer.
//
// Returns:
// A node that consumes the flags, or nullptr if no such node was found.
//
GenTreeCC* CodeGen::genTryFindFlagsConsumer(GenTree* producer)
GenTree* CodeGen::genTryFindFlagsConsumer(GenTree* producer, GenCondition** cond)
{
assert((producer->gtFlags & GTF_SET_FLAGS) != 0);
// We allow skipping some nodes where we know for sure that the flags are
Expand All @@ -6866,7 +6851,14 @@ GenTreeCC* CodeGen::genTryFindFlagsConsumer(GenTree* producer)
{
if (candidate->OperIs(GT_JCC, GT_SETCC))
{
return candidate->AsCC();
*cond = &candidate->AsCC()->gtCondition;
return candidate;
}

if (candidate->OperIs(GT_SELECTCC))
{
*cond = &candidate->AsOpCC()->gtCondition;
return candidate;
}

// The following nodes can be inserted between the compare and the user
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2414,6 +2414,9 @@ class Compiler
// For binary opers.
GenTree* gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, GenTree* op2);

GenTreeCC* gtNewCC(genTreeOps oper, var_types type, GenCondition cond);
GenTreeOpCC* gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2);

GenTreeColon* gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode);
GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon);

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/decomposelongs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1545,10 +1545,10 @@ GenTree* DecomposeLongs::DecomposeSelect(LIR::Use& use)

// Normally GT_SELECT is responsible for evaluating the condition into
// flags, but for the "upper half" we treat the lower GT_SELECT similar to
// other flag producing nodes and reuse them. GT_SELECT_HI is the variant
// other flag producing nodes and reuse them. GT_SELECTCC is the variant
// that uses existing flags and has no condition as part of it.
select->gtFlags |= GTF_SET_FLAGS;
GenTree* hiSelect = m_compiler->gtNewOperNode(GT_SELECT_HI, TYP_INT, hiOp1, hiOp2);
GenTree* hiSelect = m_compiler->gtNewOperCC(GT_SELECTCC, TYP_INT, GenCondition::NE, hiOp1, hiOp2);

Range().InsertAfter(select, hiSelect);

Expand Down
17 changes: 17 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ void GenTree::InitNodeSize()
static_assert_no_msg(sizeof(GenTreeLclVar) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeLclFld) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeCC) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeOpCC) <= TREE_NODE_SZ_SMALL);
static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node
static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node
static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node
Expand Down Expand Up @@ -6943,6 +6944,18 @@ GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1,
return node;
}

GenTreeCC* Compiler::gtNewCC(genTreeOps oper, var_types type, GenCondition cond)
{
GenTreeCC* node = new (this, oper) GenTreeCC(oper, type, cond);
return node;
}

GenTreeOpCC* Compiler::gtNewOperCC(genTreeOps oper, var_types type, GenCondition cond, GenTree* op1, GenTree* op2)
{
GenTreeOpCC* node = new (this, oper) GenTreeOpCC(oper, type, cond, op1, op2);
return node;
}

GenTreeColon* Compiler::gtNewColonNode(var_types type, GenTree* elseNode, GenTree* thenNode)
{
return new (this, GT_COLON) GenTreeColon(TYP_INT, elseNode, thenNode);
Expand Down Expand Up @@ -12134,6 +12147,10 @@ void Compiler::gtDispTree(GenTree* tree,
break;
}
}
else if (tree->OperIs(GT_SELECTCC))
{
printf(" cond=%s", tree->AsOpCC()->gtCondition.Name());
}

gtDispCommonEndLine(tree);

Expand Down
32 changes: 29 additions & 3 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1683,7 +1683,16 @@ struct GenTree

bool OperIsConditionalJump() const
{
return (gtOper == GT_JTRUE) || (gtOper == GT_JCMP) || (gtOper == GT_JCC);
return OperIs(GT_JTRUE, GT_JCMP, GT_JCC);
}

bool OperConsumesFlags() const
{
#if !defined(TARGET_64BIT)
if (OperIs(GT_ADD_HI, GT_SUB_HI))
return true;
#endif
return OperIs(GT_JCC, GT_SETCC, GT_SELECTCC);
}

#ifdef DEBUG
Expand Down Expand Up @@ -8518,12 +8527,11 @@ struct GenCondition
};

// Represents a GT_JCC or GT_SETCC node.

struct GenTreeCC final : public GenTree
{
GenCondition gtCondition;

GenTreeCC(genTreeOps oper, GenCondition condition, var_types type = TYP_VOID)
GenTreeCC(genTreeOps oper, var_types type, GenCondition condition)
: GenTree(oper, type DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition)
{
assert(OperIs(GT_JCC, GT_SETCC));
Expand All @@ -8536,6 +8544,24 @@ struct GenTreeCC final : public GenTree
#endif // DEBUGGABLE_GENTREE
};

// Represents a node with two operands and a condition.
struct GenTreeOpCC final : public GenTreeOp
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any difference between a GenTreeOpCC and a GenTreeConditional?

If not, I assume it's here just to help with categorising nodes.
Could GenTreeOpCC be a subclass of GenTreeConditional (or vice versa?)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes -- GenTreeOpCC is a binary node with a condition code in it. As such, it only makes sense in the backend with explicit ordering where the flags definition can happen right before it. There is no explicit link to the flags definition (at least until #82355, though this is just an experiment for now).
So it's sort of a lowered version of GenTreeConditional.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't spot this at first but GenTreeOpCC does not have a pointer to the genTree for the condition, except via gtPrev. I see that GenTreeCC works in the same way. That feels odd at first, but this is LIR, and it's the prev/next that matter. Ok, I'm happy with how that works now.

Thinking about the CCMP nodes, they would also use GenTreeOpCC. We'd need a CCMP node plus all 6 conditions (CCMP_EQ, CCMP_NE, CCMP_LT, etc).

Coming from HIR we have:

JTRUE(NE(AND(LT, GT), 0))

OptimizeConstCompare() will lower to a TEST_EQ:

JTRUE(TEST_EQ(LT, GT)))

Then lowerCompare() will create a CCMP and CMP:

CMP
JTRUE(CCMP_LT[GT])

Finally LowerJtrue() will create the JCC:

CMP
CCMP[GT]
JCC[LT]

Which means I need to wait for this PR before I can continue with the CCMP work.

Copy link
Member Author

@jakobbotsch jakobbotsch Feb 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds good to me. Feel free to lift any of the code from this PR if it helps.

We'd need a CCMP node plus all 6 conditions (CCMP_EQ, CCMP_NE, CCMP_LT, etc).

Note that this PR is teaching TryLowerConditionToFlagsNode about GT_SETCC nodes. I think I had a note on it somewhere else, but I wouldn't expect the 6 combinations to be necessary due to that. I.e. we can represent CCMP_LT as CCMP + SETCC[LT]. The transformation done in this PR will automatically lower JTRUE(SETCC[LT]) into JCC[LT].
I think it's likely compare chains can end up being handled similarly to this, that is, something like AND(SETCC[cond], x) can be turned into some shape of CCMP + SETCC and then lowering will continue working on these successively (haven't worked out the details).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With that scheme:

Coming from HIR we have:

JTRUE(NE(AND(LT, GT), 0))

OptimizeConstCompare() will lower to a TEST_EQ:

JTRUE(TEST_EQ(LT, GT)))

Then lowerCompare() will create a CCMP, SETCC and CMP:

CMP
CCMP[GT]
JTRUE(SETCC[LT])

Finally LowerJtrue() will create the JCC:

CMP
CCMP[GT]
JCC[LT]

I'm happy with that setup.

It's not clear to me how much of the existing AND chains code will be left in codegen after this, but that can be checked later.

{
GenCondition gtCondition;

GenTreeOpCC(genTreeOps oper, var_types type, GenCondition condition, GenTree* op1 = nullptr, GenTree* op2 = nullptr)
: GenTreeOp(oper, type, op1, op2 DEBUGARG(/*largeNode*/ FALSE)), gtCondition(condition)
{
assert(OperIs(GT_SELECTCC));
}

#if DEBUGGABLE_GENTREE
GenTreeOpCC() : GenTreeOp()
{
}
#endif // DEBUGGABLE_GENTREE
};

//------------------------------------------------------------------------
// Deferred inline functions of GenTree -- these need the subtypes above to
// be defined already.
Expand Down
13 changes: 7 additions & 6 deletions src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,6 @@ GTNODE(SUB_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
GTNODE(LSH_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
GTNODE(RSH_LO , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)

// Variant of SELECT that reuses flags computed by a previous SELECT.
GTNODE(SELECT_HI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
#endif // !defined(TARGET_64BIT)

#ifdef FEATURE_HW_INTRINSICS
Expand Down Expand Up @@ -234,16 +232,19 @@ GTNODE(CNEG_LT , GenTreeOp ,0,GTK_UNOP|DBK_NOTHIR) // Conditi
GTNODE(CMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
// Generate a test instruction; sets the CPU flags according to (a & b) and does not produce a value.
GTNODE(TEST , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
#ifdef TARGET_XARCH
// The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value.
GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR))
#endif
// Makes a comparison and jump if the condition specified. Does not set flags.
GTNODE(JCMP , GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR)
// Checks the condition flags and branch if the condition specified by GenTreeCC::gtCondition is true.
GTNODE(JCC , GenTreeCC ,0,GTK_LEAF|GTK_NOVALUE|DBK_NOTHIR)
// Checks the condition flags and produces 1 if the condition specified by GenTreeCC::gtCondition is true and 0 otherwise.
GTNODE(SETCC , GenTreeCC ,0,GTK_LEAF|DBK_NOTHIR)
#ifdef TARGET_XARCH
// The XARCH BT instruction. Like CMP, this sets the condition flags (CF to be precise) and does not produce a value.
GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE|DBK_NOTHIR))
#endif
// Variant of SELECT that reuses flags computed by a previous node with the specified condition.
GTNODE(SELECTCC , GenTreeCC ,0,GTK_BINOP|DBK_NOTHIR)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should SELECTCC here be a GenTreeOpCC ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it should. Doesn't look like it has any impact since we only use this for some node measuring that can be turned on, but I will keep in mind to fix it in a follow-up.



//-----------------------------------------------------------------------------
// Other nodes that look like unary/binary operators:
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gtstructs.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ GTSTRUCT_1(AllocObj , GT_ALLOCOBJ)
GTSTRUCT_1(RuntimeLookup, GT_RUNTIMELOOKUP)
GTSTRUCT_1(ArrAddr , GT_ARR_ADDR)
GTSTRUCT_2(CC , GT_JCC, GT_SETCC)
GTSTRUCT_1(OpCC , GT_SELECTCC)
#if defined(TARGET_X86)
GTSTRUCT_1(MultiRegOp , GT_MUL_LONG)
#elif defined (TARGET_ARM)
Expand Down
Loading