Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow multiple kmask registers to be allocated and cleanup some codegen around them #89059

Merged
merged 19 commits into from
Jul 25, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
f81815f
Allow multiple kmask registers to be allocated and cleanup some codeg…
tannergooding Jul 17, 2023
2452f49
Apply formatting patch
tannergooding Jul 18, 2023
28dda4a
Fix an assert to include TYP_STRUCT
tannergooding Jul 18, 2023
692c8b4
Merge remote-tracking branch 'dotnet/main' into kmask
tannergooding Jul 19, 2023
47827c4
Ensure kmask registers aren't in the default killset
tannergooding Jul 19, 2023
442e26d
Apply formatting patch
tannergooding Jul 19, 2023
d1ef0ea
Move the kmask optimizations up to morph
tannergooding Jul 20, 2023
f78a16e
Merge remote-tracking branch 'dotnet/main' into kmask
tannergooding Jul 20, 2023
80a565f
Ensure unique VN for ConvertMaskToVector
tannergooding Jul 20, 2023
4b73427
Ensure some basic other handling for kmask testing is handled
tannergooding Jul 20, 2023
1e45fdd
Improve the implementation for some managed Vector512 code paths
tannergooding Jul 20, 2023
31b0893
Apply formatting patch
tannergooding Jul 20, 2023
dfe3e31
Ensure that the knot intrinsic is inserted into the IR
tannergooding Jul 20, 2023
75d4d25
Apply formatting patch
tannergooding Jul 21, 2023
07bfc1f
Ensure the conversion of CompareEqualMask(x, zero) to Test(x, x) does…
tannergooding Jul 21, 2023
136e898
Have callee/callerSaveRegs() use an array based lookup
tannergooding Jul 22, 2023
8a0c9a3
Respond to PR feedback and try to reduce TP regression more
tannergooding Jul 24, 2023
370e95b
Merge remote-tracking branch 'dotnet/main' into kmask
tannergooding Jul 25, 2023
43ad9a0
Ensure PTEST doesn't try to handle something utilizing embedded broad…
tannergooding Jul 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,19 @@ CodeGenInterface::CodeGenInterface(Compiler* theCompiler)
{
}

#if defined(TARGET_AMD64)
#if defined(TARGET_XARCH)
void CodeGenInterface::CopyRegisterInfo()
{
#if defined(TARGET_AMD64)
rbmAllFloat = compiler->rbmAllFloat;
rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
}
#endif // TARGET_AMD64

rbmAllMask = compiler->rbmAllMask;
rbmMskCalleeTrash = compiler->rbmMskCalleeTrash;
}
#endif // TARGET_XARCH

/*****************************************************************************/

CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
Expand Down
20 changes: 17 additions & 3 deletions src/coreclr/jit/codegeninterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,6 @@ class CodeGenInterface
regMaskTP rbmAllFloat;
regMaskTP rbmFltCalleeTrash;

// Call this function after the equivalent fields in Compiler have been initialized.
void CopyRegisterInfo();

regMaskTP get_RBM_ALLFLOAT() const
{
return this->rbmAllFloat;
Expand All @@ -76,6 +73,23 @@ class CodeGenInterface
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
// Local copies of the Compiler's mask-register sets. CopyRegisterInfo() assigns them from
// compiler->rbmAllMask and compiler->rbmMskCalleeTrash.
regMaskTP rbmAllMask;
regMaskTP rbmMskCalleeTrash;

// Call this function after the equivalent fields in Compiler have been initialized.
void CopyRegisterInfo();

// Accessor for the cached rbmAllMask value.
regMaskTP get_RBM_ALLMASK() const
{
return this->rbmAllMask;
}
// Accessor for the cached rbmMskCalleeTrash value.
regMaskTP get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
#endif // TARGET_XARCH

// genSpillVar is called by compUpdateLifeVar.
// TODO-Cleanup: We should handle the spill directly in CodeGen, rather than
// calling it from compUpdateLifeVar. Then this can be non-virtual.
Expand Down
15 changes: 14 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3375,9 +3375,22 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
rbmFltCalleeTrash |= RBM_HIGHFLOAT;
cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
rbmAllMask = RBM_ALLMASK_INIT;
rbmMskCalleeTrash = RBM_MSK_CALLEE_TRASH_INIT;
cntCalleeTrashMask = CNT_CALLEE_TRASH_MASK_INIT;

if (canUseEvexEncoding())
{
rbmAllMask |= RBM_ALLMASK_EVEX;
rbmMskCalleeTrash |= RBM_MSK_CALLEE_TRASH_EVEX;
cntCalleeTrashMask += CNT_CALLEE_TRASH_MASK;
}

codeGen->CopyRegisterInfo();
#endif // TARGET_AMD64
#endif // TARGET_XARCH
}

#ifdef DEBUG
Expand Down
37 changes: 37 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10914,6 +10914,43 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
private:
// The following are for initializing register allocator "constants" defined in targetamd64.h
// that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which adds
// 8 mask registers for use.
//
// Users of these values need to define four accessor functions:
//
// regMaskTP get_RBM_ALLMASK();
// regMaskTP get_RBM_MSK_CALLEE_TRASH();
// unsigned get_CNT_CALLEE_TRASH_MASK();
// unsigned get_AVAILABLE_REG_COUNT();
//
// which return the values of these variables.
//
// This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only
// TARGET_XARCH requires one.
//
// NOTE(review): only the first three accessors are defined in this block; get_AVAILABLE_REG_COUNT()
// is presumably provided elsewhere in the class -- confirm. The reference to targetamd64.h also looks
// narrower than the TARGET_XARCH guard used here -- confirm which target headers supply the *_INIT values.
//
// compInitOptions sets these from RBM_ALLMASK_INIT, RBM_MSK_CALLEE_TRASH_INIT and
// CNT_CALLEE_TRASH_MASK_INIT, then adds the EVEX-only registers/count when canUseEvexEncoding() is true.
regMaskTP rbmAllMask;
regMaskTP rbmMskCalleeTrash;
unsigned cntCalleeTrashMask;

public:
// Accessor for rbmAllMask.
regMaskTP get_RBM_ALLMASK() const
{
return this->rbmAllMask;
}
// Accessor for rbmMskCalleeTrash.
regMaskTP get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
// Accessor for cntCalleeTrashMask.
unsigned get_CNT_CALLEE_TRASH_MASK() const
{
return this->cntCalleeTrashMask;
}
#endif // TARGET_XARCH

}; // end of class Compiler

//---------------------------------------------------------------------------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,10 @@ void emitter::emitBegCG(Compiler* comp, COMP_HANDLE cmpHandle)
#if defined(TARGET_AMD64)
rbmFltCalleeTrash = emitComp->rbmFltCalleeTrash;
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
rbmMskCalleeTrash = emitComp->rbmMskCalleeTrash;
#endif // TARGET_XARCH
}

void emitter::emitEndCG()
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2311,6 +2311,15 @@ class emitter
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
// Emitter-local copy of the Compiler's callee-trash mask-register set; emitBegCG assigns it
// from emitComp->rbmMskCalleeTrash.
regMaskTP rbmMskCalleeTrash;

// Accessor for rbmMskCalleeTrash.
regMaskTP get_RBM_MSK_CALLEE_TRASH() const
{
return this->rbmMskCalleeTrash;
}
#endif // TARGET_XARCH

CORINFO_FIELD_HANDLE emitFltOrDblConst(double constValue, emitAttr attr);
#if defined(FEATURE_SIMD)
CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue);
Expand Down
33 changes: 30 additions & 3 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6246,12 +6246,25 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
case INS_kmovb_msk:
case INS_kmovw_msk:
case INS_kmovd_msk:
{
// Zero-extends the source
hasSideEffect = true;
break;
}

case INS_kmovq_msk:
{
// No side effect, register is 64-bits
hasSideEffect = false;
break;
}

case INS_kmovb_gpr:
case INS_kmovw_gpr:
case INS_kmovd_gpr:
case INS_kmovq_gpr:
{
// Zero-extends the source
hasSideEffect = true;
break;
}
Expand Down Expand Up @@ -6977,7 +6990,7 @@ void emitter::emitIns_R_R_C(instruction ins,
void emitter::emitIns_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2)
{
assert(IsAvx512OrPriorInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins));

instrDesc* id = emitNewInstr(attr);
id->idIns(ins);
Expand Down Expand Up @@ -11557,7 +11570,7 @@ void emitter::emitDispIns(
case IF_RWR_RWR_RRD:
{
assert(IsVexOrEvexEncodableInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins));
regNumber reg2 = id->idReg2();
regNumber reg3 = id->idReg3();
if (ins == INS_bextr || ins == INS_bzhi
Expand Down Expand Up @@ -14956,7 +14969,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)

instruction ins = id->idIns();
assert(IsVexOrEvexEncodableInstruction(ins));
assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins) || isAvx512Blendv(ins));
assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins) || isAvx512Blendv(ins) || IsKInstruction(ins));
regNumber targetReg = id->idReg1();
regNumber src1 = id->idReg2();
regNumber src2 = id->idReg3();
Expand Down Expand Up @@ -19172,6 +19185,20 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insLatency += opSize == EA_32BYTE ? PERFSCORE_LATENCY_6C : PERFSCORE_LATENCY_4C;
break;

case INS_vptestmb:
case INS_vptestmd:
case INS_vptestmq:
case INS_vptestmw:
case INS_vptestnmb:
case INS_vptestnmd:
case INS_vptestnmq:
case INS_vptestnmw:
{
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_4C;
break;
}

case INS_mpsadbw:
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency += PERFSCORE_LATENCY_4C;
Expand Down
Loading