Lowering subset of Vector512 methods for avx512. #82953

Merged 3 commits on Mar 8, 2023
51 changes: 49 additions & 2 deletions src/coreclr/jit/emitxarch.cpp
@@ -5843,7 +5843,13 @@ bool emitter::IsMovInstruction(instruction ins)
case INS_movaps:
case INS_movd:
case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_movsdsse2:
case INS_movss:
case INS_movsx:
@@ -5927,8 +5933,23 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
case INS_movupd:
case INS_movups:
{
// non EA_32BYTE moves clear the upper bits under VEX encoding
hasSideEffect = UseVEXEncoding() && (size != EA_32BYTE);
// TODO-XArch-AVX512: Handle merge/mask scenarios once k-mask support is added for these.
// Non-EA_32BYTE moves (under VEX) and non-EA_64BYTE moves (under EVEX) clear the upper bits.
if (UseVEXEncoding())
{
if (UseEvexEncoding())
{
hasSideEffect = (size != EA_64BYTE);
}
else
{
hasSideEffect = (size != EA_32BYTE);
}
}
else
{
hasSideEffect = false;
}
break;
}
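
As a quick reference, the rule this case encodes can be condensed into a standalone sketch. This is illustrative only (the helper name and parameters below are not the JIT's API): a register-to-register vector move has the zero-upper-bits side effect whenever it is narrower than the full register width implied by its encoding.

#include <cstdio>

// Sketch of the HasSideEffect rule above: legacy SSE encodings leave the upper
// bits of the destination untouched, VEX zero-extends anything narrower than a
// 32-byte ymm, and EVEX zero-extends anything narrower than a 64-byte zmm.
static bool MoveClearsUpperBits(bool useVex, bool useEvex, unsigned sizeInBytes)
{
    if (!useVex)
    {
        return false; // legacy SSE: upper bits are preserved
    }
    if (useEvex)
    {
        return sizeInBytes != 64; // EVEX: only a full zmm move has no zeroing side effect
    }
    return sizeInBytes != 32; // VEX: only a full ymm move has no zeroing side effect
}

int main()
{
    printf("VEX  16-byte move clears upper bits: %d\n", MoveClearsUpperBits(true, false, 16)); // 1
    printf("EVEX 32-byte move clears upper bits: %d\n", MoveClearsUpperBits(true, true, 32));  // 1
    printf("EVEX 64-byte move clears upper bits: %d\n", MoveClearsUpperBits(true, true, 64));  // 0
    return 0;
}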

@@ -5963,6 +5984,20 @@ bool emitter::HasSideEffect(instruction ins, emitAttr size)
break;
}

case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
{
// These EVEX instructions merge/mask based on the k-register.
// TODO-XArch-AVX512: Handle merge/mask scenarios once k-mask support is added for these.
assert(UseEvexEncoding());
hasSideEffect = (size != EA_64BYTE);
break;
}

case INS_movsxd:
{
// Sign-extends the source
@@ -6152,7 +6187,13 @@ void emitter::emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regN
case INS_movapd:
case INS_movaps:
case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_movsdsse2:
case INS_movss:
case INS_movupd:
@@ -17350,7 +17391,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
break;

case INS_movdqa:
case INS_movdqa32:
case INS_movdqa64:
case INS_movdqu:
case INS_movdqu8:
case INS_movdqu16:
case INS_movdqu32:
case INS_movdqu64:
case INS_movaps:
case INS_movups:
case INS_movapd:
2 changes: 2 additions & 0 deletions src/coreclr/jit/emitxarch.h
@@ -285,6 +285,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
case INS_vfnmsub231sd:
case INS_unpcklpd:
case INS_vpermilpdvar:
case INS_movdqa64:
case INS_movdqu16:
case INS_movdqu64:
case INS_vinsertf64x4:
@@ -402,6 +403,7 @@ bool IsWEvexOpcodeExtension(const instrDesc* id)
case INS_vpdpbusds:
case INS_vpdpwssds:
case INS_vpermilpsvar:
case INS_movdqa32:
case INS_movdqu8:
case INS_movdqu32:
case INS_vinsertf32x8:
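The two emitxarch.h hunks above extend what appear to be the EVEX.W=1 and EVEX.W=0 instruction groups: movdqa64 joins the 64-bit-element (W1) bucket alongside movdqu16/movdqu64, while movdqa32 joins the W0 bucket alongside movdqu8/movdqu32. A minimal standalone sketch of that split, covering only the move aliases touched by this PR (illustrative, not the JIT's classifier):

#include <cstdio>
#include <cstring>

// Sketch: the EVEX.W bit distinguishes the 64-bit-element (and word) forms of
// these moves from the 32-bit-element (and byte) forms.
static bool IsEvexW1MoveSketch(const char* ins)
{
    return strcmp(ins, "movdqa64") == 0 ||
           strcmp(ins, "movdqu16") == 0 ||
           strcmp(ins, "movdqu64") == 0;
    // movdqa32, movdqu8 and movdqu32 fall into the W0 group.
}

int main()
{
    printf("movdqa64 -> W%d\n", IsEvexW1MoveSketch("movdqa64") ? 1 : 0); // W1
    printf("movdqa32 -> W%d\n", IsEvexW1MoveSketch("movdqa32") ? 1 : 0); // W0
    return 0;
}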
32 changes: 28 additions & 4 deletions src/coreclr/jit/gentree.cpp
@@ -18930,6 +18930,7 @@ bool GenTree::isContainableHWIntrinsic() const
case NI_SSE2_LoadAlignedVector128:
case NI_SSE2_LoadScalarVector128:
case NI_AVX_LoadAlignedVector256:
case NI_AVX512F_LoadAlignedVector512:
{
// These loads are contained as part of a HWIntrinsic operation
return true;
@@ -21555,7 +21556,12 @@ GenTree* Compiler::gtNewSimdLoadAlignedNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_LoadAlignedVector512;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_LoadAlignedVector256;
@@ -21616,7 +21622,15 @@ GenTree* Compiler::gtNewSimdLoadNonTemporalNode(
// We don't guarantee a non-temporal load will actually occur, so fallback
// to regular aligned loads if the required ISA isn't supported.

if (simdSize == 32)
if (simdSize == 64)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX512F))
{
intrinsic = NI_AVX512F_LoadAlignedVector512NonTemporal;
isNonTemporal = true;
}
}
else if (simdSize == 32)
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX2))
{
@@ -22877,7 +22891,12 @@ GenTree* Compiler::gtNewSimdStoreAlignedNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAligned;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_StoreAligned;
@@ -22934,7 +22953,12 @@ GenTree* Compiler::gtNewSimdStoreNonTemporalNode(

NamedIntrinsic intrinsic = NI_Illegal;

if (simdSize == 32)
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
intrinsic = NI_AVX512F_StoreAlignedNonTemporal;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_AVX_StoreAlignedNonTemporal;
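The gtNewSimd* helpers above all follow the same dispatch shape: when simdSize is 64 they select the new AVX512F intrinsic (asserting ISA support for the aligned forms, and opportunistically checking it for the non-temporal load, which otherwise falls back to a plain aligned load), while smaller sizes keep the existing 32- and 16-byte paths. A condensed sketch of that pattern, with hypothetical stand-ins for the JIT's helpers and enums:

#include <cassert>

// Hypothetical stand-ins; the real code uses NamedIntrinsic values and
// compIsaSupportedDebugOnly / compOpportunisticallyDependsOn.
enum class Isa { AVX, AVX2, AVX512F };
enum class Intrinsic { LoadAligned128, LoadAligned256, LoadAligned512 };

static bool IsaSupported(Isa /*isa*/)
{
    return true; // placeholder: the JIT queries the actual CPU feature set here
}

// Mirrors gtNewSimdLoadAlignedNode's size-based selection.
static Intrinsic PickLoadAligned(unsigned simdSize)
{
    if (simdSize == 64)
    {
        assert(IsaSupported(Isa::AVX512F)); // callers only request 64 bytes when AVX-512F is available
        return Intrinsic::LoadAligned512;
    }
    else if (simdSize == 32)
    {
        assert(IsaSupported(Isa::AVX));
        return Intrinsic::LoadAligned256;
    }
    assert(simdSize == 16);
    return Intrinsic::LoadAligned128;
}

int main()
{
    return PickLoadAligned(64) == Intrinsic::LoadAligned512 ? 0 : 1;
}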
13 changes: 13 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
@@ -238,6 +238,14 @@ HARDWARE_INTRINSIC(Vector256, Xor,
// Vector512 Intrinsics
HARDWARE_INTRINSIC(Vector512, Create, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Vector512, Load, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, Store, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
@@ -729,6 +737,11 @@ HARDWARE_INTRINSIC(AVX2, Xor,
// AVX512F Intrinsics
HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad)
HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, {INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa32, INS_movdqa64, INS_movdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg)


// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
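Each HARDWARE_INTRINSIC row carries one instruction per base element type, which is why the new LoadAlignedVector512/StoreAligned entries list movdqa32 for the byte/short/int columns, movdqa64 for long/ulong, and movaps/movapd for float/double. A hedged sketch of how such a per-type table resolves to an instruction; the column order below (signed/unsigned integer widths, then float and double) is my assumption, and this is not the JIT's actual data structure:

#include <cstdio>

// Assumed column order: byte, sbyte, short, ushort, int, uint, long, ulong, float, double.
enum BaseType { TYP_BYTE, TYP_SBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT,
                TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE, TYP_COUNT };

// Per-element-type instruction mnemonics for an aligned 512-bit load, as in the
// AVX512F.LoadAlignedVector512 row above.
static const char* const loadAligned512[TYP_COUNT] = {
    "vmovdqa32", "vmovdqa32", "vmovdqa32", "vmovdqa32", "vmovdqa32", "vmovdqa32",
    "vmovdqa64", "vmovdqa64", "vmovaps",   "vmovapd"
};

int main()
{
    printf("ulong  -> %s\n", loadAligned512[TYP_ULONG]);  // vmovdqa64
    printf("float  -> %s\n", loadAligned512[TYP_FLOAT]);  // vmovaps
    printf("double -> %s\n", loadAligned512[TYP_DOUBLE]); // vmovapd
    return 0;
}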
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1665,8 +1665,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_AVX_LoadVector256:
case NI_Vector128_Load:
case NI_Vector256_Load:
case NI_Vector512_Load:
case NI_Vector128_LoadUnsafe:
case NI_Vector256_LoadUnsafe:
case NI_Vector512_LoadUnsafe:
{
if (sig->numArgs == 2)
{
@@ -1698,6 +1700,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_LoadAligned:
case NI_Vector256_LoadAligned:
case NI_Vector512_LoadAligned:
{
assert(sig->numArgs == 1);

@@ -1716,6 +1719,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_LoadAlignedNonTemporal:
case NI_Vector256_LoadAlignedNonTemporal:
case NI_Vector512_LoadAlignedNonTemporal:
{
assert(sig->numArgs == 1);

@@ -2086,8 +2090,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_Store:
case NI_Vector256_Store:
case NI_Vector512_Store:
case NI_Vector128_StoreUnsafe:
case NI_Vector256_StoreUnsafe:
case NI_Vector512_StoreUnsafe:
{
assert(retType == TYP_VOID);
var_types simdType = getSIMDTypeForSize(simdSize);
@@ -2130,6 +2136,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_StoreAligned:
case NI_Vector256_StoreAligned:
case NI_Vector512_StoreAligned:
{
assert(sig->numArgs == 2);
assert(retType == TYP_VOID);
@@ -2155,6 +2162,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector128_StoreAlignedNonTemporal:
case NI_Vector256_StoreAlignedNonTemporal:
case NI_Vector512_StoreAlignedNonTemporal:
{
assert(sig->numArgs == 2);
assert(retType == TYP_VOID);