Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Arm64] Implement LoadPairVector64 and LoadPairVector128 #52424

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
a920b9b
Add LoadPairVector64 and LoadPairVector128 in AdvSimd.cs AdvSimd.Plat…
echesakov Jul 9, 2020
2900887
Add LoadPairScalarVector64 in AdvSimd.cs AdvSimd.PlatformNotSupported.cs
echesakov Jul 9, 2020
197bd6f
Add LoadPairVector64NonTemporal and LoadPairVector128NonTemporal in A…
echesakov Jul 9, 2020
5286ac9
Add LoadPairScalarVector64NonTemporal in AdvSimd.cs AdvSimd.PlatformN…
echesakov Jul 9, 2020
ccda678
Update System.Runtime.Intrinsics.cs
echesakov Jul 14, 2020
dbb5111
Add LoadPairScalar() in src/tests/JIT/HardwareIntrinsics/Arm/Shared/H…
echesakov Nov 19, 2020
969e5b2
Add LoadPairVectorTest.template
echesakov Nov 18, 2020
6823274
Add LoadPairVector64 and LoadPairVector128 in GenerateTests.csx
echesakov Nov 18, 2020
7b5f025
Update src/tests/JIT/HardwareIntrinsics/Arm/AdvSimd.Arm64/*
echesakov Nov 18, 2020
d819d99
Add HW_Flag_ReturnsStruct and HWIntrinsicInfo::ReturnsStruct() in hwi…
echesakov Apr 15, 2021
e336d06
Support intrinsics that return structs in Compiler::impHWIntrinsic() …
echesakov Nov 17, 2020
61d0c48
Relax assertion in Compiler::impAssignStructPtr() and allow certain i…
echesakov Apr 15, 2021
bc8f2e6
Add special handling for hardware intrinsics returning struct in fgMo…
echesakov Apr 15, 2021
5bb2e46
Add LoadPairVector64 and LoadPairVector128 in hwintrinsiclistarm64.h
echesakov Jul 14, 2020
a1b9018
Support multi-register HW intrinsics on arm64 in gentree.h gentree.cpp
echesakov Apr 16, 2021
39157b5
Support multi-register HW intrinsics on arm64 in lsraAssignRegToTree …
echesakov Apr 16, 2021
200a78a
Extend LinearScan::BuildHWIntrinsic to support intrinsics returning v…
echesakov Nov 18, 2020
ff79ad4
Allow promotion of HFAs with CUSTOMLAYOUT flag in lclvars.cpp
echesakov Apr 16, 2021
a99b2bc
Support LoadPairVector128/64 in CodeGen::genHWIntrinsic in hwintrinsi…
echesakov Nov 17, 2020
25dcf77
Transform GT_STORE_OBJ to GT_STORE_BLK when src is a multireg intrins…
echesakov Apr 16, 2021
f2f099d
Support multi-reg intrinsics in CodeGen::genCodeForCpBlkUnroll() in c…
echesakov Apr 16, 2021
47fb926
Consume multi-reg nodes registers in LinearScan::BuildBlockStore() in…
echesakov Apr 16, 2021
db22dcc
Use AdvSimd.Arm64.LoadPairVector128 in ASCIIUtility.cs
echesakov May 5, 2021
0d45572
Use AdvSimd.Arm64.StorePair in BitArray.cs
echesakov May 5, 2021
fab6ef3
Use AdvSimd.Arm64.LoadPairVector128 in OptimizedInboxTextEncoder.AdvS…
echesakov May 5, 2021
82bb175
[mono] Implement LoadPair{,Scalar}Vector{64,128}{,NonTemporal}
imhameed Jun 26, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 55 additions & 21 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2130,34 +2130,40 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
int srcOffset = 0;
GenTree* src = node->Data();

assert(src->isContained());

if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
if (src->isContained())
{
srcLclNum = src->AsLclVarCommon()->GetLclNum();
srcOffset = src->AsLclVarCommon()->GetLclOffs();
}
else
{
assert(src->OperIs(GT_IND));
GenTree* srcAddr = src->AsIndir()->Addr();

if (!srcAddr->isContained())
if (src->OperIs(GT_LCL_VAR, GT_LCL_FLD))
{
srcAddrBaseReg = genConsumeReg(srcAddr);
}
else if (srcAddr->OperIsAddrMode())
{
srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base());
srcOffset = srcAddr->AsAddrMode()->Offset();
srcLclNum = src->AsLclVarCommon()->GetLclNum();
srcOffset = src->AsLclVarCommon()->GetLclOffs();
}
else
{
assert(srcAddr->OperIsLocalAddr());
srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum();
srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs();
assert(src->OperIs(GT_IND));
GenTree* srcAddr = src->AsIndir()->Addr();

if (!srcAddr->isContained())
{
srcAddrBaseReg = genConsumeReg(srcAddr);
}
else if (srcAddr->OperIsAddrMode())
{
srcAddrBaseReg = genConsumeReg(srcAddr->AsAddrMode()->Base());
srcOffset = srcAddr->AsAddrMode()->Offset();
}
else
{
assert(srcAddr->OperIsLocalAddr());
srcLclNum = srcAddr->AsLclVarCommon()->GetLclNum();
srcOffset = srcAddr->AsLclVarCommon()->GetLclOffs();
}
}
}
else
{
assert(src->OperIsHWIntrinsic());
assert(src->TypeGet() == TYP_STRUCT);
}

if (node->IsVolatile())
{
Expand All @@ -2175,6 +2181,34 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
regNumber tempReg = node->ExtractTempReg(RBM_ALLINT);

#ifdef TARGET_ARM64
if (src->OperIsHWIntrinsic())
{
const GenTreeHWIntrinsic* intrinsic = src->AsHWIntrinsic();

assert(src->IsMultiRegNode());
const int srcCount = src->GetMultiRegCount();

for (int srcIndex = 0; srcIndex < srcCount; srcIndex++)
{
const regNumber srcReg = src->GetRegByIndex(srcIndex);
const var_types srcRegType = src->GetRegTypeByIndex(srcIndex);
const int srcRegSize = genTypeSize(srcRegType);
const emitAttr attr = emitTypeSize(srcRegType);

if (dstLclNum != BAD_VAR_NUM)
{
GetEmitter()->emitIns_S_R(INS_str, attr, srcReg, dstLclNum, dstOffset);
}
else
{
GetEmitter()->emitIns_R_R_I(INS_str, attr, srcReg, dstAddrBaseReg, dstOffset);
}

dstOffset += srcRegSize;
size -= srcRegSize;
}
}

if (size >= 2 * REGSIZE_BYTES)
{
regNumber tempReg2 = node->ExtractTempReg(RBM_ALLINT);
Expand Down
27 changes: 23 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -715,14 +715,33 @@ int GenTree::GetRegisterDstCount(Compiler* compiler) const
#endif
}
#endif

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
if (OperIs(GT_HWINTRINSIC))
else if (OperIsHWIntrinsic())
{
assert(TypeGet() == TYP_STRUCT);
#ifdef TARGET_ARM64
const GenTreeHWIntrinsic* intrinsic = AsHWIntrinsic();
const NamedIntrinsic intrinsicId = intrinsic->gtHWIntrinsicId;
assert(HWIntrinsicInfo::ReturnsStruct(intrinsicId));

switch (intrinsicId)
{
// TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers.
case NI_AdvSimd_Arm64_LoadPairScalarVector64:
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
return 2;

default:
unreached();
}
#elif defined(TARGET_XARCH)
return 2;
}
#endif
}

if (OperIsScalarLocal())
{
return AsLclVar()->GetFieldCount(compiler);
Expand Down
62 changes: 47 additions & 15 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1840,7 +1840,7 @@ struct GenTree
inline bool IsMultiRegNode() const;

// Returns the number of registers defined by a multireg node.
unsigned GetMultiRegCount();
unsigned GetMultiRegCount() const;

// Returns the regIndex'th register defined by a possibly-multireg node.
regNumber GetRegByIndex(int regIndex);
Expand Down Expand Up @@ -6720,7 +6720,7 @@ struct GenTreeCopyOrReload : public GenTreeUnOp
#endif
}

unsigned GetRegCount()
unsigned GetRegCount() const
{
#if FEATURE_MULTIREG_RET
// We need to return the highest index for which we have a valid register.
Expand Down Expand Up @@ -7550,12 +7550,12 @@ inline bool GenTree::IsMultiRegNode() const
return true;
}
#endif // FEATURE_MULTIREG_RET
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
if (OperIs(GT_HWINTRINSIC))

if (OperIsHWIntrinsic())
{
return (TypeGet() == TYP_STRUCT);
}
#endif

if (IsMultiRegLclVar())
{
return true;
Expand All @@ -7571,7 +7571,7 @@ inline bool GenTree::IsMultiRegNode() const
// Return Value:
// Returns the number of registers defined by this node.
//
inline unsigned GenTree::GetMultiRegCount()
inline unsigned GenTree::GetMultiRegCount() const
{
#if FEATURE_MULTIREG_RET
if (IsMultiRegCall())
Expand All @@ -7598,13 +7598,33 @@ inline unsigned GenTree::GetMultiRegCount()
return AsCopyOrReload()->GetRegCount();
}
#endif // FEATURE_MULTIREG_RET
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
if (OperIs(GT_HWINTRINSIC))

if (OperIsHWIntrinsic())
{
assert(TypeGet() == TYP_STRUCT);
#ifdef TARGET_ARM64
const GenTreeHWIntrinsic* intrinsic = AsHWIntrinsic();
const NamedIntrinsic intrinsicId = intrinsic->gtHWIntrinsicId;

switch (intrinsicId)
{
// TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers.
case NI_AdvSimd_Arm64_LoadPairScalarVector64:
case NI_AdvSimd_Arm64_LoadPairScalarVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector64:
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
case NI_AdvSimd_Arm64_LoadPairVector128:
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
return 2;

default:
unreached();
}
#elif defined(TARGET_XARCH)
return 2;
}
#endif
}

if (OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR))
{
assert((gtFlags & GTF_VAR_MULTIREG) != 0);
Expand Down Expand Up @@ -7667,13 +7687,14 @@ inline regNumber GenTree::GetRegByIndex(int regIndex)
return AsCopyOrReload()->GetRegNumByIdx(regIndex);
}
#endif // FEATURE_MULTIREG_RET
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
#ifdef FEATURE_HW_INTRINSICS
if (OperIs(GT_HWINTRINSIC))
{
assert(regIndex == 1);
// TODO-ARM64-NYI: Support hardware intrinsics operating on multiple contiguous registers.
return AsHWIntrinsic()->GetOtherReg();
}
#endif
#endif // FEATURE_HW_INTRINSICS
if (OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR))
{
return AsLclVar()->GetRegNumByIdx(regIndex);
Expand Down Expand Up @@ -7724,15 +7745,26 @@ inline var_types GenTree::GetRegTypeByIndex(int regIndex)

#endif // FEATURE_MULTIREG_RET

#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
if (OperIs(GT_HWINTRINSIC))
if (OperIsHWIntrinsic())
{
assert(TypeGet() == TYP_STRUCT);
#ifdef TARGET_ARM64
if (AsHWIntrinsic()->GetSimdSize() == 16)
{
return TYP_SIMD16;
}
else
{
assert(AsHWIntrinsic()->GetSimdSize() == 8);
return TYP_SIMD8;
}
#elif defined(TARGET_XARCH)
// At this time, the only multi-reg HW intrinsics all return the type of their
// arguments. If this changes, we will need a way to record or determine this.
assert(TypeGet() == TYP_STRUCT);
return gtGetOp1()->TypeGet();
}
#endif
}

if (OperIs(GT_LCL_VAR, GT_STORE_LCL_VAR))
{
if (TypeGet() == TYP_LONG)
Expand Down
27 changes: 21 additions & 6 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -843,14 +843,23 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
{
unsigned int sizeBytes;
simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
retType = getSIMDTypeForSize(sizeBytes);
assert(sizeBytes != 0);

// We want to return early here for cases where retType was TYP_STRUCT as per method signature and
// rather than deferring the decision after getting the simdBaseJitType of arg.
if (!isSupportedBaseType(intrinsic, simdBaseJitType))
if (HWIntrinsicInfo::ReturnsStruct(intrinsic))
{
return nullptr;
assert(sizeBytes == 0);
}
else
{
assert(sizeBytes != 0);

// We want to return early here for cases where retType was TYP_STRUCT as per the method signature,
// rather than deferring the decision until after getting the simdBaseJitType of the arg.
if (!isSupportedBaseType(intrinsic, simdBaseJitType))
{
return nullptr;
}

retType = getSIMDTypeForSize(sizeBytes);
}
}

Expand Down Expand Up @@ -1224,6 +1233,12 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
//
retNode->gtFlags |= (GTF_GLOB_REF | GTF_EXCEPT);
}

if (HWIntrinsicInfo::ReturnsStruct(intrinsic))
{
retNode->SetLayout(typGetObjLayout(sig->retTypeSigClass));
}

return retNode;
}

Expand Down
32 changes: 20 additions & 12 deletions src/coreclr/jit/hwintrinsic.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,50 +113,52 @@ enum HWIntrinsicFlag : unsigned int
// but may be table-driven in the back-end
HW_Flag_SpecialImport = 0x100,

// The intrinsic returns a struct.
HW_Flag_ReturnsStruct = 0x200,

// The below is for defining platform-specific flags
#if defined(TARGET_XARCH)
// Full range IMM intrinsic
// - the immediate value is valid on the full range of imm8 (0-255)
HW_Flag_FullRangeIMM = 0x200,
HW_Flag_FullRangeIMM = 0x400,

// Maybe IMM
// the intrinsic has either imm or Vector overloads
HW_Flag_MaybeIMM = 0x400,
HW_Flag_MaybeIMM = 0x800,

// Copy Upper bits
// some SIMD scalar intrinsics need the semantics of copying upper bits from the source operand
HW_Flag_CopyUpperBits = 0x800,
HW_Flag_CopyUpperBits = 0x1000,

// Maybe Memory Load/Store
// - some intrinsics may have pointer overloads but without HW_Category_MemoryLoad/HW_Category_MemoryStore
HW_Flag_MaybeMemoryLoad = 0x1000,
HW_Flag_MaybeMemoryStore = 0x2000,
HW_Flag_MaybeMemoryLoad = 0x2000,
HW_Flag_MaybeMemoryStore = 0x4000,

// No Read/Modify/Write Semantics
// the intrinsic doesn't have read/modify/write semantics in two/three-operand form.
HW_Flag_NoRMWSemantics = 0x4000,
HW_Flag_NoRMWSemantics = 0x8000,

// NoContainment
// the intrinsic cannot be handled by containment,
// all the intrinsic that have explicit memory load/store semantics should have this flag
HW_Flag_NoContainment = 0x8000,
HW_Flag_NoContainment = 0x10000

#elif defined(TARGET_ARM64)
// The intrinsic has an immediate operand
// - the value can be (and should be) encoded in a corresponding instruction when the operand value is constant
HW_Flag_HasImmediateOperand = 0x200,
HW_Flag_HasImmediateOperand = 0x400,

// The intrinsic has read/modify/write semantics in multiple-operands form.
HW_Flag_HasRMWSemantics = 0x400,
HW_Flag_HasRMWSemantics = 0x800,

// The intrinsic operates on the lower part of a SIMD register
// - the upper part of the source registers are ignored
// - the upper part of the destination register is zeroed
HW_Flag_SIMDScalar = 0x800,
HW_Flag_SIMDScalar = 0x1000,

// The intrinsic supports some sort of containment analysis
HW_Flag_SupportsContainment = 0x1000

HW_Flag_SupportsContainment = 0x2000
#else
#error Unsupported platform
#endif
Expand Down Expand Up @@ -701,6 +703,12 @@ struct HWIntrinsicInfo
return (flags & HW_Flag_SpecialImport) != 0;
}

// Reports whether the flags looked up for intrinsic `id` include HW_Flag_ReturnsStruct.
//
// Arguments:
//    id - the intrinsic whose flags are queried via lookupFlags()
//
// Return Value:
//    true if the HW_Flag_ReturnsStruct bit is set for `id`; false otherwise.
static bool ReturnsStruct(NamedIntrinsic id)
{
    return (lookupFlags(id) & HW_Flag_ReturnsStruct) != 0;
}

#ifdef TARGET_ARM64
static bool SIMDScalar(NamedIntrinsic id)
{
Expand Down
Loading