Skip to content

Commit

Permalink
Fix some low-hanging HWIntrinsic issues (#80626)
Browse files Browse the repository at this point in the history
* Ensure isSimdAsHWIntrinsic is false for Vector64/128/256.WithElement

* Ensure AsVector2 and AsVector3 are intrinsic

* Ensure AsVector128(Vector2) and AsVector128(Vector3) are handled as intrinsic

* Ensure Vector64/128/256.AsNInt and AsNUInt are handled as intrinsic

* Ensure Vector*<T>.IsSupported is handled as intrinsic

* Ensure get_Count for Vector64/128/256<T> and Vector<T> is handled as intrinsic

* Apply formatting patch

* Ensure the right simdSize is selected for AsVector128

* Ensure AsVector2/3 are correctly handled in lowering

* Ensure AsVector2 passes in op1 on Arm64
  • Loading branch information
tannergooding authored Jan 15, 2023
1 parent 1d5d42a commit 968b4f8
Show file tree
Hide file tree
Showing 20 changed files with 365 additions and 99 deletions.
17 changes: 6 additions & 11 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1216,25 +1216,20 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed

case NI_IsSupported_True:
case NI_IsSupported_False:
case NI_IsSupported_Type:
{
foldableIntrinsic = true;
pushedStack.PushConstant();
break;
}
#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS)
case NI_Vector128_get_Count:
case NI_Vector256_get_Count:
foldableIntrinsic = true;
pushedStack.PushConstant();
// TODO: check if it's a loop condition - we unroll such loops.
break;
#elif defined(TARGET_ARM64) && defined(FEATURE_HW_INTRINSICS)
case NI_Vector64_get_Count:
case NI_Vector128_get_Count:

case NI_Vector_GetCount:
{
foldableIntrinsic = true;
pushedStack.PushConstant();
// TODO: for FEATURE_SIMD check if it's a loop condition - we unroll such loops.
break;
#endif
}

default:
{
Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,15 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
}
#endif

if ((strcmp(methodName, "get_IsSupported") == 0) || isHardwareAcceleratedProp)
bool isSupportedProp = (strcmp(methodName, "get_IsSupported") == 0);

if (isSupportedProp && (strncmp(className, "Vector", 6) == 0))
{
// The Vector*<T>.IsSupported props report if T is supported & is specially handled in lookupNamedIntrinsic
return NI_Illegal;
}

if (isSupportedProp || isHardwareAcceleratedProp)
{
// The `compSupportsHWIntrinsic` above validates `compSupportsIsa` indicating
// that the compiler can emit instructions for the ISA but not whether the
Expand Down
122 changes: 108 additions & 14 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector64_AsInt16:
case NI_Vector64_AsInt32:
case NI_Vector64_AsInt64:
case NI_Vector64_AsNInt:
case NI_Vector64_AsNUInt:
case NI_Vector64_AsSByte:
case NI_Vector64_AsSingle:
case NI_Vector64_AsUInt16:
Expand All @@ -392,14 +394,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector128_AsInt16:
case NI_Vector128_AsInt32:
case NI_Vector128_AsInt64:
case NI_Vector128_AsNInt:
case NI_Vector128_AsNUInt:
case NI_Vector128_AsSByte:
case NI_Vector128_AsSingle:
case NI_Vector128_AsUInt16:
case NI_Vector128_AsUInt32:
case NI_Vector128_AsUInt64:
case NI_Vector128_AsVector:
case NI_Vector128_AsVector4:
case NI_Vector128_AsVector128:
{
assert(!sig->hasThis());
assert(numArgs == 1);
Expand All @@ -414,6 +417,109 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector128_AsVector2:
{
assert(sig->numArgs == 1);
assert((simdSize == 16) && (simdBaseType == TYP_FLOAT));
assert(retType == TYP_SIMD8);

op1 = impSIMDPopStack(TYP_SIMD16);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_GetLower, simdBaseJitType, simdSize);
break;
}

case NI_Vector128_AsVector3:
{
assert(sig->numArgs == 1);
assert((simdSize == 16) && (simdBaseType == TYP_FLOAT));
assert(retType == TYP_SIMD12);

op1 = impSIMDPopStack(TYP_SIMD16);
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
break;
}

case NI_Vector128_AsVector128:
{
assert(!sig->hasThis());
assert(numArgs == 1);
assert(retType == TYP_SIMD16);

switch (getSIMDTypeForSize(simdSize))
{
case TYP_SIMD8:
{
assert((simdSize == 8) && (simdBaseType == TYP_FLOAT));

op1 = impSIMDPopStack(TYP_SIMD8);

if (op1->IsCnsVec())
{
GenTreeVecCon* vecCon = op1->AsVecCon();
vecCon->gtType = TYP_SIMD16;

vecCon->gtSimd16Val.f32[2] = 0.0f;
vecCon->gtSimd16Val.f32[3] = 0.0f;

return vecCon;
}

GenTree* idx = gtNewIconNode(2, TYP_INT);
GenTree* zero = gtNewZeroConNode(TYP_FLOAT);
op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16,
/* isSimdAsHWIntrinsic */ false);

idx = gtNewIconNode(3, TYP_INT);
zero = gtNewZeroConNode(TYP_FLOAT);
retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16,
/* isSimdAsHWIntrinsic */ false);

break;
}

case TYP_SIMD12:
{
assert((simdSize == 12) && (simdBaseType == TYP_FLOAT));

op1 = impSIMDPopStack(TYP_SIMD12);

if (op1->IsCnsVec())
{
GenTreeVecCon* vecCon = op1->AsVecCon();
vecCon->gtType = TYP_SIMD16;

vecCon->gtSimd16Val.f32[3] = 0.0f;
return vecCon;
}

GenTree* idx = gtNewIconNode(3, TYP_INT);
GenTree* zero = gtNewZeroConNode(TYP_FLOAT);
retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16,
/* isSimdAsHWIntrinsic */ false);
break;
}

case TYP_SIMD16:
{
// We fold away the cast here, as it only exists to satisfy
// the type system. It is safe to do this here since the retNode type
// and the signature return type are both the same TYP_SIMD.

retNode = impSIMDPopStack(retType, /* expectAddr: */ false, sig->retTypeClass);
SetOpLclRelatedToSIMDIntrinsic(retNode);
assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass)));
break;
}

default:
{
unreached();
}
}

break;
}

case NI_Vector64_BitwiseAnd:
case NI_Vector128_BitwiseAnd:
case NI_Vector64_op_BitwiseAnd:
Expand Down Expand Up @@ -743,18 +849,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector64_get_Count:
case NI_Vector128_get_Count:
{
assert(!sig->hasThis());
assert(numArgs == 0);

GenTreeIntCon* countNode = gtNewIconNode(getSIMDVectorLength(simdSize, simdBaseType), TYP_INT);
countNode->gtFlags |= GTF_ICON_SIMD_COUNT;
retNode = countNode;
break;
}

case NI_Vector64_Divide:
case NI_Vector128_Divide:
case NI_Vector64_op_Division:
Expand Down Expand Up @@ -1738,7 +1832,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
GenTree* vectorOp = impSIMDPopStack(getSIMDTypeForSize(simdSize));

retNode = gtNewSimdWithElementNode(retType, vectorOp, indexOp, valueOp, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ true);
/* isSimdAsHWIntrinsic */ false);
break;
}

Expand Down
19 changes: 14 additions & 5 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -955,18 +955,27 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Vector128_AsVector3:
{
// AsVector3 can be a no-op when it's already in the right register, otherwise
// we just need to move the value over. Vector3 operations will themselves mask
// out the upper element when it's relevant, so it's not worth us spending extra
// cycles doing so here.

GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ true);
break;
}

case NI_Vector64_ToScalar:
case NI_Vector128_ToScalar:
{
const ssize_t indexValue = 0;

// no-op if vector is float/double, targetReg == op1Reg and fetching for 0th index.
if ((varTypeIsFloating(intrin.baseType) && (targetReg == op1Reg) && (indexValue == 0)))
if ((varTypeIsFloating(intrin.baseType) && (targetReg == op1Reg)))
{
// no-op if vector is float/double and targetReg == op1Reg
break;
}

GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, indexValue,
GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(intrin.baseType), targetReg, op1Reg, /* imm */ 0,
INS_OPTS_NONE);
}
break;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Vector128_AsVector2:
case NI_Vector128_AsVector3:
case NI_Vector128_ToScalar:
case NI_Vector256_ToScalar:
{
Expand Down
Loading

0 comments on commit 968b4f8

Please sign in to comment.