Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mark and expose additional Vector functions as Intrinsic #77562

Merged
merged 13 commits into from
Nov 2, 2022
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -2309,7 +2309,7 @@ class Compiler

GenTree* gtNewZeroConNode(var_types type);

GenTree* gtNewOneConNode(var_types type);
GenTree* gtNewOneConNode(var_types type, var_types simdBaseType = TYP_UNDEF);

GenTreeLclVar* gtNewStoreLclVar(unsigned dstLclNum, GenTree* src);

Expand Down
127 changes: 124 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7186,9 +7186,11 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
case TYP_SIMD12:
case TYP_SIMD16:
case TYP_SIMD32:
{
zero = gtNewVconNode(type);
zero->AsVecCon()->gtSimd32Val = {};
break;
}
#endif // FEATURE_SIMD

default:
Expand All @@ -7198,9 +7200,10 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
return zero;
}

GenTree* Compiler::gtNewOneConNode(var_types type)
GenTree* Compiler::gtNewOneConNode(var_types type, var_types simdBaseType /* = TYP_UNDEF */)
{
GenTree* one;

switch (type)
{
case TYP_INT:
Expand All @@ -7218,6 +7221,88 @@ GenTree* Compiler::gtNewOneConNode(var_types type)
one = gtNewDconNode(1.0, type);
break;

#ifdef FEATURE_SIMD
case TYP_SIMD8:
case TYP_SIMD12:
case TYP_SIMD16:
case TYP_SIMD32:
{
GenTreeVecCon* vecCon = gtNewVconNode(type);

unsigned simdSize = genTypeSize(type);
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);

switch (simdBaseType)
{
case TYP_BYTE:
case TYP_UBYTE:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u8[index] = 1;
}
break;
}

case TYP_SHORT:
case TYP_USHORT:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u16[index] = 1;
}
break;
}

case TYP_INT:
case TYP_UINT:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u32[index] = 1;
}
break;
}

case TYP_LONG:
case TYP_ULONG:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.u64[index] = 1;
}
break;
}

case TYP_FLOAT:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.f32[index] = 1.0f;
}
break;
}

case TYP_DOUBLE:
{
for (uint32_t index = 0; index < simdLength; index++)
{
vecCon->gtSimd32Val.f64[index] = 1.0;
}
break;
}

default:
{
unreached();
}
}

one = vecCon;
break;
}
#endif // FEATURE_SIMD

default:
unreached();
}
Expand Down Expand Up @@ -19116,6 +19201,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
// TODO-XARCH-CQ: We could support division by constant for integral types
assert(varTypeIsFloating(simdBaseType));

if (varTypeIsArithmetic(op2))
{
op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}

if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
Expand All @@ -19137,9 +19227,22 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
case GT_RSZ:
{
assert(!varTypeIsByte(simdBaseType));
assert(!varTypeIsFloating(simdBaseType));
assert((op != GT_RSH) || !varTypeIsUnsigned(simdBaseType));

// float and double don't have actual instructions for shifting
// so we'll just use the equivalent integer instruction instead.

if (simdBaseType == TYP_FLOAT)
{
simdBaseJitType = CORINFO_TYPE_INT;
simdBaseType = TYP_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseJitType = CORINFO_TYPE_LONG;
simdBaseType = TYP_LONG;
}

// "over shifting" is platform specific behavior. We will match the C# behavior
// this requires we mask with (sizeof(T) * 8) - 1 which ensures the shift cannot
// exceed the number of bits available in `T`. This is roughly equivalent to
Expand Down Expand Up @@ -19450,6 +19553,11 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
// TODO-AARCH-CQ: We could support division by constant for integral types
assert(varTypeIsFloating(simdBaseType));

if (varTypeIsArithmetic(op2))
{
op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
}

if ((simdSize == 8) && (simdBaseType == TYP_DOUBLE))
{
intrinsic = NI_AdvSimd_DivideScalar;
Expand All @@ -19465,9 +19573,22 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps op,
case GT_RSH:
case GT_RSZ:
{
assert(!varTypeIsFloating(simdBaseType));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

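I see that Vector*<T> is going to support shifting for floating-point base types, so it makes sense that the `!varTypeIsFloating` restriction is lifted here and the base type is then normalized to the same-sized integer type (float -> int, double -> long).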

assert((op != GT_RSH) || !varTypeIsUnsigned(simdBaseType));

// float and double don't have actual instructions for shifting
// so we'll just use the equivalent integer instruction instead.

if (simdBaseType == TYP_FLOAT)
{
simdBaseJitType = CORINFO_TYPE_INT;
simdBaseType = TYP_INT;
}
else if (simdBaseType == TYP_DOUBLE)
{
simdBaseJitType = CORINFO_TYPE_LONG;
simdBaseType = TYP_LONG;
}

// "over shifting" is platform specific behavior. We will match the C# behavior
// this requires we mask with (sizeof(T) * 8) - 1 which ensures the shift cannot
// exceed the number of bits available in `T`. This is roughly equivalent to
Expand Down
16 changes: 13 additions & 3 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_Vector64_get_One:
case NI_Vector128_get_One:
{
assert(sig->numArgs == 0);
retNode = gtNewOneConNode(retType, simdBaseType);
break;
}

case NI_Vector64_get_Zero:
case NI_Vector128_get_Zero:
{
Expand Down Expand Up @@ -1544,11 +1552,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector128_ShiftRightArithmetic:
{
assert(sig->numArgs == 2);
genTreeOps op = varTypeIsSigned(simdBaseType) ? GT_RSH : GT_RSZ;

op2 = impPopStack().val;
op1 = impSIMDPopStack(retType);

retNode = gtNewSimdBinOpNode(GT_RSH, retType, op1, op2, simdBaseJitType, simdSize,
retNode = gtNewSimdBinOpNode(op, retType, op1, op2, simdBaseJitType, simdSize,
/* isSimdAsHWIntrinsic */ false);
break;
}
Expand Down Expand Up @@ -1743,17 +1752,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(numArgs == 3);
GenTree* indexOp = impStackTop(1).val;

if (!indexOp->OperIsConst())
{
// TODO-XARCH-CQ: We should always import these like we do with GetElement
// TODO-ARM64-CQ: We should always import these like we do with GetElement
// If index is not constant use software fallback.
return nullptr;
}

ssize_t imm8 = indexOp->AsIntCon()->IconValue();
ssize_t count = simdSize / genTypeSize(simdBaseType);

if (imm8 >= count || imm8 < 0)
if ((imm8 >= count) || (imm8 < 0))
{
// Using software fallback if index is out of range (throw exception)
return nullptr;
Expand Down
Loading