Change fgMorphMultiOp to fgMorphHWIntrinsic (#103732)
* Remove a dead optimization from fgMorphMultiOp

* Change fgMorphMultiOp to fgMorphHWIntrinsic

* Move a couple of folding operations from `fgOptimizeHWIntrinsic` to `gtFoldExprHWIntrinsic`

* Fix the query for whether an intrinsic is commutative

* Don't propagate vector constants unnecessarily
tannergooding authored Jun 20, 2024
1 parent 4a7fe65 commit 1bf325c
Showing 9 changed files with 311 additions and 299 deletions.
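
For context on the commutativity bullet above: the new check added to GenTree::isCommutativeHWIntrinsic (visible in the gentree.cpp diff below) only reports the two-operand forms of the AVX-512 Add/Multiply intrinsics as commutative, since the higher-arity overloads carry an extra operand and must not be reordered. A minimal sketch of that shape, illustrative only and not part of the commit:

// Illustrative sketch (not the JIT's code): commutativity is limited to the
// plain two-operand form; higher-arity overloads of these intrinsics carry an
// extra operand and must not have their operands swapped.
bool IsCommutativeTwoOperandForm(const GenTreeHWIntrinsic* node)
{
    switch (node->GetHWIntrinsicId())
    {
        case NI_AVX512F_Add:
        case NI_AVX512F_Multiply:
            return node->GetOperandCount() == 2;

        default:
            return false;
    }
}
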
90 changes: 4 additions & 86 deletions src/coreclr/jit/assertionprop.cpp
@@ -3117,101 +3117,19 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,

if (inspectIntrinsic)
{
GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic();
GenTreeVecCon* vecCon = value->AsVecCon();

NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId();
var_types simdBaseType = parent->GetSimdBaseType();
GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic();
NamedIntrinsic intrinsicId = parent->GetHWIntrinsicId();

if (!HWIntrinsicInfo::CanBenefitFromConstantProp(intrinsicId))
{
return false;
}

// For several of the scenarios below we may skip the costing logic
// For several of the scenarios we may skip the costing logic
// since we know that the operand is always containable and therefore
// is always cost effective to propagate.

switch (intrinsicId)
{
#if defined(TARGET_ARM64)
case NI_Vector64_op_Equality:
case NI_Vector64_op_Inequality:
#endif // TARGET_ARM64
case NI_Vector128_op_Equality:
case NI_Vector128_op_Inequality:
#if defined(TARGET_XARCH)
case NI_Vector256_op_Equality:
case NI_Vector256_op_Inequality:
case NI_Vector512_op_Equality:
case NI_Vector512_op_Inequality:
#endif // TARGET_XARCH
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0.
return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
}

#if defined(TARGET_ARM64)
case NI_AdvSimd_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqualScalar:
{
// We can optimize when the constant is zero due to a
// specialized encoding for the instruction
return vecCon->IsZero();
}

case NI_AdvSimd_CompareGreaterThan:
case NI_AdvSimd_CompareGreaterThanOrEqual:
case NI_AdvSimd_Arm64_CompareGreaterThan:
case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual:
case NI_AdvSimd_Arm64_CompareGreaterThanScalar:
case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar:
{
// We can optimize when the constant is zero, but only
// for signed types, due to a specialized encoding for
// the instruction
return vecCon->IsZero() && !varTypeIsUnsigned(simdBaseType);
}
#endif // TARGET_ARM64

#if defined(TARGET_XARCH)
case NI_SSE41_Insert:
{
// We can optimize for float when the constant is zero
// due to a specialized encoding for the instruction
return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
}

case NI_EVEX_CompareEqualMask:
case NI_EVEX_CompareNotEqualMask:
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0
return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
}
#endif // TARGET_XARCH

case NI_Vector128_Shuffle:
#if defined(TARGET_XARCH)
case NI_Vector256_Shuffle:
case NI_Vector512_Shuffle:
#elif defined(TARGET_ARM64)
case NI_Vector64_Shuffle:
#endif
{
// The shuffle indices need to be constant so we can preserve
// the node as a hwintrinsic instead of rewriting as a user call.
assert(parent->GetOperandCount() == 2);
return parent->IsUserCall() && (dest == parent->Op(2));
}

default:
{
break;
}
}
return parent->ShouldConstantProp(dest, value->AsVecCon());
}
#endif // FEATURE_HW_INTRINSICS
}
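
The net effect of the assertionprop.cpp change above is that optIsProfitableToSubstitute no longer carries its own per-intrinsic switch. A condensed sketch of the resulting caller shape, trimmed from the diff for illustration:

// Condensed sketch of the new caller shape; the per-intrinsic knowledge now
// lives on GenTreeHWIntrinsic::ShouldConstantProp rather than in assertion prop.
if (inspectIntrinsic)
{
    GenTreeHWIntrinsic* parent = destParent->AsHWIntrinsic();

    if (!HWIntrinsicInfo::CanBenefitFromConstantProp(parent->GetHWIntrinsicId()))
    {
        return false;
    }

    // Delegate the "is this constant worth propagating into this use?" decision.
    return parent->ShouldConstantProp(dest, value->AsVecCon());
}
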
6 changes: 3 additions & 3 deletions src/coreclr/jit/compiler.h
@@ -6625,9 +6625,10 @@ class Compiler
GenTree* fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp);
GenTree* fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp);
GenTree* fgOptimizeRelationalComparisonWithFullRangeConst(GenTreeOp* cmp);
#ifdef FEATURE_HW_INTRINSICS
#if defined(FEATURE_HW_INTRINSICS)
GenTree* fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree);
GenTree* fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node);
#endif
#endif // FEATURE_HW_INTRINSICS
GenTree* fgOptimizeCommutativeArithmetic(GenTreeOp* tree);
GenTree* fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp);
GenTree* fgOptimizeAddition(GenTreeOp* add);
@@ -6640,7 +6641,6 @@ class Compiler
GenTree* fgMorphModToSubMulDiv(GenTreeOp* tree);
GenTree* fgMorphUModToAndSub(GenTreeOp* tree);
GenTree* fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropDone);
GenTree* fgMorphMultiOp(GenTreeMultiOp* multiOp);
GenTree* fgMorphConst(GenTree* tree);

GenTreeOp* fgMorphCommutative(GenTreeOp* tree);
228 changes: 222 additions & 6 deletions src/coreclr/jit/gentree.cpp
@@ -20350,6 +20350,12 @@ bool GenTree::isCommutativeHWIntrinsic() const
{
return !varTypeIsFloating(node->GetSimdBaseType());
}

case NI_AVX512F_Add:
case NI_AVX512F_Multiply:
{
return node->GetOperandCount() == 2;
}
#endif // TARGET_XARCH

default:
@@ -28536,6 +28542,110 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const
}
}

//---------------------------------------------------------------------------------------
// GenTreeHWIntrinsic::ShouldConstantProp:
//    Determines if a given operand should be constant propagated
//
// Arguments
//    operand - The operand to check
//    vecCon  - The vector constant to check
//
// Return Value
//    true if operand should be constant propagated, otherwise false
//
// Remarks
//    This method takes the operand and vector constant given that assertion prop
//    may be checking if the underlying constant for a lcl_var should be propagated
//
bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon)
{
assert(HWIntrinsicInfo::CanBenefitFromConstantProp(gtHWIntrinsicId));

var_types simdBaseType = GetSimdBaseType();

switch (gtHWIntrinsicId)
{
#if defined(TARGET_ARM64)
case NI_Vector64_op_Equality:
case NI_Vector64_op_Inequality:
#endif // TARGET_ARM64
case NI_Vector128_op_Equality:
case NI_Vector128_op_Inequality:
#if defined(TARGET_XARCH)
case NI_Vector256_op_Equality:
case NI_Vector256_op_Inequality:
case NI_Vector512_op_Equality:
case NI_Vector512_op_Inequality:
#endif // TARGET_XARCH
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0.
return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
}

#if defined(TARGET_ARM64)
case NI_AdvSimd_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqual:
case NI_AdvSimd_Arm64_CompareEqualScalar:
{
// We can optimize when the constant is zero due to a
// specialized encoding for the instruction
return vecCon->IsZero();
}

case NI_AdvSimd_CompareGreaterThan:
case NI_AdvSimd_CompareGreaterThanOrEqual:
case NI_AdvSimd_Arm64_CompareGreaterThan:
case NI_AdvSimd_Arm64_CompareGreaterThanOrEqual:
case NI_AdvSimd_Arm64_CompareGreaterThanScalar:
case NI_AdvSimd_Arm64_CompareGreaterThanOrEqualScalar:
{
// We can optimize when the constant is zero, but only
// for signed types, due to a specialized encoding for
// the instruction
return vecCon->IsZero() && !varTypeIsUnsigned(simdBaseType);
}
#endif // TARGET_ARM64

#if defined(TARGET_XARCH)
case NI_SSE41_Insert:
{
// We can optimize for float when the constant is zero
// due to a specialized encoding for the instruction
return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
}

case NI_EVEX_CompareEqualMask:
case NI_EVEX_CompareNotEqualMask:
{
// We can optimize when the constant is zero, but only
// for non floating-point since +0.0 == -0.0
return vecCon->IsZero() && !varTypeIsFloating(simdBaseType);
}
#endif // TARGET_XARCH

case NI_Vector128_Shuffle:
#if defined(TARGET_XARCH)
case NI_Vector256_Shuffle:
case NI_Vector512_Shuffle:
#elif defined(TARGET_ARM64)
case NI_Vector64_Shuffle:
#endif
{
// The shuffle indices need to be constant so we can preserve
// the node as a hwintrinsic instead of rewriting as a user call.
assert(GetOperandCount() == 2);
return IsUserCall() && (operand == Op(2));
}

default:
{
break;
}
}

return false;
}
#endif // FEATURE_HW_INTRINSICS

//---------------------------------------------------------------------------------------
@@ -29567,6 +29677,38 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
return tree;
}

NamedIntrinsic ni = tree->GetHWIntrinsicId();
var_types retType = tree->TypeGet();
var_types simdBaseType = tree->GetSimdBaseType();
CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
unsigned int simdSize = tree->GetSimdSize();

simd_t simdVal = {};

if (GenTreeVecCon::IsHWIntrinsicCreateConstant<simd_t>(tree, simdVal))
{
GenTreeVecCon* vecCon = gtNewVconNode(retType);

for (GenTree* arg : tree->Operands())
{
DEBUG_DESTROY_NODE(arg);
}

vecCon->gtSimdVal = simdVal;

if (fgGlobalMorph)
{
            INDEBUG(vecCon->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
}

if (vnStore != nullptr)
{
fgValueNumberTreeConst(vecCon);
}
return vecCon;
}

GenTree* op1 = nullptr;
GenTree* op2 = nullptr;
GenTree* op3 = nullptr;
@@ -29598,6 +29740,86 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
}

if (tree->OperIsConvertMaskToVector())
{
GenTree* op = op1;

if (!op->OperIsHWIntrinsic())
{
return tree;
}

unsigned simdBaseTypeSize = genTypeSize(simdBaseType);
GenTreeHWIntrinsic* cvtOp = op->AsHWIntrinsic();

if (!cvtOp->OperIsConvertVectorToMask())
{
return tree;
}

if ((genTypeSize(cvtOp->GetSimdBaseType()) != simdBaseTypeSize))
{
// We need the operand to be the same kind of mask; otherwise
// the bitwise operation can differ in how it performs
return tree;
}

#if defined(TARGET_XARCH)
GenTree* vectorNode = cvtOp->Op(1);
#elif defined(TARGET_ARM64)
GenTree* vectorNode = cvtOp->Op(2);
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

DEBUG_DESTROY_NODE(op, tree);
INDEBUG(vectorNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);

return vectorNode;
}

if (tree->OperIsConvertVectorToMask())
{
GenTree* op = op1;

#if defined(TARGET_ARM64)
if (!op->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll))
{
return tree;
}
op = op2;
#endif // TARGET_ARM64

if (!op->OperIsHWIntrinsic())
{
return tree;
}

unsigned simdBaseTypeSize = genTypeSize(simdBaseType);
GenTreeHWIntrinsic* cvtOp = op->AsHWIntrinsic();

if (!cvtOp->OperIsConvertMaskToVector())
{
return tree;
}

if ((genTypeSize(cvtOp->GetSimdBaseType()) != simdBaseTypeSize))
{
// We need the operand to be the same kind of mask; otherwise
// the bitwise operation can differ in how it performs
return tree;
}

GenTree* maskNode = cvtOp->Op(1);

#if defined(TARGET_ARM64)
DEBUG_DESTROY_NODE(op1);
#endif // TARGET_ARM64

DEBUG_DESTROY_NODE(op, tree);
return maskNode;
}

bool isScalar = false;
genTreeOps oper = tree->HWOperGet(&isScalar);

@@ -29630,12 +29852,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)

GenTree* resultNode = tree;

NamedIntrinsic ni = tree->GetHWIntrinsicId();
var_types retType = tree->TypeGet();
var_types simdBaseType = tree->GetSimdBaseType();
CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
unsigned int simdSize = tree->GetSimdSize();

if (otherNode == nullptr)
{
assert(op2 == nullptr);
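
One of the folds moved into gtFoldExprHWIntrinsic above removes back-to-back mask/vector conversions. A condensed, illustrative sketch of the ConvertMaskToVector(ConvertVectorToMask(v)) => v case, assuming the xarch operand ordering (on arm64 the inner vector operand is Op(2)):

// Illustrative sketch of the round-trip fold shown in the gentree.cpp diff above;
// not the exact JIT code. 'tree' is the ConvertMaskToVector node, 'op1' its operand.
GenTree* TryFoldMaskRoundTrip(GenTreeHWIntrinsic* tree, GenTree* op1)
{
    if (!op1->OperIsHWIntrinsic())
    {
        return tree;
    }

    GenTreeHWIntrinsic* cvtOp = op1->AsHWIntrinsic();

    if (!cvtOp->OperIsConvertVectorToMask())
    {
        return tree;
    }

    // Both conversions must agree on the SIMD base-type size; otherwise the two
    // masks describe differently sized elements and the fold would change behavior.
    if (genTypeSize(cvtOp->GetSimdBaseType()) != genTypeSize(tree->GetSimdBaseType()))
    {
        return tree;
    }

    return cvtOp->Op(1); // the original vector; the conversion round-trip is removed
}
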
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.h
@@ -6639,6 +6639,8 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

genTreeOps HWOperGet(bool* isScalar) const;

bool ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon);

private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);
