From 87b92fdf5c377e12fc46062042e62b4c8cc6a651 Mon Sep 17 00:00:00 2001 From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> Date: Sat, 20 Nov 2021 05:13:35 +0300 Subject: [PATCH] Make intrinsic nodes multi op (aka delete `GT_LIST`) (#59912) * Introducing GenTreeMultiOp * Rewrite gtNewSIMDNode * Rewrite gtNewSIMDVectorZero * Rewrite gtNewHWIntrinsicNode * Rewrite GenTreeSIMD::OperIsMemoryLoad * Rewrite GenTreeHWIntrinsic::OperIsMemoryLoad * Rewrite GenTreeHWIntrinsic::OperIsMemoryStore * Rewrite GenTree::IsIntegralConstVector * Rewrite GenTree::NullOp1Legal * Rewrite GenTree::IsSIMDZero * Rewrite GenTree::isCommutativeSIMDIntrinsic * Rewrite GenTree::isCommutativeHWIntrinsic * Rewrite GenTree::isContainableHWIntrinsic * Rewrite GenTree::isRMWHWIntrinsic * Rewrite GenTreeVisitor * Rewrite GenTreeUseEdgeIterator * Rewrite GenTree::VisitOperands * Rewrite GenTree::TryGetUse * Rewrite gtGetChildPointer * Rewrite gtHasRef * Rewrite fgSetTreeSeqHelper * Rewrite GenTree::NumChildren * Rewrite GenTree::GetChild * Rewrite GenTree::Compare * Rewrite gtCloneExpr * Rewrite gtSetEvalOrder * Rewrite gtHashValue * Rewrite gtDispTree * Rewrite fgDebugCheckFlags * Add genConsumeMultiOpOperands * Rewrite genConsumeRegs * Rewrite HWIntrinsic::HWIntrinsic * Rewrite HWIntrinsic::InitializeOperands * Delete HWIntrinsicInfo::lookupNumArgs * Delete HWIntrinsicInfo::lookupLastOp * Rewrite HWIntrinsicImmOpHelper ARM64 * Rewrite inst_RV_TT_IV * Rewrite inst_RV_RV_TT * Rewrite genSIMDIntrinsic XARCH * Rewrite genSIMDIntrinsicInit XARCH * Rewrite genSIMDIntrinsicInitN XARCH * Rewrite genSIMDIntrinsicUnOp XARCH * Rewrite genSIMDIntrinsic32BitConvert XARCH * Rewrite genSIMDIntrinsic64BitConvert XARCH * Rewrite genSIMDIntrinsicWiden XARCH * Rewrite genSIMDIntrinsicNarrow XARCH * Rewrite genSIMDIntrinsicBinOp XARCH * Rewrite genSIMDIntrinsicRelOp XARCH * Rewrite genSIMDIntrinsicShuffleSSE2 XARCH * Rewrite genSIMDIntrinsicUpperSave XARCH * Rewrite genSIMDIntrinsicUpperRestore XARCH * Rewrite genSIMDIntrinsic ARM64 * Rewrite genSIMDIntrinsicInit ARM64 * Rewrite genSIMDIntrinsicInitN ARM64 * Rewrite genSIMDIntrinsicUnOp ARM64 * Rewrite genSIMDIntrinsicWiden ARM64 * Rewrite genSIMDIntrinsicNarrow ARM64 * Rewrite genSIMDIntrinsicBinOp ARM64 * Rewrite genSIMDIntrinsicUpperSave ARM64 * Rewrite genSIMDIntrinsicUpperRestore ARM64 * Rewrite genHWIntrinsic_R_RM XARCH * Rewrite genHWIntrinsic_R_RM_I XARCH * Rewrite genHWIntrinsic_R_R_RM XARCH * Rewrite genHWIntrinsic_R_R_RM_I XARCH * Rewrite genHWIntrinsic_R_R_RM_R XARCH * Rewrite genHWIntrinsic_R_R_R_RM XARCH * Rewrite genHWIntrinsic XARCH * Rewrite genBaseIntrinsic XARCH * Rewrite genX86BaseIntrinsic XARCH * Rewrite genSSEIntrinsic XARCH * Rewrite genSSE2Intrinsic XARCH * Rewrite genSSE41Intrinsic XARCH * Rewrite genSSE42Intrinsic XARCH * Rewrite genAvxOrAvx2Intrinsic XARCH * Rewrite genBMI1OrBMI2Intrinsic XARCH * Rewrite genFMAIntrinsic XARCH * Rewrite genLZCNTIntrinsic XARCH * Rewrite genPOPCNTIntrinsic XARCH * Rewrite genXCNTIntrinsic XARCH * Rewrite genHWIntrinsic ARM64 * Rewrite insertUpperVectorSave * Rewrite insertUpperVectorRestore * Rewrite getKillSetForHWIntrinsic * Rewrite BuildSIMD XARCH * Rewrite BuildOperandUses/BuildDelayFreeUses * Rewrite BuildSIMD ARM64 * Rewrite BuildHWIntrinsic XARCH * Rewrite LowerSIMD XARCH * Rewrite ContainCheckSIMD XARCH * Rewrite LowerHWIntrinsicCC XARCH * Rewrite LowerFusedMultiplyAdd XARCH * Rewrite LowerHWIntrinsic XARCH * Rewrite LowerHWIntrinsicCmpOp XARCH * Rewrite LowerHWIntrinsicGetElement XARCH * Rewrite 
LowerHWIntrinsicWithElement XARCH * Rewrite LowerHWIntrinsicCreate XARCH * Rewrite LowerHWIntrinsicDot XARCH * Rewrite LowerHWIntrinsicToScalar XARCH * Rewrite IsContainableHWIntrinsicOp XARCH * Rewrite ContainCheckHWIntrinsic XARCH * Rewrite IsValidConstForMovImm ARM64 * Rewrite LowerHWIntrinsic ARM64 * Rewrite LowerHWIntrinsicFusedMultiplyAddScalar ARM64 * Rewrite LowerHWIntrinsicCmpOp ARM64 * Rewrite LowerHWIntrinsicCreate ARM64 * Rewrite LowerHWIntrinsicDot ARM64 * Rewrite ContainCheckStoreLoc ARM64 * Rewrite ContainCheckSIMD ARM64 * Rewrite ContainCheckHWIntrinsic ARM64 * Rewrite DecomposeHWIntrinsicGetElement X86 * Rewrite DecomposeHWIntrinsic X86 * Rewrite Rationalizer::RewriteNode * Rewrite optIsCSEcandidate * Rewrite fgValueNumberTree * Rewrite fgValueNumberSimd * Rewrite fgValueNumberHWIntrinsic * Rewrite GetVNFuncForNode * Rewrite fgMorphTree & fgMorphSmpOpOptional * Rewrite fgMorphFieldToSimdGetElement/fgMorphField * Rewrite fgMorphOneAsgBlockOp * Rewrite impInlineFetchArg * Rewrite impSIMDRelOp * Rewrite impSIMDIntrinsic * Rewrite impBaseIntrinsic XARCH * Rewrite impAvxOrAvx2Intrinsic XARCH * Rewrite impSpecialIntrinsic ARM64 * Fix SSA Builder comments * Delete GT_LIST * Support GTF_REVERSE_OPS for GenTreeMultiOp It turns out that in the time this change has been sitting there, 3 new methods in the SPMI benchmarks collection appeared, and it turns out they regress because of the lack of GTF_REVERSE_OPS. So, implement support for it.... This makes me quite sad, but it does make this change a pure zero-diff one, which is good. * Fix Linux x86 build break * Fix formatting * Improve readability through the use of a local * Support external operand arrays in GenTreeMultiOp * Fix formatting * Tweak a constructor call --- src/coreclr/jit/assertionprop.cpp | 3 +- src/coreclr/jit/codegen.h | 6 +- src/coreclr/jit/codegenarm64.cpp | 77 +- src/coreclr/jit/codegenarmarch.cpp | 1 - src/coreclr/jit/codegencommon.cpp | 3 +- src/coreclr/jit/codegenlinear.cpp | 60 +- src/coreclr/jit/codegenxarch.cpp | 1 - src/coreclr/jit/compiler.h | 66 +- src/coreclr/jit/compiler.hpp | 50 +- src/coreclr/jit/decomposelongs.cpp | 21 +- src/coreclr/jit/fgdiagnostic.cpp | 52 +- src/coreclr/jit/flowgraph.cpp | 62 +- src/coreclr/jit/gentree.cpp | 1133 ++++++++++--------- src/coreclr/jit/gentree.h | 647 +++++++---- src/coreclr/jit/gtlist.h | 6 +- src/coreclr/jit/gtstructs.h | 4 +- src/coreclr/jit/hwintrinsic.cpp | 86 -- src/coreclr/jit/hwintrinsic.h | 61 +- src/coreclr/jit/hwintrinsicarm64.cpp | 27 +- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 14 +- src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 301 ++--- src/coreclr/jit/hwintrinsicxarch.cpp | 35 +- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/instr.cpp | 8 +- src/coreclr/jit/lir.cpp | 2 +- src/coreclr/jit/lowerarmarch.cpp | 311 ++--- src/coreclr/jit/lowerxarch.cpp | 814 +++++-------- src/coreclr/jit/lsra.cpp | 10 +- src/coreclr/jit/lsra.h | 4 +- src/coreclr/jit/lsraarm.cpp | 1 - src/coreclr/jit/lsraarm64.cpp | 33 +- src/coreclr/jit/lsrabuild.cpp | 26 +- src/coreclr/jit/lsraxarch.cpp | 128 +-- src/coreclr/jit/morph.cpp | 175 ++- src/coreclr/jit/optcse.cpp | 2 +- src/coreclr/jit/rationalize.cpp | 47 +- src/coreclr/jit/simd.cpp | 48 +- src/coreclr/jit/simd.h | 2 +- src/coreclr/jit/simdcodegenxarch.cpp | 87 +- src/coreclr/jit/ssabuilder.h | 7 +- src/coreclr/jit/valuenum.cpp | 124 +- src/coreclr/jit/valuenum.h | 4 +- 42 files changed, 2068 insertions(+), 2483 deletions(-) diff --git a/src/coreclr/jit/assertionprop.cpp b/src/coreclr/jit/assertionprop.cpp index 
75286febdc44e..ee5f527b2c35e 100644 --- a/src/coreclr/jit/assertionprop.cpp +++ b/src/coreclr/jit/assertionprop.cpp @@ -1258,8 +1258,7 @@ AssertionIndex Compiler::optCreateAssertion(GenTree* op1, optAssertionKind assertionKind, bool helperCallArgs) { - assert((op1 != nullptr) && !op1->OperIs(GT_LIST)); - assert((op2 == nullptr) || !op2->OperIs(GT_LIST)); + assert(op1 != nullptr); assert(!helperCallArgs || (op2 != nullptr)); AssertionDsc assertion = {OAK_INVALID}; diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 915dede604872..82b1b4a66bcc1 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1130,9 +1130,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genConsumeRegs(GenTree* tree); void genConsumeOperands(GenTreeOp* tree); -#ifdef FEATURE_HW_INTRINSICS - void genConsumeHWIntrinsicOperands(GenTreeHWIntrinsic* tree); -#endif // FEATURE_HW_INTRINSICS +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + void genConsumeMultiOpOperands(GenTreeMultiOp* tree); +#endif void genEmitGSCookieCheck(bool pushReg); void genCodeForShift(GenTree* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 7831b66ecaae6..29de2240fb6a4 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3879,7 +3879,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) noway_assert(!"SIMD intrinsic with unsupported base type."); } - switch (simdNode->gtSIMDIntrinsicID) + switch (simdNode->GetSIMDIntrinsicId()) { case SIMDIntrinsicInit: genSIMDIntrinsicInit(simdNode); @@ -4039,15 +4039,15 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type // void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInit); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); - genConsumeOperands(simdNode); + genConsumeMultiOpOperands(simdNode); regNumber op1Reg = op1->IsIntegralConst(0) ? REG_ZR : op1->GetRegNum(); // TODO-ARM64-CQ Add LD1R to allow SIMDIntrinsicInit from contained memory @@ -4090,16 +4090,18 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); - var_types targetType = simdNode->TypeGet(); - - var_types baseType = simdNode->GetSimdBaseType(); + var_types targetType = simdNode->TypeGet(); + var_types baseType = simdNode->GetSimdBaseType(); + emitAttr baseTypeSize = emitTypeSize(baseType); + regNumber vectorReg = targetReg; + size_t initCount = simdNode->GetOperandCount(); - regNumber vectorReg = targetReg; + assert((initCount * baseTypeSize) <= simdNode->GetSimdSize()); if (varTypeIsFloating(baseType)) { @@ -4108,24 +4110,17 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) vectorReg = simdNode->GetSingleTempReg(RBM_ALLFLOAT); } - emitAttr baseTypeSize = emitTypeSize(baseType); - // We will first consume the list items in execution (left to right) order, // and record the registers. 
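// (With GT_LIST gone, the "list items" here are simply Op(1) through Op(initCount) of the node itself, consumed left to right.)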
regNumber operandRegs[FP_REGSIZE_BYTES]; - unsigned initCount = 0; - for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2()) + for (size_t i = 1; i <= initCount; i++) { - assert(list->OperGet() == GT_LIST); - GenTree* listItem = list->gtGetOp1(); - assert(listItem->TypeGet() == baseType); - assert(!listItem->isContained()); - regNumber operandReg = genConsumeReg(listItem); - operandRegs[initCount] = operandReg; - initCount++; - } + GenTree* operand = simdNode->Op(i); + assert(operand->TypeIs(baseType)); + assert(!operand->isContained()); - assert((initCount * baseTypeSize) <= simdNode->GetSimdSize()); + operandRegs[i - 1] = genConsumeReg(operand); + } if (initCount * baseTypeSize < EA_16BYTE) { @@ -4164,25 +4159,25 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToSingle || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt32 || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToDouble || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicConvertToInt64); + assert((simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicCast) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToSingle) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToInt32) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToDouble) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicConvertToInt64)); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); - genConsumeOperands(simdNode); + genConsumeMultiOpOperands(simdNode); regNumber op1Reg = op1->GetRegNum(); assert(genIsValidFloatReg(op1Reg)); assert(genIsValidFloatReg(targetReg)); - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); emitAttr attr = (simdNode->GetSimdSize() > 8) ? 
EA_16BYTE : EA_8BYTE; if (GetEmitter()->IsMovInstruction(ins)) @@ -4208,17 +4203,19 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual); + assert((simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicSub) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicBitwiseAnd) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicBitwiseOr) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicEqual)); - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); + GenTree* op1 = simdNode->Op(1); + GenTree* op2 = simdNode->Op(2); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); - genConsumeOperands(simdNode); + genConsumeMultiOpOperands(simdNode); regNumber op1Reg = op1->GetRegNum(); regNumber op2Reg = op2->GetRegNum(); @@ -4228,7 +4225,7 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // TODO-ARM64-CQ Contain integer constants where posible - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); emitAttr attr = (simdNode->GetSimdSize() > 8) ? EA_16BYTE : EA_8BYTE; insOpts opt = genGetSimdInsOpt(attr, baseType); @@ -4257,9 +4254,9 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicUpperSave); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); GenTreeLclVar* lclNode = op1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); assert(emitTypeSize(varDsc->GetRegisterType(lclNode)) == 16); @@ -4307,9 +4304,9 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicUpperRestore); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); assert(op1->IsLocal()); GenTreeLclVar* lclNode = op1->AsLclVar(); LclVarDsc* varDsc = compiler->lvaGetDesc(lclNode); diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 9faf0c603370b..d53014650dc6b 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -390,7 +390,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) // This is handled at the time we call genConsumeReg() on the GT_COPY break; - case GT_LIST: case GT_FIELD_LIST: // Should always be marked contained. 
assert(!"LIST, FIELD_LIST nodes should always be marked contained."); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 73e14e76b2a3e..67caa7e41ce97 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -1228,7 +1228,8 @@ unsigned CodeGenInterface::InferStructOpSizeAlign(GenTree* op, unsigned* alignme { opSize = (unsigned)op2->AsIntCon()->gtIconVal; GenTree* op1 = op->AsOp()->gtOp1; - assert(op1->OperGet() == GT_LIST); + // TODO-List-Cleanup: this looks like some really old dead code. + // assert(op1->OperGet() == GT_LIST); GenTree* dstAddr = op1->AsOp()->gtOp1; if (dstAddr->OperGet() == GT_ADDR) { diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 88788fa097e2a..60c2019547480 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -1623,14 +1623,18 @@ void CodeGen::genConsumeRegs(GenTree* tree) else if (tree->OperIs(GT_HWINTRINSIC)) { // Only load/store HW intrinsics can be contained (and the address may also be contained). - HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(tree->AsHWIntrinsic()->gtHWIntrinsicId); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(tree->AsHWIntrinsic()->GetHWIntrinsicId()); assert((category == HW_Category_MemoryLoad) || (category == HW_Category_MemoryStore)); - int numArgs = HWIntrinsicInfo::lookupNumArgs(tree->AsHWIntrinsic()); - genConsumeAddress(tree->gtGetOp1()); + size_t numArgs = tree->AsHWIntrinsic()->GetOperandCount(); + genConsumeAddress(tree->AsHWIntrinsic()->Op(1)); if (category == HW_Category_MemoryStore) { - assert((numArgs == 2) && !tree->gtGetOp2()->isContained()); - genConsumeReg(tree->gtGetOp2()); + assert(numArgs == 2); + + GenTree* op2 = tree->AsHWIntrinsic()->Op(2); + assert(!op2->isContained()); + + genConsumeReg(op2); } else { @@ -1674,7 +1678,6 @@ void CodeGen::genConsumeRegs(GenTree* tree) // Return Value: // None. // - void CodeGen::genConsumeOperands(GenTreeOp* tree) { GenTree* firstOp = tree->gtOp1; @@ -1690,54 +1693,25 @@ void CodeGen::genConsumeOperands(GenTreeOp* tree) } } -#ifdef FEATURE_HW_INTRINSICS +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ -// genConsumeHWIntrinsicOperands: Do liveness update for the operands of a GT_HWINTRINSIC node +// genConsumeMultiOpOperands: Do liveness update for the operands of a multi-operand node, +// currently GT_SIMD or GT_HWINTRINSIC // // Arguments: -// node - the GenTreeHWIntrinsic node whose operands will have their liveness updated. +// tree - the GenTreeMultiOp whose operands will have their liveness updated. // // Return Value: // None.
// - -void CodeGen::genConsumeHWIntrinsicOperands(GenTreeHWIntrinsic* node) +void CodeGen::genConsumeMultiOpOperands(GenTreeMultiOp* tree) { - int numArgs = HWIntrinsicInfo::lookupNumArgs(node); - GenTree* op1 = node->gtGetOp1(); - if (op1 == nullptr) + for (GenTree* operand : tree->Operands()) { - assert((numArgs == 0) && (node->gtGetOp2() == nullptr)); - return; - } - if (op1->OperIs(GT_LIST)) - { - int foundArgs = 0; - assert(node->gtGetOp2() == nullptr); - for (GenTreeArgList* list = op1->AsArgList(); list != nullptr; list = list->Rest()) - { - GenTree* operand = list->Current(); - genConsumeRegs(operand); - foundArgs++; - } - assert(foundArgs == numArgs); - } - else - { - genConsumeRegs(op1); - GenTree* op2 = node->gtGetOp2(); - if (op2 != nullptr) - { - genConsumeRegs(op2); - assert(numArgs == 2); - } - else - { - assert(numArgs == 1); - } + genConsumeRegs(operand); } } -#endif // FEATURE_HW_INTRINSICS +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) #if FEATURE_PUT_STRUCT_ARG_STK //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index d580cebc72fea..9de733fe1f597 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1673,7 +1673,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) // This is handled at the time we call genConsumeReg() on the GT_COPY break; - case GT_LIST: case GT_FIELD_LIST: // Should always be marked contained. assert(!"LIST, FIELD_LIST nodes should always be marked contained."); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2a1e357c061ed..97b8024b860e6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3083,8 +3083,6 @@ class Compiler GenTree* gtNewCpObjNode(GenTree* dst, GenTree* src, CORINFO_CLASS_HANDLE structHnd, bool isVolatile); - GenTreeArgList* gtNewListNode(GenTree* op1, GenTreeArgList* op2); - GenTreeCall::Use* gtNewCallArgs(GenTree* node); GenTreeCall::Use* gtNewCallArgs(GenTree* node1, GenTree* node2); GenTreeCall::Use* gtNewCallArgs(GenTree* node1, GenTree* node2, GenTree* node3); @@ -3166,6 +3164,19 @@ class Compiler CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic = false); + GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type, + GenTree** operands, + size_t operandCount, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic = false); + GenTreeHWIntrinsic* gtNewSimdHWIntrinsicNode(var_types type, + IntrinsicNodeBuilder&& nodeBuilder, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic = false); GenTreeHWIntrinsic* gtNewSimdAsHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID, @@ -3355,11 +3366,6 @@ class Compiler void gtChangeOperToNullCheck(GenTree* tree, BasicBlock* block); - GenTreeArgList* gtNewArgList(GenTree* op); - GenTreeArgList* gtNewArgList(GenTree* op1, GenTree* op2); - GenTreeArgList* gtNewArgList(GenTree* op1, GenTree* op2, GenTree* op3); - GenTreeArgList* gtNewArgList(GenTree* op1, GenTree* op2, GenTree* op3, GenTree* op4); - static fgArgTabEntry* gtArgEntryByArgNum(GenTreeCall* call, unsigned argNum); static fgArgTabEntry* gtArgEntryByNode(GenTreeCall* call, GenTree* node); fgArgTabEntry* gtArgEntryByLateArgIndex(GenTreeCall* call, unsigned lateArgInx); @@ -3438,8 +3444,6 @@ class Compiler // Create copy of an inline or guarded devirtualization candidate tree. 
GenTreeCall* gtCloneCandidateCall(GenTreeCall* call); - GenTree* gtReplaceTree(Statement* stmt, GenTree* tree, GenTree* replacementTree); - void gtUpdateSideEffects(Statement* stmt, GenTree* tree); void gtUpdateTreeAncestorsSideEffects(GenTree* tree); @@ -3464,8 +3468,8 @@ class Compiler bool gtHasLocalsWithAddrOp(GenTree* tree); - unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs); unsigned gtSetCallArgsOrder(const GenTreeCall::UseList& args, bool lateArgs, int* callCostEx, int* callCostSz); + unsigned gtSetMultiOpOrder(GenTreeMultiOp* multiOp); void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* base, bool constOnly); @@ -5503,12 +5507,12 @@ class Compiler #ifdef FEATURE_SIMD // Does value-numbering for a GT_SIMD tree - void fgValueNumberSimd(GenTree* tree); + void fgValueNumberSimd(GenTreeSIMD* tree); #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS // Does value-numbering for a GT_HWINTRINSIC tree - void fgValueNumberHWIntrinsic(GenTree* tree); + void fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree); #endif // FEATURE_HW_INTRINSICS // Does value-numbering for a call. We interpret some helper calls. @@ -6313,7 +6317,6 @@ class Compiler GenTreeFieldList* fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl); void fgInitArgInfo(GenTreeCall* call); GenTreeCall* fgMorphArgs(GenTreeCall* call); - GenTreeArgList* fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac); void fgMakeOutgoingStructArgCopy(GenTreeCall* call, GenTreeCall::Use* args, CORINFO_CLASS_HANDLE copyBlkClass); @@ -6386,6 +6389,7 @@ class Compiler GenTree* fgMorphRetInd(GenTreeUnOp* tree); GenTree* fgMorphModToSubMulDiv(GenTreeOp* tree); GenTree* fgMorphSmpOpOptional(GenTreeOp* tree); + GenTree* fgMorphMultiOp(GenTreeMultiOp* multiOp); GenTree* fgMorphConst(GenTree* tree); bool fgMorphCanUseLclFldForCopy(unsigned lclNum1, unsigned lclNum2); @@ -11518,6 +11522,42 @@ class GenTreeVisitor break; } +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + if (TVisitor::UseExecutionOrder && node->IsReverseOp()) + { + assert(node->AsMultiOp()->GetOperandCount() == 2); + + result = WalkTree(&node->AsMultiOp()->Op(2), node); + if (result == fgWalkResult::WALK_ABORT) + { + return result; + } + result = WalkTree(&node->AsMultiOp()->Op(1), node); + if (result == fgWalkResult::WALK_ABORT) + { + return result; + } + } + else + { + for (GenTree** use : node->AsMultiOp()->UseEdges()) + { + result = WalkTree(use, node); + if (result == fgWalkResult::WALK_ABORT) + { + return result; + } + } + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + // Binary nodes default: { diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 0fdcc26810a20..616080be59c12 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -1384,6 +1384,10 @@ inline void GenTree::SetOperRaw(genTreeOps oper) // Please do not do anything here other than assign to gtOper (debug-only // code is OK, but should be kept to a minimum). RecordOperBashing(OperGet(), oper); // nop unless NODEBASH_STATS is enabled + + // Bashing to MultiOp nodes is not currently supported. 
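+ // (A GenTreeMultiOp keeps its operands in a dedicated array that other node layouts do not allocate, so an in-place oper change would leave the operands with no storage.)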
+ assert(!OperIsMultiOp(oper)); + gtOper = oper; } @@ -4266,32 +4270,22 @@ void GenTree::VisitOperands(TVisitor visitor) return; // Variadic nodes -#ifdef FEATURE_SIMD +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) case GT_SIMD: - if (this->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInitN) - { - assert(this->AsSIMD()->gtOp1 != nullptr); - this->AsSIMD()->gtOp1->VisitListOperands(visitor); - } - else - { - VisitBinOpOperands(visitor); - } - return; -#endif // FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS +#endif +#if defined(FEATURE_HW_INTRINSICS) case GT_HWINTRINSIC: - if ((this->AsHWIntrinsic()->gtOp1 != nullptr) && this->AsHWIntrinsic()->gtOp1->OperIsList()) - { - this->AsHWIntrinsic()->gtOp1->VisitListOperands(visitor); - } - else +#endif + for (GenTree* operand : this->AsMultiOp()->Operands()) { - VisitBinOpOperands(visitor); + if (visitor(operand) == VisitResult::Abort) + { + break; + } } return; -#endif // FEATURE_HW_INTRINSICS +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) // Special nodes case GT_PHI: @@ -4437,20 +4431,6 @@ void GenTree::VisitOperands(TVisitor visitor) } } -template <typename TVisitor> -GenTree::VisitResult GenTree::VisitListOperands(TVisitor visitor) -{ - for (GenTreeArgList* node = this->AsArgList(); node != nullptr; node = node->Rest()) - { - if (visitor(node->gtOp1) == VisitResult::Abort) - { - return VisitResult::Abort; - } - } - - return VisitResult::Continue; -} - template <typename TVisitor> void GenTree::VisitBinOpOperands(TVisitor visitor) { diff --git a/src/coreclr/jit/decomposelongs.cpp b/src/coreclr/jit/decomposelongs.cpp index 50a1c05406285..36f87718f3fd1 100644 --- a/src/coreclr/jit/decomposelongs.cpp +++ b/src/coreclr/jit/decomposelongs.cpp @@ -1650,7 +1650,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use) GenTreeHWIntrinsic* hwintrinsicTree = tree->AsHWIntrinsic(); - switch (hwintrinsicTree->gtHWIntrinsicId) + switch (hwintrinsicTree->GetHWIntrinsicId()) { case NI_Vector128_GetElement: case NI_Vector256_GetElement: @@ -1693,10 +1693,11 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW { assert(node == use.Def()); assert(varTypeIsLong(node)); - assert((node->gtHWIntrinsicId == NI_Vector128_GetElement) || (node->gtHWIntrinsicId == NI_Vector256_GetElement)); + assert((node->GetHWIntrinsicId() == NI_Vector128_GetElement) || + (node->GetHWIntrinsicId() == NI_Vector256_GetElement)); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -1712,24 +1713,24 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW index = op2->AsIntCon()->IconValue(); } - GenTree* simdTmpVar = RepresentOpAsLocalVar(op1, node, &node->gtOp1); + GenTree* simdTmpVar = RepresentOpAsLocalVar(op1, node, &node->Op(1)); unsigned simdTmpVarNum = simdTmpVar->AsLclVarCommon()->GetLclNum(); JITDUMP("[DecomposeHWIntrinsicGetElement]: Saving op1 tree to a temp var:\n"); DISPTREERANGE(Range(), simdTmpVar); Range().Remove(simdTmpVar); - op1 = node->gtGetOp1(); + op1 = node->Op(1); GenTree* indexTmpVar = nullptr; unsigned indexTmpVarNum = 0; if (!indexIsConst) { - indexTmpVar = RepresentOpAsLocalVar(op2, node, &node->gtOp2); + indexTmpVar = RepresentOpAsLocalVar(op2, node, &node->Op(2)); indexTmpVarNum = indexTmpVar->AsLclVarCommon()->GetLclNum(); JITDUMP("[DecomposeHWIntrinsicGetElement]: Saving op2 tree to a temp var:\n");
DISPTREERANGE(Range(), indexTmpVar); Range().Remove(indexTmpVar); - op2 = node->gtGetOp2(); + op2 = node->Op(2); } // Create: @@ -1756,7 +1757,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW } GenTree* loResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar1, indexTimesTwo1, - node->gtHWIntrinsicId, CORINFO_TYPE_INT, simdSize); + node->GetHWIntrinsicId(), CORINFO_TYPE_INT, simdSize); Range().InsertBefore(node, loResult); // Create: @@ -1782,7 +1783,7 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsicGetElement(LIR::Use& use, GenTreeHW } GenTree* hiResult = m_compiler->gtNewSimdHWIntrinsicNode(TYP_INT, simdTmpVar2, indexTimesTwoPlusOne, - node->gtHWIntrinsicId, CORINFO_TYPE_INT, simdSize); + node->GetHWIntrinsicId(), CORINFO_TYPE_INT, simdSize); Range().InsertBefore(node, hiResult); // Done with the original tree; remove it. diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index ac4850e27a521..3b16fc7bd7572 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -2851,6 +2851,11 @@ void Compiler::fgDebugCheckFlags(GenTree* tree) chkFlags |= GTF_EXCEPT; } + if (tree->OperRequiresAsgFlag()) + { + chkFlags |= GTF_ASG; + } + if (tree->OperRequiresCallFlag(this)) { chkFlags |= GTF_CALL; @@ -2931,31 +2936,6 @@ void Compiler::fgDebugCheckFlags(GenTree* tree) } break; - case GT_LIST: - if ((op2 != nullptr) && op2->OperIsAnyList()) - { - ArrayStack<GenTree*> stack(getAllocator(CMK_DebugOnly)); - while ((tree->gtGetOp2() != nullptr) && tree->gtGetOp2()->OperIsAnyList()) - { - stack.Push(tree); - tree = tree->gtGetOp2(); - } - - fgDebugCheckFlags(tree); - - while (!stack.Empty()) - { - tree = stack.Pop(); - assert((tree->gtFlags & GTF_REVERSE_OPS) == 0); - fgDebugCheckFlags(tree->AsOp()->gtOp1); - chkFlags |= (tree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT); - chkFlags |= (tree->gtGetOp2()->gtFlags & GTF_ALL_EFFECT); - fgDebugCheckFlagsHelper(tree, (tree->gtFlags & GTF_ALL_EFFECT), chkFlags); - } - - return; - } - break; case GT_ADDR: assert(!op1->CanCSE()); break; @@ -3097,11 +3077,6 @@ void Compiler::fgDebugCheckFlags(GenTree* tree) */ } - if (tree->OperRequiresAsgFlag()) - { - chkFlags |= GTF_ASG; - } - if (oper == GT_ADDR && (op1->OperIsLocal() || op1->gtOper == GT_CLS_VAR || (op1->gtOper == GT_IND && op1->AsOp()->gtOp1->gtOper == GT_CLS_VAR_ADDR))) { @@ -3195,6 +3170,23 @@ void Compiler::fgDebugCheckFlags(GenTree* tree) } break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + // TODO-List-Cleanup: consider using the general Operands() iterator + // here for the "special" nodes to reduce code duplication. + for (GenTree* operand : tree->AsMultiOp()->Operands()) + { + fgDebugCheckFlags(operand); + chkFlags |= (operand->gtFlags & GTF_ALL_EFFECT); + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_ARR_ELEM: GenTree* arrObj; diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 654683fbba335..d99bc4d0be846 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3903,6 +3903,7 @@ GenTree* Compiler::fgSetTreeSeq(GenTree* tree, GenTree* prevTree, bool isLIR) void Compiler::fgSetTreeSeqHelper(GenTree* tree, bool isLIR) { + // TODO-List-Cleanup: measure what using GenTreeVisitor here brings.
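+ // Note that, as for ordinary binary operators, a multi-op node marked GTF_REVERSE_OPS is sequenced Op(2) before Op(1) (see the GT_SIMD/GT_HWINTRINSIC case below).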
genTreeOps oper; unsigned kind; @@ -3962,40 +3963,6 @@ void Compiler::fgSetTreeSeqHelper(GenTree* tree, bool isLIR) GenTree* op1 = tree->AsOp()->gtOp1; GenTree* op2 = tree->gtGetOp2IfPresent(); - // Special handling for GT_LIST - if (tree->OperGet() == GT_LIST) - { - // First, handle the list items, which will be linked in forward order. - // As we go, we will link the GT_LIST nodes in reverse order - we will number - // them and update fgTreeSeqList in a subsequent traversal. - GenTree* nextList = tree; - GenTree* list = nullptr; - while (nextList != nullptr && nextList->OperGet() == GT_LIST) - { - list = nextList; - GenTree* listItem = list->AsOp()->gtOp1; - fgSetTreeSeqHelper(listItem, isLIR); - nextList = list->AsOp()->gtOp2; - if (nextList != nullptr) - { - nextList->gtNext = list; - } - list->gtPrev = nextList; - } - // Next, handle the GT_LIST nodes. - // Note that fgSetTreeSeqFinish() sets the gtNext to null, so we need to capture the nextList - // before we call that method. - nextList = list; - do - { - assert(list != nullptr); - list = nextList; - nextList = list->gtNext; - fgSetTreeSeqFinish(list, isLIR); - } while (list != tree); - return; - } - /* Special handling for AddrMode */ if (tree->OperIsAddrMode()) { @@ -4097,6 +4064,29 @@ void Compiler::fgSetTreeSeqHelper(GenTree* tree, bool isLIR) break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + if (tree->IsReverseOp()) + { + assert(tree->AsMultiOp()->GetOperandCount() == 2); + fgSetTreeSeqHelper(tree->AsMultiOp()->Op(2), isLIR); + fgSetTreeSeqHelper(tree->AsMultiOp()->Op(1), isLIR); + } + else + { + for (GenTree* operand : tree->AsMultiOp()->Operands()) + { + fgSetTreeSeqHelper(operand, isLIR); + } + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_ARR_ELEM: fgSetTreeSeqHelper(tree->AsArrElem()->gtArrObj, isLIR); @@ -4161,7 +4151,7 @@ void Compiler::fgSetTreeSeqFinish(GenTree* tree, bool isLIR) { tree->gtFlags &= ~GTF_REVERSE_OPS; - if (tree->OperIs(GT_LIST, GT_ARGPLACE)) + if (tree->OperIs(GT_ARGPLACE)) { return; } @@ -4447,7 +4437,7 @@ GenTree* Compiler::fgGetFirstNode(GenTree* tree) GenTree* child = tree; while (child->NumChildren() > 0) { - if (child->OperIsBinary() && child->IsReverseOp()) + if ((child->OperIsBinary() || child->OperIsMultiOp()) && child->IsReverseOp()) { child = child->GetChild(1); } diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 0d51f23b3ae85..01b1296f13f59 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -308,7 +308,6 @@ void GenTree::InitNodeSize() static_assert_no_msg(sizeof(GenTreeCast) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeBox) <= TREE_NODE_SZ_LARGE); // *** large node static_assert_no_msg(sizeof(GenTreeField) <= TREE_NODE_SZ_LARGE); // *** large node - static_assert_no_msg(sizeof(GenTreeArgList) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeFieldList) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeColon) <= TREE_NODE_SZ_SMALL); static_assert_no_msg(sizeof(GenTreeCall) <= TREE_NODE_SZ_LARGE); // *** large node @@ -1508,29 +1507,6 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) return false; } break; -#ifdef FEATURE_SIMD - case GT_SIMD: - if ((op1->AsSIMD()->gtSIMDIntrinsicID != op2->AsSIMD()->gtSIMDIntrinsicID) || - (op1->AsSIMD()->GetSimdBaseType() != op2->AsSIMD()->GetSimdBaseType()) || - 
(op1->AsSIMD()->GetSimdSize() != op2->AsSIMD()->GetSimdSize())) - { - return false; - } - break; -#endif // FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS - case GT_HWINTRINSIC: - if ((op1->AsHWIntrinsic()->gtHWIntrinsicId != op2->AsHWIntrinsic()->gtHWIntrinsicId) || - (op1->AsHWIntrinsic()->GetSimdBaseType() != op2->AsHWIntrinsic()->GetSimdBaseType()) || - (op1->AsHWIntrinsic()->GetSimdSize() != op2->AsHWIntrinsic()->GetSimdSize()) || - (op1->AsHWIntrinsic()->GetAuxiliaryType() != op2->AsHWIntrinsic()->GetAuxiliaryType()) || - (op1->AsHWIntrinsic()->GetOtherReg() != op2->AsHWIntrinsic()->GetOtherReg())) - { - return false; - } - break; -#endif // For the ones below no extra argument matters for comparison. case GT_QMARK: @@ -1592,6 +1568,16 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) case GT_CALL: return GenTreeCall::Equals(op1->AsCall(), op2->AsCall()); +#ifdef FEATURE_SIMD + case GT_SIMD: + return GenTreeSIMD::Equals(op1->AsSIMD(), op2->AsSIMD()); +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + return GenTreeHWIntrinsic::Equals(op1->AsHWIntrinsic(), op2->AsHWIntrinsic()); +#endif + case GT_ARR_ELEM: if (op1->AsArrElem()->gtArrRank != op2->AsArrElem()->gtArrRank) @@ -1653,6 +1639,7 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK) * Returns non-zero if the given tree contains a use of a local #lclNum. */ +// TODO-List-Cleanup: rewrite with a general visitor. bool Compiler::gtHasRef(GenTree* tree, ssize_t lclNum, bool defOnly) { genTreeOps oper; @@ -1800,6 +1787,23 @@ bool Compiler::gtHasRef(GenTree* tree, ssize_t lclNum, bool defOnly) break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + for (GenTree* operand : tree->AsMultiOp()->Operands()) + { + if (gtHasRef(operand, lclNum, defOnly)) + { + return true; + } + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_ARR_ELEM: if (gtHasRef(tree->AsArrElem()->gtArrObj, lclNum, defOnly)) { @@ -2143,7 +2147,7 @@ unsigned Compiler::gtHashValue(GenTree* tree) #ifdef FEATURE_SIMD case GT_SIMD: - hash += tree->AsSIMD()->gtSIMDIntrinsicID; + hash += tree->AsSIMD()->GetSIMDIntrinsicId(); hash += tree->AsSIMD()->GetSimdBaseType(); hash += tree->AsSIMD()->GetSimdSize(); break; @@ -2151,7 +2155,7 @@ unsigned Compiler::gtHashValue(GenTree* tree) #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - hash += tree->AsHWIntrinsic()->gtHWIntrinsicId; + hash += tree->AsHWIntrinsic()->GetHWIntrinsicId(); hash += tree->AsHWIntrinsic()->GetSimdBaseType(); hash += tree->AsHWIntrinsic()->GetSimdSize(); hash += tree->AsHWIntrinsic()->GetAuxiliaryType(); @@ -2245,6 +2249,21 @@ unsigned Compiler::gtHashValue(GenTree* tree) } break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + // TODO-List: rewrite with a general visitor / iterator? 
+ for (GenTree* operand : tree->AsMultiOp()->Operands()) + { + hash = genTreeHashAdd(hash, gtHashValue(operand)); + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_PHI: for (GenTreePhi::Use& use : tree->AsPhi()->Uses()) { @@ -2539,123 +2558,6 @@ void GenTreeOp::DebugCheckLongMul() #endif // !defined(TARGET_64BIT) && defined(DEBUG) #endif // !defined(TARGET_64BIT) || defined(TARGET_ARM64) -//------------------------------------------------------------------------------ -// gtSetListOrder : Figure out the evaluation order for a list of values. -// -// -// Arguments: -// list - List to figure out the evaluation order for -// isListCallArgs - True iff the list is a list of call arguments -// callArgsInRegs - True iff the list is a list of call arguments and they are passed in registers -// -// Return Value: -// True if the operation can be a root of a bitwise rotation tree; false otherwise. - -unsigned Compiler::gtSetListOrder(GenTree* list, bool isListCallArgs, bool callArgsInRegs) -{ - assert((list != nullptr) && list->OperIsAnyList()); - assert(!callArgsInRegs || isListCallArgs); - - ArrayStack<GenTree*> listNodes(getAllocator(CMK_ArrayStack)); - - do - { - listNodes.Push(list); - list = list->AsOp()->gtOp2; - } while ((list != nullptr) && (list->OperIsAnyList())); - - unsigned nxtlvl = (list == nullptr) ? 0 : gtSetEvalOrder(list); - while (!listNodes.Empty()) - { - list = listNodes.Pop(); - assert(list && list->OperIsAnyList()); - GenTree* next = list->AsOp()->gtOp2; - - unsigned level = 0; - - // TODO: Do we have to compute costs differently for argument lists and - // all other lists? - // https://github.com/dotnet/runtime/issues/6622 - unsigned costSz = (isListCallArgs || (next == nullptr)) ? 0 : 1; - unsigned costEx = (isListCallArgs || (next == nullptr)) ? 0 : 1; - - if (next != nullptr) - { - if (isListCallArgs) - { - if (level < nxtlvl) - { - level = nxtlvl; - } - } - costEx += next->GetCostEx(); - costSz += next->GetCostSz(); - } - - GenTree* op1 = list->AsOp()->gtOp1; - unsigned lvl = gtSetEvalOrder(op1); - - // Swap the level counts - if (list->gtFlags & GTF_REVERSE_OPS) - { - unsigned tmpl; - - tmpl = lvl; - lvl = nxtlvl; - nxtlvl = tmpl; - } - - // TODO: Do we have to compute levels differently for argument lists and - // all other lists? - // https://github.com/dotnet/runtime/issues/6622 - if (isListCallArgs) - { - if (level < lvl) - { - level = lvl; - } - } - else - { - if (lvl < 1) - { - level = nxtlvl; - } - else if (lvl == nxtlvl) - { - level = lvl + 1; - } - else - { - level = lvl; - } - } - - if (op1->GetCostEx() != 0) - { - costEx += op1->GetCostEx(); - costEx += (callArgsInRegs || !isListCallArgs) ? 0 : IND_COST_EX; - } - - if (op1->GetCostSz() != 0) - { - costSz += op1->GetCostSz(); -#ifdef TARGET_XARCH - if (callArgsInRegs) // push is smaller than mov to reg -#endif - { - costSz += 1; - } - } - - list->SetCosts(costEx, costSz); - - nxtlvl = level; - } - - return nxtlvl; -} - unsigned Compiler::gtSetCallArgsOrder(const GenTreeCall::UseList& args, bool lateArgs, int* callCostEx, int* callCostSz) { unsigned level = 0; @@ -2696,6 +2598,172 @@ unsigned Compiler::gtSetCallArgsOrder(const GenTreeCall::UseList& args, bool lat return level; } +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +//------------------------------------------------------------------------ +// gtSetMultiOpOrder: Calculate the costs for a MultiOp. +// +// Currently this function just preserves the previous behavior.
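+// For two operands this reduces to the classic Sethi-Ullman rule: with operand complexities N1 and N2, the node needs max(N1, N2) registers when they differ and N1 + 1 when they are equal (e.g. Add(Mul(a, b), c) still needs only two).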
+// TODO-List-Cleanup: implement proper costing for these trees. +// +// Arguments: +// multiOp - The MultiOp tree in question +// +// Return Value: +// The Sethi "complexity" for this tree (the idealized number of +// registers needed to evaluate it). +// +unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp) +{ + // These default costs preserve previous behavior. + // TODO-CQ: investigate opportunities for tuning them. + int costEx = 1; + int costSz = 1; + unsigned level = 0; + unsigned lvl2 = 0; + +#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + if (multiOp->OperIs(GT_HWINTRINSIC) && (multiOp->GetOperandCount() == 1) && + multiOp->AsHWIntrinsic()->OperIsMemoryLoadOrStore()) + { + costEx = IND_COST_EX; + costSz = 2; + + GenTree* addr = multiOp->Op(1)->gtEffectiveVal(); + level = gtSetEvalOrder(addr); + + // See if we can form a complex addressing mode. + if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, multiOp->TypeGet())) + { + // Nothing to do, costs have been set. + } + else + { + costEx += addr->GetCostEx(); + costSz += addr->GetCostSz(); + } + + multiOp->SetCosts(costEx, costSz); + return level; + } +#endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) + + // This code is here to preserve previous behavior. + switch (multiOp->GetOperandCount()) + { + case 0: + // This is a constant HWIntrinsic, we already have correct costs. + break; + + case 1: + // A "unary" case. + level = gtSetEvalOrder(multiOp->Op(1)); + costEx += multiOp->Op(1)->GetCostEx(); + costSz += multiOp->Op(1)->GetCostSz(); + break; + + case 2: + // A "binary" case. + + // This way we have "level" be the complexity of the + // first tree to be evaluated, and "lvl2" - the second. + if (multiOp->IsReverseOp()) + { + level = gtSetEvalOrder(multiOp->Op(2)); + lvl2 = gtSetEvalOrder(multiOp->Op(1)); + } + else + { + level = gtSetEvalOrder(multiOp->Op(1)); + lvl2 = gtSetEvalOrder(multiOp->Op(2)); + } + + // We want the more complex tree to be evaluated first. + if (level < lvl2) + { + bool canSwap = multiOp->IsReverseOp() ? gtCanSwapOrder(multiOp->Op(2), multiOp->Op(1)) + : gtCanSwapOrder(multiOp->Op(1), multiOp->Op(2)); + + // The InitN intrinsic for two operands used to be not reversible, so preserve this. + // TODO-List-Cleanup: delete this only-needed-for-zero-diffs quirk. + if (multiOp->OperIs(GT_SIMD) && (multiOp->AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInitN)) + { + canSwap = false; + } + + if (canSwap) + { + if (multiOp->IsReverseOp()) + { + multiOp->ClearReverseOp(); + } + else + { + multiOp->SetReverseOp(); + } + + std::swap(level, lvl2); + } + } + + if (level < 1) + { + level = lvl2; + } + else if (level == lvl2) + { + level += 1; + } + + costEx += (multiOp->Op(1)->GetCostEx() + multiOp->Op(2)->GetCostEx()); + costSz += (multiOp->Op(1)->GetCostSz() + multiOp->Op(2)->GetCostSz()); + break; + + default: + // The former "ArgList" case... we'll be emulating it here. + // The old implementation pushed the nodes on the list, in pre-order. + // Then it popped and costed them in "reverse order", so that's what + // we'll be doing here as well. + + unsigned nxtlvl = 0; + for (size_t i = multiOp->GetOperandCount(); i >= 1; i--) + { + GenTree* op = multiOp->Op(i); + unsigned lvl = gtSetEvalOrder(op); + + if (lvl < 1) + { + level = nxtlvl; + } + else if (lvl == nxtlvl) + { + level = lvl + 1; + } + else + { + level = lvl; + } + + costEx += op->GetCostEx(); + costSz += op->GetCostSz(); + + // Preserving previous behavior...
+ CLANG_FORMAT_COMMENT_ANCHOR; +#ifndef TARGET_XARCH + if (op->GetCostSz() != 0) + { + costSz += 1; + } +#endif + nxtlvl = level; + } + break; + } + + multiOp->SetCosts(costEx, costSz); + return level; +} +#endif + //----------------------------------------------------------------------------- // gtWalkOp: Traverse and mark an address expression // @@ -3809,7 +3877,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) break; - case GT_LIST: case GT_NOP: costEx = 0; costSz = 0; @@ -3948,26 +4015,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costSz = 2 * 2; break; -#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - case GT_HWINTRINSIC: - { - if (tree->AsHWIntrinsic()->OperIsMemoryLoadOrStore()) - { - costEx = IND_COST_EX; - costSz = 2; - // See if we can form a complex addressing mode. - - GenTree* addr = op1->gtEffectiveVal(); - - if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, tree->TypeGet())) - { - goto DONE; - } - } - } - break; -#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH - case GT_BLK: case GT_IND: @@ -4225,13 +4272,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) goto DONE; - case GT_LIST: - { - const bool isListCallArgs = false; - const bool callArgsInRegs = false; - return gtSetListOrder(tree, isListCallArgs, callArgsInRegs); - } - case GT_INDEX_ADDR: costEx = 6; // cmp reg,reg; jae throw; mov reg, [addrmode] (not taken) costSz = 9; // jump to cold section @@ -4529,9 +4569,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) case GT_MKREFANY: break; - case GT_LIST: - break; - default: /* Mark the operand's evaluation order to be swapped */ @@ -4701,6 +4738,16 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) costEx += 3 * IND_COST_EX; break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + return gtSetMultiOpOrder(tree->AsMultiOp()); +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_ARR_ELEM: { GenTreeArrElem* arrElem = tree->AsArrElem(); @@ -4857,16 +4904,6 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree) } DONE: - -#ifdef FEATURE_HW_INTRINSICS - if ((oper == GT_HWINTRINSIC) && (tree->gtGetOp1() == nullptr)) - { - // We can have nullary HWIntrinsic nodes, and we must have non-zero cost. - costEx = 1; - costSz = 1; - } -#endif // FEATURE_HW_INTRINSICS - // Some path through this function must have set the costs. assert(costEx != -1); assert(costSz != -1); @@ -4970,10 +5007,9 @@ unsigned GenTree::GetScaledIndex() // Also note that when UNIX_AMD64_ABI is defined the GT_LDOBJ // later gets converted to a GT_FIELD_LIST with two GT_LCL_FLDs in Lower/LowerXArch. // - GenTree** GenTree::gtGetChildPointer(GenTree* parent) const - { + // TODO-List-Cleanup: remove, use TryGetUse instead. 
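+ // (TryGetUse covers multi-op nodes by walking their use-edge array; an equivalent case is added to this legacy helper below.)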
switch (parent->OperGet()) { default: @@ -5117,6 +5153,23 @@ GenTree** GenTree::gtGetChildPointer(GenTree* parent) const } } break; + +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + for (GenTree** use : parent->AsMultiOp()->UseEdges()) + { + if (this == *use) + { + return use; + } + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) } return nullptr; @@ -5221,26 +5274,23 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) return false; #endif // FEATURE_ARG_SPLIT -#ifdef FEATURE_SIMD +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) case GT_SIMD: - if (this->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInitN) - { - assert(this->AsSIMD()->gtOp1 != nullptr); - return this->AsSIMD()->gtOp1->TryGetUseList(def, use); - } - - return TryGetUseBinOp(def, use); -#endif // FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS +#endif +#if defined(FEATURE_HW_INTRINSICS) case GT_HWINTRINSIC: - if ((this->AsHWIntrinsic()->gtOp1 != nullptr) && this->AsHWIntrinsic()->gtOp1->OperIsList()) +#endif + for (GenTree** opUse : this->AsMultiOp()->UseEdges()) { - return this->AsHWIntrinsic()->gtOp1->TryGetUseList(def, use); + if (*opUse == def) + { + *use = opUse; + return true; + } } - - return TryGetUseBinOp(def, use); -#endif // FEATURE_HW_INTRINSICS + return false; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) // Special nodes case GT_PHI: @@ -5415,22 +5465,6 @@ bool GenTree::TryGetUse(GenTree* def, GenTree*** use) } } -bool GenTree::TryGetUseList(GenTree* def, GenTree*** use) -{ - assert(def != nullptr); - assert(use != nullptr); - - for (GenTreeArgList* node = this->AsArgList(); node != nullptr; node = node->Rest()) - { - if (def == node->gtOp1) - { - *use = &node->gtOp1; - return true; - } - } - return false; -} - bool GenTree::TryGetUseBinOp(GenTree* def, GenTree*** use) { assert(def != nullptr); @@ -6344,7 +6378,7 @@ GenTree* Compiler::gtNewSIMDVectorZero(var_types simdType, CorInfoType simdBaseJ var_types simdBaseType = genActualType(JitType2PreciseVarType(simdBaseJitType)); GenTree* initVal = gtNewZeroConNode(simdBaseType); initVal->gtType = simdBaseType; - return gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, simdBaseJitType, simdSize); + return gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, simdBaseJitType, simdSize); } #endif // FEATURE_SIMD @@ -6575,53 +6609,6 @@ GenTreeCall::Use* Compiler::gtNewCallArgs(GenTree* node1, GenTree* node2, GenTre return new (this, CMK_ASTNode) GenTreeCall::Use(node1, gtNewCallArgs(node2, node3, node4)); } -GenTreeArgList* Compiler::gtNewListNode(GenTree* op1, GenTreeArgList* op2) -{ - assert((op1 != nullptr) && (op1->OperGet() != GT_LIST)); - - return new (this, GT_LIST) GenTreeArgList(op1, op2); -} - -/***************************************************************************** - * - * Create a list out of one value. - */ - -GenTreeArgList* Compiler::gtNewArgList(GenTree* arg) -{ - return new (this, GT_LIST) GenTreeArgList(arg); -} - -/***************************************************************************** - * - * Create a list out of the two values. - */ - -GenTreeArgList* Compiler::gtNewArgList(GenTree* arg1, GenTree* arg2) -{ - return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2)); -} - -/***************************************************************************** - * - * Create a list out of the three values. 
- */ - -GenTreeArgList* Compiler::gtNewArgList(GenTree* arg1, GenTree* arg2, GenTree* arg3) -{ - return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2, arg3)); -} - -/***************************************************************************** - * - * Create a list out of the three values. - */ - -GenTreeArgList* Compiler::gtNewArgList(GenTree* arg1, GenTree* arg2, GenTree* arg3, GenTree* arg4) -{ - return new (this, GT_LIST) GenTreeArgList(arg1, gtNewArgList(arg2, arg3, arg4)); -} - /***************************************************************************** * * Given a GT_CALL node, access the fgArgInfo and find the entry @@ -7798,14 +7785,6 @@ GenTree* Compiler::gtCloneExpr( tree->AsCast()->gtCastType DEBUGARG(/*largeNode*/ TRUE)); break; - // The nodes below this are not bashed, so they can be allocated at their individual sizes. - - case GT_LIST: - assert((tree->AsOp()->gtOp2 == nullptr) || tree->AsOp()->gtOp2->OperIsList()); - copy = new (this, GT_LIST) GenTreeArgList(tree->AsOp()->gtOp1); - copy->AsOp()->gtOp2 = tree->AsOp()->gtOp2; - break; - case GT_INDEX: { GenTreeIndex* asInd = tree->AsIndex(); @@ -7934,30 +7913,6 @@ GenTree* Compiler::gtCloneExpr( } break; -#ifdef FEATURE_SIMD - case GT_SIMD: - { - GenTreeSIMD* simdOp = tree->AsSIMD(); - copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2IfPresent(), - simdOp->gtSIMDIntrinsicID, simdOp->GetSimdBaseJitType(), simdOp->GetSimdSize()); - } - break; -#endif - -#ifdef FEATURE_HW_INTRINSICS - case GT_HWINTRINSIC: - { - GenTreeHWIntrinsic* hwintrinsicOp = tree->AsHWIntrinsic(); - copy = new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(hwintrinsicOp->TypeGet(), hwintrinsicOp->gtGetOp1(), - hwintrinsicOp->gtGetOp2IfPresent(), hwintrinsicOp->gtHWIntrinsicId, - hwintrinsicOp->GetSimdBaseJitType(), hwintrinsicOp->GetSimdSize(), - hwintrinsicOp->IsSimdAsHWIntrinsic()); - copy->AsHWIntrinsic()->SetAuxiliaryJitType(hwintrinsicOp->GetAuxiliaryJitType()); - } - break; -#endif - default: assert(!GenTree::IsExOp(tree->OperKind()) && tree->OperIsSimple()); // We're in the SimpleOp case, so it's always unary or binary. 
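The gtCloneExpr hunk that follows replaces the old per-oper GT_SIMD/GT_HWINTRINSIC clone cases with a single pattern: construct the copy together with a fresh operand array, then deep-clone each use edge in place. A minimal sketch of that shape, with hypothetical stand-ins (Node, allocArray, cloneTree) for GenTreeMultiOp, the IntrinsicNodeBuilder allocation, and gtCloneExpr:

#include <cstddef>

struct Node
{
    Node** operands; // flat use-edge array, GenTreeMultiOp-style
    size_t count;    // number of operands
};

// Deep-clone "src": shallow-copy the node "header", give the copy its own
// operand array, then recursively clone each operand into the copy's use edges.
Node* cloneMultiOp(const Node* src, Node** (*allocArray)(size_t), Node* (*cloneTree)(const Node*))
{
    Node* copy     = new Node(*src);         // like the GenTreeSIMD/GenTreeHWIntrinsic constructor calls
    copy->operands = allocArray(src->count); // fresh array, as the builder provides

    for (size_t i = 0; i < src->count; i++)
    {
        copy->operands[i] = cloneTree(src->operands[i]);
    }
    return copy;
}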
@@ -8054,6 +8009,33 @@ GenTree* Compiler::gtCloneExpr( copy = gtCloneExprCallHelper(tree->AsCall(), addFlags, deepVarNum, deepVarVal); break; +#ifdef FEATURE_SIMD + case GT_SIMD: + copy = new (this, GT_SIMD) + GenTreeSIMD(tree->TypeGet(), IntrinsicNodeBuilder(getAllocator(CMK_ASTNode), tree->AsSIMD()), + tree->AsSIMD()->GetSIMDIntrinsicId(), tree->AsSIMD()->GetSimdBaseJitType(), + tree->AsSIMD()->GetSimdSize()); + goto CLONE_MULTIOP_OPERANDS; +#endif +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + copy = new (this, GT_HWINTRINSIC) + GenTreeHWIntrinsic(tree->TypeGet(), IntrinsicNodeBuilder(getAllocator(CMK_ASTNode), tree->AsMultiOp()), + tree->AsHWIntrinsic()->GetHWIntrinsicId(), + tree->AsHWIntrinsic()->GetSimdBaseJitType(), tree->AsHWIntrinsic()->GetSimdSize(), + tree->AsHWIntrinsic()->IsSimdAsHWIntrinsic()); + copy->AsHWIntrinsic()->SetAuxiliaryJitType(tree->AsHWIntrinsic()->GetAuxiliaryJitType()); + goto CLONE_MULTIOP_OPERANDS; +#endif +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + CLONE_MULTIOP_OPERANDS: + for (GenTree** use : copy->AsMultiOp()->UseEdges()) + { + *use = gtCloneExpr(*use, addFlags, deepVarNum, deepVarVal); + } + break; +#endif + case GT_ARR_ELEM: { GenTreeArrElem* arrElem = tree->AsArrElem(); @@ -8329,106 +8311,6 @@ GenTreeCall* Compiler::gtCloneCandidateCall(GenTreeCall* call) return result; } -//------------------------------------------------------------------------ -// gtReplaceTree: Replace a tree with a new tree. -// -// Arguments: -// stmt - The top-level root stmt of the tree being replaced. -// Must not be null. -// tree - The tree being replaced. Must not be null. -// replacementTree - The replacement tree. Must not be null. -// -// Return Value: -// The tree node that replaces the old tree. -// -// Assumptions: -// The sequencing of the stmt has been done. -// -// Notes: -// The caller must ensure that the original statement has been sequenced, -// and the side effect flags are updated on the statement nodes, -// but this method will sequence 'replacementTree', and insert it into the -// proper place in the statement sequence. - -GenTree* Compiler::gtReplaceTree(Statement* stmt, GenTree* tree, GenTree* replacementTree) -{ - assert(fgStmtListThreaded); - assert(tree != nullptr); - assert(stmt != nullptr); - assert(replacementTree != nullptr); - - GenTree** treePtr = nullptr; - GenTree* treeParent = tree->gtGetParent(&treePtr); - - assert(treeParent != nullptr || tree == stmt->GetRootNode()); - - if (treePtr == nullptr) - { - // Replace the stmt expr and rebuild the linear order for "stmt". - assert(treeParent == nullptr); - assert(fgOrder != FGOrderLinear); - stmt->SetRootNode(tree); - fgSetStmtSeq(stmt); - } - else - { - assert(treeParent != nullptr); - - // Check to see if the node to be replaced is a call argument and if so, - // set `treeParent` to the call node. - GenTree* cursor = treeParent; - while ((cursor != nullptr) && (cursor->OperGet() == GT_LIST)) - { - cursor = cursor->gtNext; - } - - if ((cursor != nullptr) && (cursor->OperGet() == GT_CALL)) - { - treeParent = cursor; - } - -#ifdef DEBUG - GenTree** useEdge; - assert(treeParent->TryGetUse(tree, &useEdge)); - assert(useEdge == treePtr); -#endif // DEBUG - - GenTree* treeFirstNode = fgGetFirstNode(tree); - GenTree* treeLastNode = tree; - GenTree* treePrevNode = treeFirstNode->gtPrev; - GenTree* treeNextNode = treeLastNode->gtNext; - - treeParent->ReplaceOperand(treePtr, replacementTree); - - // Build the linear order for "replacementTree". 
- fgSetTreeSeq(replacementTree, treePrevNode); - - // Restore linear-order Prev and Next for "replacementTree". - if (treePrevNode != nullptr) - { - treeFirstNode = fgGetFirstNode(replacementTree); - treeFirstNode->gtPrev = treePrevNode; - treePrevNode->gtNext = treeFirstNode; - } - else - { - // Update the linear oder start of "stmt" if treeFirstNode - // appears to have replaced the original first node. - assert(treeFirstNode == stmt->GetTreeList()); - stmt->SetTreeList(fgGetFirstNode(replacementTree)); - } - - if (treeNextNode != nullptr) - { - treeLastNode = replacementTree; - treeLastNode->gtNext = treeNextNode; - treeNextNode->gtPrev = treeLastNode; - } - } - - return replacementTree; -} - //------------------------------------------------------------------------ // gtUpdateSideEffects: Update the side effects of a tree and its ancestors // @@ -8744,6 +8626,7 @@ bool GenTree::gtRequestSetFlags() return result; } +// TODO-List-Cleanup: remove. unsigned GenTree::NumChildren() { if (OperIsConst() || OperIsLeaf()) @@ -8869,6 +8752,17 @@ unsigned GenTree::NumChildren() } return res; } + +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + return static_cast(AsMultiOp()->GetOperandCount()); +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_NONE: return 0; default: @@ -8877,6 +8771,7 @@ unsigned GenTree::NumChildren() } } +// TODO-List-Cleanup: remove. GenTree* GenTree::GetChild(unsigned childNum) { assert(childNum < NumChildren()); // Precondition. @@ -9068,8 +8963,17 @@ GenTree* GenTree::GetChild(unsigned childNum) unreached(); } - case GT_NONE: - unreached(); + +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + return AsMultiOp()->Op(childNum + 1); +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + default: unreached(); } @@ -9186,34 +9090,14 @@ GenTreeUseEdgeIterator::GenTreeUseEdgeIterator(GenTree* node) // Variadic nodes #ifdef FEATURE_SIMD case GT_SIMD: - if (m_node->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInitN) - { - SetEntryStateForList(m_node->AsSIMD()->gtOp1->AsArgList()); - } - else - { - SetEntryStateForBinOp(); - } - return; -#endif // FEATURE_SIMD - +#endif #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - if (m_node->AsHWIntrinsic()->gtOp1 == nullptr) - { - assert(m_node->NullOp1Legal()); - m_state = -1; - } - else if (m_node->AsHWIntrinsic()->gtOp1->OperIsList()) - { - SetEntryStateForList(m_node->AsHWIntrinsic()->gtOp1->AsArgList()); - } - else - { - SetEntryStateForBinOp(); - } +#endif +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + SetEntryStateForMultiOp(); return; -#endif // FEATURE_HW_INTRINSICS +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) // LEA, which may have no first operand case GT_LEA: @@ -9501,37 +9385,76 @@ void GenTreeUseEdgeIterator::SetEntryStateForBinOp() } } +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ -// GenTreeUseEdgeIterator::AdvanceList: produces the next operand of a variadic node and advances the state. +// GenTreeUseEdgeIterator::AdvanceMultiOp: produces the next operand of a multi-op node and advances the state. 
// -// This function does not use `m_state` for anything meaningful; it simply walks the `m_argList` until -// there are no further entries. +// Takes advantage of the fact that GenTreeMultiOp stores the operands in a contiguous array, simply +// incrementing the "m_edge" pointer, unless the end, stored in "m_statePtr", has been reached. // -void GenTreeUseEdgeIterator::AdvanceList() +void GenTreeUseEdgeIterator::AdvanceMultiOp() { - assert(m_state == 0); + assert(m_node != nullptr); + assert(m_node->OperIs(GT_SIMD, GT_HWINTRINSIC)); - if (m_statePtr == nullptr) + m_edge++; + if (m_edge == m_statePtr) { - m_state = -1; + Terminate(); } - else +} + +//------------------------------------------------------------------------ +// GenTreeUseEdgeIterator::AdvanceReversedMultiOp: produces the next operand of a multi-op node +// marked with GTF_REVERSE_OPS and advances the state. +// +// Takes advantage of the fact that GenTreeMultiOp stores the operands in a contiguous array, simply +// decrementing the "m_edge" pointer, unless the beginning, stored in "m_statePtr", has been reached. +// +void GenTreeUseEdgeIterator::AdvanceReversedMultiOp() +{ + assert(m_node != nullptr); + assert(m_node->OperIs(GT_SIMD, GT_HWINTRINSIC)); + assert((m_node->AsMultiOp()->GetOperandCount() == 2) && m_node->IsReverseOp()); + + m_edge--; + if (m_edge == m_statePtr) { - GenTreeArgList* listNode = static_cast<GenTreeArgList*>(m_statePtr); - m_edge = &listNode->gtOp1; - m_statePtr = listNode->Rest(); + Terminate(); } } //------------------------------------------------------------------------ -// GenTreeUseEdgeIterator::SetEntryStateForList: produces the first operand of a list node. +// GenTreeUseEdgeIterator::SetEntryStateForMultiOp: produces the first operand of a multi-op node and sets the +// required advance function. // -void GenTreeUseEdgeIterator::SetEntryStateForList(GenTreeArgList* list) +void GenTreeUseEdgeIterator::SetEntryStateForMultiOp() { - m_statePtr = list; - m_advance = &GenTreeUseEdgeIterator::AdvanceList; - AdvanceList(); + size_t operandCount = m_node->AsMultiOp()->GetOperandCount(); + + if (operandCount == 0) + { + Terminate(); + } + else + { + if (m_node->IsReverseOp()) + { + assert(operandCount == 2); + + m_edge = m_node->AsMultiOp()->GetOperandArray() + 1; + m_statePtr = m_node->AsMultiOp()->GetOperandArray() - 1; + m_advance = &GenTreeUseEdgeIterator::AdvanceReversedMultiOp; + } + else + { + m_edge = m_node->AsMultiOp()->GetOperandArray(); + m_statePtr = m_node->AsMultiOp()->GetOperandArray(operandCount); + m_advance = &GenTreeUseEdgeIterator::AdvanceMultiOp; + } + } } +#endif //------------------------------------------------------------------------ // GenTreeUseEdgeIterator::AdvanceCall: produces the next operand of a call node and advances the state. @@ -10403,7 +10326,7 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _ /* Then print the general purpose flags */ GenTreeFlags flags = tree->gtFlags; - if (tree->OperIsBinary()) + if (tree->OperIsBinary() || tree->OperIsMultiOp()) { genTreeOps oper = tree->OperGet(); @@ -10416,9 +10339,9 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _ } } } - else // !tree->OperIsBinary() + else // !(tree->OperIsBinary() || tree->OperIsMultiOp()) { - // the GTF_REVERSE flag only applies to binary operations + // the GTF_REVERSE flag only applies to binary operations (which some MultiOp nodes are).
flags &= ~GTF_REVERSE_OPS; // we use this value for GTF_VAR_ARR_INDEX above } @@ -11732,53 +11655,24 @@ void Compiler::gtDispTree(GenTree* tree, } } -#ifdef FEATURE_SIMD - if (tree->gtOper == GT_SIMD) - { - printf(" %s %s", varTypeName(tree->AsSIMD()->GetSimdBaseType()), - simdIntrinsicNames[tree->AsSIMD()->gtSIMDIntrinsicID]); - } -#endif // FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS - if (tree->gtOper == GT_HWINTRINSIC) - { - printf(" %s %s", tree->AsHWIntrinsic()->GetSimdBaseType() == TYP_UNKNOWN - ? "" - : varTypeName(tree->AsHWIntrinsic()->GetSimdBaseType()), - HWIntrinsicInfo::lookupName(tree->AsHWIntrinsic()->gtHWIntrinsicId)); - } -#endif // FEATURE_HW_INTRINSICS - gtDispCommonEndLine(tree); if (!topOnly) { if (tree->AsOp()->gtOp1 != nullptr) { - if (tree->OperIs(GT_PHI)) + // Label the child of the GT_COLON operator + // op1 is the else part + if (tree->gtOper == GT_COLON) { - for (GenTreeArgList* args = tree->gtGetOp1()->AsArgList(); args != nullptr; args = args->Rest()) - { - gtDispChild(args->Current(), indentStack, (args->Rest() == nullptr) ? IIArcBottom : IIArc); - } + childMsg = "else"; } - else + else if (tree->gtOper == GT_QMARK) { - // Label the child of the GT_COLON operator - // op1 is the else part - - if (tree->gtOper == GT_COLON) - { - childMsg = "else"; - } - else if (tree->gtOper == GT_QMARK) - { - childMsg = " if"; - } - gtDispChild(tree->AsOp()->gtOp1, indentStack, - (tree->gtGetOp2IfPresent() == nullptr) ? IIArcBottom : IIArc, childMsg, topOnly); + childMsg = " if"; } + gtDispChild(tree->AsOp()->gtOp1, indentStack, + (tree->gtGetOp2IfPresent() == nullptr) ? IIArcBottom : IIArc, childMsg, topOnly); } if (tree->gtGetOp2IfPresent()) @@ -11928,6 +11822,45 @@ void Compiler::gtDispTree(GenTree* tree, } break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + +#if defined(FEATURE_SIMD) + if (tree->OperIs(GT_SIMD)) + { + printf(" %s %s", varTypeName(tree->AsSIMD()->GetSimdBaseType()), + simdIntrinsicNames[tree->AsSIMD()->GetSIMDIntrinsicId()]); + } +#endif // defined(FEATURE_SIMD) +#if defined(FEATURE_HW_INTRINSICS) + if (tree->OperIs(GT_HWINTRINSIC)) + { + printf(" %s %s", tree->AsHWIntrinsic()->GetSimdBaseType() == TYP_UNKNOWN + ? "" + : varTypeName(tree->AsHWIntrinsic()->GetSimdBaseType()), + HWIntrinsicInfo::lookupName(tree->AsHWIntrinsic()->GetHWIntrinsicId())); + } +#endif // defined(FEATURE_HW_INTRINSICS) + + gtDispCommonEndLine(tree); + + if (!topOnly) + { + size_t index = 0; + size_t count = tree->AsMultiOp()->GetOperandCount(); + for (GenTree* operand : tree->AsMultiOp()->Operands()) + { + gtDispChild(operand, indentStack, ++index < count ? 
IIArc : IIArcBottom, nullptr, topOnly); + } + } + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_ARR_ELEM: gtDispCommonEndLine(tree); @@ -12367,7 +12300,6 @@ void Compiler::gtDispLIRNode(GenTree* node, const char* prefixMsg /* = nullptr * { fgArgTabEntry* curArgTabEntry = gtArgEntryByNode(call, operand); assert(curArgTabEntry); - assert(operand->OperGet() != GT_LIST); if (!curArgTabEntry->isLateArg()) { @@ -14336,11 +14268,6 @@ GenTree* Compiler::gtFoldExprConst(GenTree* tree) return op2; } - if (tree->OperIsAnyList()) - { - return tree; - } - switchType = op1->TypeGet(); // Normally we will just switch on op1 types, but for the case where @@ -18491,7 +18418,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( assert(op1 != nullptr); SetOpLclRelatedToSIMDIntrinsic(op1); - GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, simdBaseJitType, simdSize); + GenTreeSIMD* simdNode = new (this, GT_SIMD) + GenTreeSIMD(type, getAllocator(CMK_ASTNode), op1, simdIntrinsicID, simdBaseJitType, simdSize); return simdNode; } @@ -18506,7 +18434,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(var_types type, SetOpLclRelatedToSIMDIntrinsic(op1); SetOpLclRelatedToSIMDIntrinsic(op2); - GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, simdBaseJitType, simdSize); + GenTreeSIMD* simdNode = new (this, GT_SIMD) + GenTreeSIMD(type, getAllocator(CMK_ASTNode), op1, op2, simdIntrinsicID, simdBaseJitType, simdSize); return simdNode; } @@ -18547,7 +18476,7 @@ void Compiler::SetOpLclRelatedToSIMDIntrinsic(GenTree* op) bool GenTree::isCommutativeSIMDIntrinsic() { assert(gtOper == GT_SIMD); - switch (AsSIMD()->gtSIMDIntrinsicID) + switch (AsSIMD()->GetSIMDIntrinsicId()) { case SIMDIntrinsicBitwiseAnd: case SIMDIntrinsicBitwiseOr: @@ -18558,6 +18487,81 @@ bool GenTree::isCommutativeSIMDIntrinsic() } } +void GenTreeMultiOp::ResetOperandArray(size_t newOperandCount, + Compiler* compiler, + GenTree** inlineOperands, + size_t inlineOperandCount) +{ + size_t oldOperandCount = GetOperandCount(); + GenTree** oldOperands = GetOperandArray(); + + if (newOperandCount > oldOperandCount) + { + if (newOperandCount <= inlineOperandCount) + { + assert(oldOperandCount <= inlineOperandCount); + assert(oldOperands == inlineOperands); + } + else + { + // The most difficult case: we need to recreate the dynamic array. + assert(compiler != nullptr); + + m_operands = compiler->getAllocator(CMK_ASTNode).allocate<GenTree*>(newOperandCount); + } + } + else + { + // We are shrinking the array and may in the process switch to an inline representation. + // We choose to do so for simplicity ("if a node has <= InlineOperandCount operands, + // then it stores them inline"), but it may actually be more profitable not to do that, + // as it would save us a copy and a potential cache miss (though the latter seems unlikely).
+ + if ((newOperandCount <= inlineOperandCount) && (oldOperands != inlineOperands)) + { + m_operands = inlineOperands; + } + } + +#ifdef DEBUG + for (size_t i = 0; i < newOperandCount; i++) + { + m_operands[i] = nullptr; + } +#endif // DEBUG + + SetOperandCount(newOperandCount); +} + +/* static */ bool GenTreeMultiOp::OperandsAreEqual(GenTreeMultiOp* op1, GenTreeMultiOp* op2) +{ + if (op1->GetOperandCount() != op2->GetOperandCount()) + { + return false; + } + + for (size_t i = 1; i <= op1->GetOperandCount(); i++) + { + if (!Compare(op1->Op(i), op2->Op(i))) + { + return false; + } + } + + return true; +} + +void GenTreeMultiOp::InitializeOperands(GenTree** operands, size_t operandCount) +{ + for (size_t i = 0; i < operandCount; i++) + { + m_operands[i] = operands[i]; + gtFlags |= (operands[i]->gtFlags & GTF_ALL_EFFECT); + } + + SetOperandCount(operandCount); +} + var_types GenTreeJitIntrinsic::GetAuxiliaryType() const { CorInfoType auxiliaryJitType = GetAuxiliaryJitType(); @@ -18583,12 +18587,20 @@ var_types GenTreeJitIntrinsic::GetSimdBaseType() const // Returns true for the SIMD Intrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeSIMD::OperIsMemoryLoad() const { - if (gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + if (GetSIMDIntrinsicId() == SIMDIntrinsicInitArray) { return true; } return false; } + +// TODO-Review: why are layouts not compared here? +/* static */ bool GenTreeSIMD::Equals(GenTreeSIMD* op1, GenTreeSIMD* op2) +{ + return (op1->TypeGet() == op2->TypeGet()) && (op1->GetSIMDIntrinsicId() == op2->GetSIMDIntrinsicId()) && + (op1->GetSimdBaseType() == op2->GetSimdBaseType()) && (op1->GetSimdSize() == op2->GetSimdSize()) && + OperandsAreEqual(op1, op2); +} #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS @@ -18597,7 +18609,7 @@ bool GenTree::isCommutativeHWIntrinsic() const assert(gtOper == GT_HWINTRINSIC); #ifdef TARGET_XARCH - return HWIntrinsicInfo::IsCommutative(AsHWIntrinsic()->gtHWIntrinsicId); + return HWIntrinsicInfo::IsCommutative(AsHWIntrinsic()->GetHWIntrinsicId()); #else return false; #endif // TARGET_XARCH @@ -18608,7 +18620,7 @@ bool GenTree::isContainableHWIntrinsic() const assert(gtOper == GT_HWINTRINSIC); #ifdef TARGET_XARCH - switch (AsHWIntrinsic()->gtHWIntrinsicId) + switch (AsHWIntrinsic()->GetHWIntrinsicId()) { case NI_SSE_LoadAlignedVector128: case NI_SSE_LoadScalarVector128: @@ -18642,10 +18654,10 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) #if defined(TARGET_XARCH) if (!comp->canUseVexEncoding()) { - return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->gtHWIntrinsicId); + return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId()); } - switch (AsHWIntrinsic()->gtHWIntrinsicId) + switch (AsHWIntrinsic()->GetHWIntrinsicId()) { // TODO-XArch-Cleanup: Move this switch block to be table driven. 
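// [Illustrative sketch, not part of the patch: "ChooseStorage" is a hypothetical
// free-function rendering of the storage decision ResetOperandArray makes above.]
// Growing past the inline capacity forces a freshly allocated dynamic array;
// shrinking to within the inline capacity folds the node back onto its inline
// buffer; otherwise the existing dynamic array is reused in place:
//
//     GenTree** ChooseStorage(size_t newCount, size_t oldCount,
//                             GenTree** oldStorage, GenTree** inlineStorage,
//                             size_t inlineCapacity, CompAllocator alloc)
//     {
//         if ((newCount > oldCount) && (newCount > inlineCapacity))
//         {
//             return alloc.allocate<GenTree*>(newCount); // must recreate the array
//         }
//         if (newCount <= inlineCapacity)
//         {
//             return inlineStorage; // small again: switch to the inline representation
//         }
//         return oldStorage; // shrinking within the existing dynamic array
//     }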
@@ -18671,7 +18683,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) } } #elif defined(TARGET_ARM64) - return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->gtHWIntrinsicId); + return HWIntrinsicInfo::HasRMWSemantics(AsHWIntrinsic()->GetHWIntrinsicId()); #else return false; #endif @@ -18683,8 +18695,8 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, unsigned simdSize, bool isSimdAsHWIntrinsic) { - return new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(type, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, + simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, @@ -18696,8 +18708,8 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, { SetOpLclRelatedToSIMDIntrinsic(op1); - return new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(type, op1, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, + simdBaseJitType, simdSize, isSimdAsHWIntrinsic, op1); } GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, @@ -18711,8 +18723,8 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, SetOpLclRelatedToSIMDIntrinsic(op1); SetOpLclRelatedToSIMDIntrinsic(op2); - return new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, + simdBaseJitType, simdSize, isSimdAsHWIntrinsic, op1, op2); } GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, @@ -18728,8 +18740,8 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, SetOpLclRelatedToSIMDIntrinsic(op2); SetOpLclRelatedToSIMDIntrinsic(op3); - return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, - simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, + simdBaseJitType, simdSize, isSimdAsHWIntrinsic, op1, op2, op3); } GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, @@ -18747,8 +18759,44 @@ GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, SetOpLclRelatedToSIMDIntrinsic(op3); SetOpLclRelatedToSIMDIntrinsic(op4); - return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3, op4), hwIntrinsicID, - simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return new (this, GT_HWINTRINSIC) + GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic, op1, op2, op3, op4); +} + +GenTreeHWIntrinsic* Compiler::gtNewSimdHWIntrinsicNode(var_types type, + GenTree** operands, + size_t operandCount, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic) +{ + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), operandCount); + for (size_t i = 0; i < operandCount; i++) + { + nodeBuilder.AddOperand(i, operands[i]); + SetOpLclRelatedToSIMDIntrinsic(operands[i]); + } + + return new (this, GT_HWINTRINSIC) + GenTreeHWIntrinsic(type, std::move(nodeBuilder), hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); +} + +GenTreeHWIntrinsic* 
Compiler::gtNewSimdHWIntrinsicNode(var_types type, + IntrinsicNodeBuilder&& nodeBuilder, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic) +{ + for (size_t i = 0; i < nodeBuilder.GetOperandCount(); i++) + { + SetOpLclRelatedToSIMDIntrinsic(nodeBuilder.GetOperand(i)); + } + + return new (this, GT_HWINTRINSIC) + GenTreeHWIntrinsic(type, std::move(nodeBuilder), hwIntrinsicID, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); } GenTree* Compiler::gtNewSimdAbsNode( @@ -21800,16 +21848,16 @@ GenTree* Compiler::gtNewSimdZeroNode(var_types type, GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, NamedIntrinsic hwIntrinsicID) { - return new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(type, hwIntrinsicID, CORINFO_TYPE_UNDEF, 0, /* isSimdAsHWIntrinsic */ false); + return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, + CORINFO_TYPE_UNDEF, 0, /* isSimdAsHWIntrinsic */ false); } GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, GenTree* op1, NamedIntrinsic hwIntrinsicID) { SetOpLclRelatedToSIMDIntrinsic(op1); - return new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(type, op1, hwIntrinsicID, CORINFO_TYPE_UNDEF, 0, /* isSimdAsHWIntrinsic */ false); + return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, + CORINFO_TYPE_UNDEF, 0, /* isSimdAsHWIntrinsic */ false, op1); } GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, @@ -21821,7 +21869,8 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types type, SetOpLclRelatedToSIMDIntrinsic(op2); return new (this, GT_HWINTRINSIC) - GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, CORINFO_TYPE_UNDEF, 0, /* isSimdAsHWIntrinsic */ false); + GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, CORINFO_TYPE_UNDEF, 0, + /* isSimdAsHWIntrinsic */ false, op1, op2); } GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode( @@ -21831,21 +21880,22 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode( SetOpLclRelatedToSIMDIntrinsic(op2); SetOpLclRelatedToSIMDIntrinsic(op3); - return new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, - CORINFO_TYPE_UNDEF, 0, /* isSimdAsHWIntrinsic */ false); + return new (this, GT_HWINTRINSIC) + GenTreeHWIntrinsic(type, getAllocator(CMK_ASTNode), hwIntrinsicID, CORINFO_TYPE_UNDEF, 0, + /* isSimdAsHWIntrinsic */ false, op1, op2, op3); } // Returns true for the HW Intrinsic instructions that have MemoryLoad semantics, false otherwise bool GenTreeHWIntrinsic::OperIsMemoryLoad() const { #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(gtHWIntrinsicId); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(GetHWIntrinsicId()); if (category == HW_Category_MemoryLoad) { return true; } #ifdef TARGET_XARCH - else if (HWIntrinsicInfo::MaybeMemoryLoad(gtHWIntrinsicId)) + else if (HWIntrinsicInfo::MaybeMemoryLoad(GetHWIntrinsicId())) { // Some intrinsics (without HW_Category_MemoryLoad) also have MemoryLoad semantics @@ -21855,19 +21905,19 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad() const // Vector128 BroadcastScalarToVector128(Vector128 value) // Vector128 BroadcastScalarToVector128(byte* source) // So, we need to check the argument's type is memory-reference or Vector128 - assert(HWIntrinsicInfo::lookupNumArgs(this) == 1); - return (gtHWIntrinsicId == 
NI_AVX2_BroadcastScalarToVector128 || - gtHWIntrinsicId == NI_AVX2_BroadcastScalarToVector256) && - AsOp()->gtOp1->TypeGet() != TYP_SIMD16; + assert(GetOperandCount() == 1); + return (GetHWIntrinsicId() == NI_AVX2_BroadcastScalarToVector128 || + GetHWIntrinsicId() == NI_AVX2_BroadcastScalarToVector256) && + !Op(1)->TypeIs(TYP_SIMD16); } else if (category == HW_Category_IMM) { // Do we have less than 3 operands? - if (HWIntrinsicInfo::lookupNumArgs(this) < 3) + if (GetOperandCount() < 3) { return false; } - else if (HWIntrinsicInfo::isAVX2GatherIntrinsic(gtHWIntrinsicId)) + else if (HWIntrinsicInfo::isAVX2GatherIntrinsic(GetHWIntrinsicId())) { return true; } @@ -21882,13 +21932,13 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad() const bool GenTreeHWIntrinsic::OperIsMemoryStore() const { #if defined(TARGET_XARCH) || defined(TARGET_ARM64) - HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(gtHWIntrinsicId); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(GetHWIntrinsicId()); if (category == HW_Category_MemoryStore) { return true; } #ifdef TARGET_XARCH - else if (HWIntrinsicInfo::MaybeMemoryStore(gtHWIntrinsicId) && + else if (HWIntrinsicInfo::MaybeMemoryStore(GetHWIntrinsicId()) && (category == HW_Category_IMM || category == HW_Category_Scalar)) { // Some intrinsics (without HW_Category_MemoryStore) also have MemoryStore semantics @@ -21897,9 +21947,9 @@ bool GenTreeHWIntrinsic::OperIsMemoryStore() const // unsafe ulong MultiplyNoFlags(ulong left, ulong right, ulong* low) // // So, the 3-argument form is MemoryStore - if (HWIntrinsicInfo::lookupNumArgs(this) == 3) + if (GetOperandCount() == 3) { - switch (gtHWIntrinsicId) + switch (GetHWIntrinsicId()) { case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: @@ -21924,6 +21974,39 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoadOrStore() const #endif } +NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicId() const +{ + NamedIntrinsic id = gtHWIntrinsicId; + int numArgs = HWIntrinsicInfo::lookupNumArgs(id); + bool numArgsUnknown = numArgs < 0; + + assert((static_cast(numArgs) == GetOperandCount()) || numArgsUnknown); + + return id; +} + +void GenTreeHWIntrinsic::SetHWIntrinsicId(NamedIntrinsic intrinsicId) +{ +#ifdef DEBUG + size_t oldOperandCount = GetOperandCount(); + int newOperandCount = HWIntrinsicInfo::lookupNumArgs(intrinsicId); + bool newCountUnknown = newOperandCount < 0; + + // We'll choose to trust the programmer here. + assert((oldOperandCount == static_cast(newOperandCount)) || newCountUnknown); +#endif // DEBUG + + gtHWIntrinsicId = intrinsicId; +} + +// TODO-Review: why are layouts not compared here? +/* static */ bool GenTreeHWIntrinsic::Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2) +{ + return (op1->TypeGet() == op2->TypeGet()) && (op1->GetHWIntrinsicId() == op2->GetHWIntrinsicId()) && + (op1->GetSimdBaseType() == op2->GetSimdBaseType()) && (op1->GetSimdSize() == op2->GetSimdSize()) && + (op1->GetAuxiliaryType() == op2->GetAuxiliaryType()) && (op1->GetOtherReg() == op2->GetOtherReg()) && + OperandsAreEqual(op1, op2); +} #endif // FEATURE_HW_INTRINSICS //--------------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 003542f8d8c33..8ca83d21bc1b4 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -880,6 +880,10 @@ struct GenTree // This stores the register assigned to the node. If a register is not assigned, _gtRegNum is set to REG_NA. 
regNumberSmall _gtRegNum; + // Count of operands. Used *only* by GenTreeMultiOp, exists solely due to padding constraints. + friend struct GenTreeMultiOp; + uint8_t m_operandCount; + public: // The register number is stored in a small format (8 bits), but the getters return and the setters take // a full-size (unsigned) format, to localize the casts here. @@ -1091,11 +1095,6 @@ struct GenTree // NOPs may only be present in LIR if they do not produce a value. return IsNothingNode(); - case GT_LIST: - // LIST nodes may not be present in a block's LIR sequence, but they may - // be present as children of an LIR node. - return (gtNext == nullptr) && (gtPrev == nullptr); - case GT_ADDR: { // ADDR ndoes may only be present in LIR if the location they refer to is not a @@ -1621,6 +1620,16 @@ struct GenTree OperIsStoreBlk(gtOper) || OperIsAtomicOp(gtOper)); } + static bool OperIsMultiOp(genTreeOps gtOper) + { + return OperIsSIMD(gtOper) || OperIsHWIntrinsic(gtOper); + } + + bool OperIsMultiOp() const + { + return OperIsMultiOp(OperGet()); + } + // This is here for cleaner FEATURE_SIMD #ifdefs. static bool OperIsSIMD(genTreeOps gtOper) { @@ -1704,16 +1713,13 @@ struct GenTree #ifdef DEBUG bool NullOp1Legal() const { - assert(OperIsSimple(gtOper)); + assert(OperIsSimple()); switch (gtOper) { case GT_LEA: case GT_RETFILT: case GT_NOP: case GT_FIELD: -#ifdef FEATURE_HW_INTRINSICS - case GT_HWINTRINSIC: -#endif // FEATURE_HW_INTRINSICS return true; case GT_RETURN: return gtType == TYP_VOID; @@ -1731,17 +1737,8 @@ struct GenTree } switch (gtOper) { - case GT_LIST: case GT_INTRINSIC: case GT_LEA: -#ifdef FEATURE_SIMD - case GT_SIMD: -#endif // !FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS - case GT_HWINTRINSIC: -#endif // FEATURE_HW_INTRINSICS - #if defined(TARGET_ARM) case GT_PUTARG_REG: #endif // defined(TARGET_ARM) @@ -1763,30 +1760,10 @@ struct GenTree inline bool IsBoxedValue(); - static bool OperIsList(genTreeOps gtOper) - { - return gtOper == GT_LIST; - } - - bool OperIsList() const - { - return OperIsList(gtOper); - } - - static bool OperIsAnyList(genTreeOps gtOper) - { - return OperIsList(gtOper); - } - - bool OperIsAnyList() const - { - return OperIsAnyList(gtOper); - } - inline GenTree* gtGetOp1() const; // Directly return op2. Asserts the node is binary. Might return nullptr if the binary node allows - // a nullptr op2, such as GT_LIST. This is more efficient than gtGetOp2IfPresent() if you know what + // a nullptr op2, such as GT_LEA. This is more efficient than gtGetOp2IfPresent() if you know what // node type you have. inline GenTree* gtGetOp2() const; @@ -1802,8 +1779,6 @@ struct GenTree bool TryGetUse(GenTree* def, GenTree*** use); private: - bool TryGetUseList(GenTree* def, GenTree*** use); - bool TryGetUseBinOp(GenTree* def, GenTree*** use); public: @@ -2105,6 +2080,16 @@ struct GenTree return (gtFlags & GTF_REVERSE_OPS) ? true : false; } + void SetReverseOp() + { + gtFlags |= GTF_REVERSE_OPS; + } + + void ClearReverseOp() + { + gtFlags &= ~GTF_REVERSE_OPS; + } + bool IsUnsigned() const { return ((gtFlags & GTF_UNSIGNED) != 0); @@ -2302,9 +2287,6 @@ struct GenTree void VisitOperands(TVisitor visitor); private: - template - VisitResult VisitListOperands(TVisitor visitor); - template void VisitBinOpOperands(TVisitor visitor); @@ -2787,10 +2769,6 @@ struct GenTreeFieldList : public GenTree // GenTreeUseEdgeIterator: an iterator that will produce each use edge of a GenTree node in the order in which // they are used. 
// -// The use edges of a node may not correspond exactly to the nodes on the other ends of its use edges: in -// particular, GT_LIST nodes are expanded into their component parts. This differs from the behavior of -// GenTree::GetChildPointer(), which does not expand lists. -// // Operand iteration is common enough in the back end of the compiler that the implementation of this type has // traded some simplicity for speed: // - As much work as is reasonable is done in the constructor rather than during operand iteration @@ -2823,7 +2801,8 @@ class GenTreeUseEdgeIterator final AdvanceFn m_advance; GenTree* m_node; GenTree** m_edge; - // Pointer sized state storage, GenTreeArgList* or GenTreePhi::Use* or GenTreeCall::Use* currently. + // Pointer sized state storage, GenTreePhi::Use* or GenTreeCall::Use* + // or the exclusive end/beginning of GenTreeMultiOp's operand array. void* m_statePtr; // Integer sized state storage, usually the operand index for non-list based nodes. int m_state; @@ -2843,14 +2822,16 @@ class GenTreeUseEdgeIterator final void AdvanceBinOp(); void SetEntryStateForBinOp(); - // An advance function for list-like nodes (Phi, SIMDIntrinsicInitN, FieldList) - void AdvanceList(); - void SetEntryStateForList(GenTreeArgList* list); - // The advance function for call nodes template void AdvanceCall(); +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + void AdvanceMultiOp(); + void AdvanceReversedMultiOp(); + void SetEntryStateForMultiOp(); +#endif + void Terminate(); public: @@ -3732,54 +3713,6 @@ struct GenTreeField : public GenTreeUnOp } }; -// Represents the Argument list of a call node, as a Lisp-style linked list. -// (Originally I had hoped that this could have *only* the m_arg/m_rest fields, but it turns out -// that enough of the GenTree mechanism is used that it makes sense just to make it a subtype. But -// note that in many ways, this is *not* a "real" node of the tree, but rather a mechanism for -// giving call nodes a flexible number of children. GenTreeArgListNodes never evaluate to registers, -// for example.) - -// Note that while this extends GenTreeOp, it is *not* an EXOP. We don't add any new fields, and one -// is free to allocate a GenTreeOp of type GT_LIST. If you use this type, you get the convenient Current/Rest -// method names for the arguments. -struct GenTreeArgList : public GenTreeOp -{ - GenTree*& Current() - { - return gtOp1; - } - GenTreeArgList*& Rest() - { - assert(gtOp2 == nullptr || gtOp2->OperIsAnyList()); - return *reinterpret_cast(>Op2); - } - -#if DEBUGGABLE_GENTREE - GenTreeArgList() : GenTreeOp() - { - } -#endif - - GenTreeArgList(GenTree* arg) : GenTreeArgList(arg, nullptr) - { - } - - GenTreeArgList(GenTree* arg, GenTreeArgList* rest) : GenTreeArgList(GT_LIST, arg, rest) - { - } - - GenTreeArgList(genTreeOps oper, GenTree* arg, GenTreeArgList* rest) : GenTreeOp(oper, TYP_VOID, arg, rest) - { - assert(OperIsAnyList(oper)); - assert((arg != nullptr) && arg->IsValidCallArgument()); - gtFlags |= arg->gtFlags & GTF_ALL_EFFECT; - if (rest != nullptr) - { - gtFlags |= rest->gtFlags & GTF_ALL_EFFECT; - } - } -}; - // There was quite a bit of confusion in the code base about which of gtOp1 and gtOp2 was the // 'then' and 'else' clause of a colon node. Adding these accessors, while not enforcing anything, // at least *allows* the programmer to be obviously correct. 
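// [Illustrative sketch, not part of the patch: "WalkOperands" and "Visit" are
// hypothetical standalone renderings of what AdvanceMultiOp and
// AdvanceReversedMultiOp, declared above, do with "m_edge" and "m_statePtr".]
// With the operands in one contiguous array, iteration is a pointer bump in
// either direction, and GTF_REVERSE_OPS simply flips the direction for the
// two-operand case:
//
//     void WalkOperands(GenTree** operands, size_t count, bool reverseOps)
//     {
//         if (reverseOps)
//         {
//             // Only legal for count == 2, matching the assert in gentree.cpp.
//             for (GenTree** edge = operands + count - 1; edge != operands - 1; edge--)
//             {
//                 Visit(*edge); // "m_statePtr" plays the role of "operands - 1"
//             }
//         }
//         else
//         {
//             for (GenTree** edge = operands; edge != operands + count; edge++)
//             {
//                 Visit(*edge); // "m_statePtr" plays the role of "operands + count"
//             }
//         }
//     }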
@@ -5124,18 +5057,243 @@ struct GenTreeIntrinsic : public GenTreeOp #endif }; -struct GenTreeJitIntrinsic : public GenTreeOp +// GenTreeMultiOp - a node with a flexible count of operands stored in an array. +// The array can be an inline one, or a dynamic one, or both, with switching +// between them supported. See GenTreeJitIntrinsic for an example of a node +// utilizing GenTreeMultiOp. GTF_REVERSE_OPS is supported for GenTreeMultiOp nodes +// with two operands. +// +struct GenTreeMultiOp : public GenTree { +public: + class Iterator + { + protected: + GenTree** m_use; + + Iterator(GenTree** use) : m_use(use) + { + } + + public: + Iterator& operator++() + { + m_use++; + return *this; + } + + bool operator==(const Iterator& other) const + { + return m_use == other.m_use; + } + + bool operator!=(const Iterator& other) const + { + return m_use != other.m_use; + } + }; + + class OperandsIterator final : public Iterator + { + public: + OperandsIterator(GenTree** use) : Iterator(use) + { + } + + GenTree* operator*() + { + return *m_use; + } + }; + + class UseEdgesIterator final : public Iterator + { + public: + UseEdgesIterator(GenTree** use) : Iterator(use) + { + } + + GenTree** operator*() + { + return m_use; + } + }; + private: - ClassLayout* gtLayout; + GenTree** m_operands; - unsigned char gtAuxiliaryJitType; // For intrinsics than need another type (e.g. Avx2.Gather* or SIMD (by element)) - regNumberSmall gtOtherReg; // For intrinsics that return 2 registers +protected: + template <size_t InlineOperandCount, typename... Operands> + GenTreeMultiOp(genTreeOps oper, + var_types type, + CompAllocator allocator, + GenTree* (&inlineOperands)[InlineOperandCount] DEBUGARG(bool largeNode), + Operands... operands) + : GenTree(oper, type DEBUGARG(largeNode)) + { + const size_t OperandCount = sizeof...(Operands); - unsigned char gtSimdBaseJitType; // SIMD vector base JIT type - unsigned char gtSimdSize; // SIMD vector size in bytes, use 0 for scalar intrinsics + m_operands = (OperandCount <= InlineOperandCount) ? inlineOperands : allocator.allocate<GenTree*>(OperandCount); + + // "OperandCount + 1" so that it works well when OperandCount is 0. + GenTree* operandsArray[OperandCount + 1]{operands...}; + InitializeOperands(operandsArray, OperandCount); + } + + // Note that this constructor takes ownership of the "operands" array. + template <size_t InlineOperandCount> + GenTreeMultiOp(genTreeOps oper, + var_types type, + GenTree** operands, + size_t operandCount, + GenTree* (&inlineOperands)[InlineOperandCount] DEBUGARG(bool largeNode)) + : GenTree(oper, type DEBUGARG(largeNode)) + { + m_operands = (operandCount <= InlineOperandCount) ? inlineOperands : operands; + + InitializeOperands(operands, operandCount); + } public: +#if DEBUGGABLE_GENTREE + GenTreeMultiOp() : GenTree() + { + } +#endif + + GenTree*& Op(size_t index) + { + size_t actualIndex = index - 1; + assert(actualIndex < m_operandCount); + assert(m_operands[actualIndex] != nullptr); + + return m_operands[actualIndex]; + } + + GenTree* Op(size_t index) const + { + return const_cast<GenTreeMultiOp*>(this)->Op(index); + } + + // Note that unlike the general "Operands" iterator, this specialized version does not respect GTF_REVERSE_OPS. + IteratorPair<OperandsIterator> Operands() + { + return MakeIteratorPair(OperandsIterator(GetOperandArray()), + OperandsIterator(GetOperandArray() + GetOperandCount())); + } + + // Note that unlike the general "UseEdges" iterator, this specialized version does not respect GTF_REVERSE_OPS.
+ IteratorPair<UseEdgesIterator> UseEdges() + { + return MakeIteratorPair(UseEdgesIterator(GetOperandArray()), + UseEdgesIterator(GetOperandArray() + GetOperandCount())); + } + + size_t GetOperandCount() const + { + return m_operandCount; + } + + GenTree** GetOperandArray(size_t startIndex = 0) const + { + return m_operands + startIndex; + } + +protected: + // Reconfigures the operand array, leaving it in a "dirty" state. + void ResetOperandArray(size_t newOperandCount, + Compiler* compiler, + GenTree** inlineOperands, + size_t inlineOperandCount); + + static bool OperandsAreEqual(GenTreeMultiOp* op1, GenTreeMultiOp* op2); + +private: + void InitializeOperands(GenTree** operands, size_t operandCount); + + void SetOperandCount(size_t newOperandCount) + { + assert(FitsIn<uint8_t>(newOperandCount)); + m_operandCount = static_cast<uint8_t>(newOperandCount); + } +}; + +// Helper class used to implement the constructor of GenTreeJitIntrinsic which +// transfers the ownership of the passed-in array to the underlying MultiOp node. +class IntrinsicNodeBuilder final +{ + friend struct GenTreeJitIntrinsic; + + GenTree** m_operands; + size_t m_operandCount; + GenTree* m_inlineOperands[2]; + +public: + IntrinsicNodeBuilder(CompAllocator allocator, size_t operandCount) : m_operandCount(operandCount) + { + m_operands = + (operandCount <= ArrLen(m_inlineOperands)) ? m_inlineOperands : allocator.allocate<GenTree*>(operandCount); +#ifdef DEBUG + for (size_t i = 0; i < operandCount; i++) + { + m_operands[i] = nullptr; + } +#endif // DEBUG + } + + IntrinsicNodeBuilder(CompAllocator allocator, GenTreeMultiOp* source) : m_operandCount(source->GetOperandCount()) + { + m_operands = (m_operandCount <= ArrLen(m_inlineOperands)) ? m_inlineOperands : allocator.allocate<GenTree*>(m_operandCount); + for (size_t i = 0; i < m_operandCount; i++) + { + m_operands[i] = source->Op(i + 1); + } + } + + void AddOperand(size_t index, GenTree* operand) + { + assert(index < m_operandCount); + assert(m_operands[index] == nullptr); + m_operands[index] = operand; + } + + GenTree* GetOperand(size_t index) const + { + assert(index < m_operandCount); + assert(m_operands[index] != nullptr); + return m_operands[index]; + } + + size_t GetOperandCount() const + { + return m_operandCount; + } + +private: + GenTree** GetBuiltOperands() + { +#ifdef DEBUG + for (size_t i = 0; i < m_operandCount; i++) + { + assert(m_operands[i] != nullptr); + } +#endif // DEBUG + + return m_operands; + } +}; + +struct GenTreeJitIntrinsic : public GenTreeMultiOp +{ +protected: + GenTree* gtInlineOperands[2]; + uint16_t gtLayoutNum; + unsigned char gtAuxiliaryJitType; // For intrinsics that need another type (e.g.
Avx2.Gather* or SIMD (by element)) + regNumberSmall gtOtherReg; // For intrinsics that return 2 registers + unsigned char gtSimdBaseJitType; // SIMD vector base JIT type + unsigned char gtSimdSize; // SIMD vector size in bytes, use 0 for scalar intrinsics + #if defined(FEATURE_SIMD) union { SIMDIntrinsicID gtSIMDIntrinsicID; // operation Id @@ -5145,15 +5303,16 @@ struct GenTreeJitIntrinsic : public GenTreeOp NamedIntrinsic gtHWIntrinsicId; #endif - ClassLayout* GetLayout() const +public: + unsigned GetLayoutNum() const { - return gtLayout; + return gtLayoutNum; } - void SetLayout(ClassLayout* layout) + void SetLayoutNum(unsigned layoutNum) { - assert(layout != nullptr); - gtLayout = layout; + assert(FitsIn(layoutNum)); + gtLayoutNum = static_cast(layoutNum); } regNumber GetOtherReg() const @@ -5204,10 +5363,15 @@ struct GenTreeJitIntrinsic : public GenTreeOp assert(gtSimdSize == simdSize); } - GenTreeJitIntrinsic( - genTreeOps oper, var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) - : GenTreeOp(oper, type, op1, op2) - , gtLayout(nullptr) + template + GenTreeJitIntrinsic(genTreeOps oper, + var_types type, + CompAllocator allocator, + CorInfoType simdBaseJitType, + unsigned simdSize, + Operands... operands) + : GenTreeMultiOp(oper, type, allocator, gtInlineOperands DEBUGARG(false), operands...) + , gtLayoutNum(0) , gtAuxiliaryJitType(CORINFO_TYPE_UNDEF) , gtOtherReg(REG_NA) , gtSimdBaseJitType((unsigned char)simdBaseJitType) @@ -5218,16 +5382,39 @@ struct GenTreeJitIntrinsic : public GenTreeOp assert(gtSimdSize == simdSize); } - bool isSIMD() const +#if DEBUGGABLE_GENTREE + GenTreeJitIntrinsic() : GenTreeMultiOp() { - return gtSimdSize != 0; } +#endif -#if DEBUGGABLE_GENTREE - GenTreeJitIntrinsic() : GenTreeOp() +protected: + GenTreeJitIntrinsic(genTreeOps oper, + var_types type, + IntrinsicNodeBuilder&& nodeBuilder, + CorInfoType simdBaseJitType, + unsigned simdSize) + : GenTreeMultiOp(oper, + type, + nodeBuilder.GetBuiltOperands(), + nodeBuilder.GetOperandCount(), + gtInlineOperands DEBUGARG(false)) + , gtLayoutNum(0) + , gtAuxiliaryJitType(CORINFO_TYPE_UNDEF) + , gtOtherReg(REG_NA) + , gtSimdBaseJitType((unsigned char)simdBaseJitType) + , gtSimdSize((unsigned char)simdSize) + , gtHWIntrinsicId(NI_Illegal) { + assert(gtSimdBaseJitType == simdBaseJitType); + assert(gtSimdSize == simdSize); + } + +public: + bool isSIMD() const + { + return gtSimdSize != 0; } -#endif }; #ifdef FEATURE_SIMD @@ -5235,63 +5422,69 @@ struct GenTreeJitIntrinsic : public GenTreeOp /* gtSIMD -- SIMD intrinsic (possibly-binary op [NULL op2 is allowed] with additional fields) */ struct GenTreeSIMD : public GenTreeJitIntrinsic { + GenTreeSIMD(var_types type, + IntrinsicNodeBuilder&& nodeBuilder, + SIMDIntrinsicID simdIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize) + : GenTreeJitIntrinsic(GT_SIMD, type, std::move(nodeBuilder), simdBaseJitType, simdSize) + { + gtSIMDIntrinsicID = simdIntrinsicID; + } - GenTreeSIMD( - var_types type, GenTree* op1, SIMDIntrinsicID simdIntrinsicID, CorInfoType simdBaseJitType, unsigned simdSize) - : GenTreeJitIntrinsic(GT_SIMD, type, op1, nullptr, simdBaseJitType, simdSize) + GenTreeSIMD(var_types type, + CompAllocator allocator, + GenTree* op1, + SIMDIntrinsicID simdIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize) + : GenTreeJitIntrinsic(GT_SIMD, type, allocator, simdBaseJitType, simdSize, op1) { gtSIMDIntrinsicID = simdIntrinsicID; } GenTreeSIMD(var_types type, + CompAllocator allocator, GenTree* op1, GenTree* 
op2, SIMDIntrinsicID simdIntrinsicID, CorInfoType simdBaseJitType, unsigned simdSize) - : GenTreeJitIntrinsic(GT_SIMD, type, op1, op2, simdBaseJitType, simdSize) + : GenTreeJitIntrinsic(GT_SIMD, type, allocator, simdBaseJitType, simdSize, op1, op2) { gtSIMDIntrinsicID = simdIntrinsicID; } - bool OperIsMemoryLoad() const; // Returns true for the SIMD Intrinsic instructions that have MemoryLoad semantics, - // false otherwise - #if DEBUGGABLE_GENTREE GenTreeSIMD() : GenTreeJitIntrinsic() { } #endif + + bool OperIsMemoryLoad() const; // Returns true for the SIMD Intrinsic instructions that have MemoryLoad semantics, + // false otherwise + + SIMDIntrinsicID GetSIMDIntrinsicId() const + { + return gtSIMDIntrinsicID; + } + + static bool Equals(GenTreeSIMD* op1, GenTreeSIMD* op2); }; #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic { - GenTreeHWIntrinsic(var_types type, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - bool isSimdAsHWIntrinsic) - : GenTreeJitIntrinsic(GT_HWINTRINSIC, type, nullptr, nullptr, simdBaseJitType, simdSize) - { - gtHWIntrinsicId = hwIntrinsicID; - - if (isSimdAsHWIntrinsic) - { - gtFlags |= GTF_SIMDASHW_OP; - } - } - - GenTreeHWIntrinsic(var_types type, - GenTree* op1, - NamedIntrinsic hwIntrinsicID, - CorInfoType simdBaseJitType, - unsigned simdSize, - bool isSimdAsHWIntrinsic) - : GenTreeJitIntrinsic(GT_HWINTRINSIC, type, op1, nullptr, simdBaseJitType, simdSize) + GenTreeHWIntrinsic(var_types type, + IntrinsicNodeBuilder&& nodeBuilder, + NamedIntrinsic hwIntrinsicID, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isSimdAsHWIntrinsic) + : GenTreeJitIntrinsic(GT_HWINTRINSIC, type, std::move(nodeBuilder), simdBaseJitType, simdSize) { - gtHWIntrinsicId = hwIntrinsicID; + SetHWIntrinsicId(hwIntrinsicID); if (OperIsMemoryStore()) { @@ -5304,18 +5497,19 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic } } + template GenTreeHWIntrinsic(var_types type, - GenTree* op1, - GenTree* op2, + CompAllocator allocator, NamedIntrinsic hwIntrinsicID, CorInfoType simdBaseJitType, unsigned simdSize, - bool isSimdAsHWIntrinsic) - : GenTreeJitIntrinsic(GT_HWINTRINSIC, type, op1, op2, simdBaseJitType, simdSize) + bool isSimdAsHWIntrinsic, + Operands... operands) + : GenTreeJitIntrinsic(GT_HWINTRINSIC, type, allocator, simdBaseJitType, simdSize, operands...) 
{ - gtHWIntrinsicId = hwIntrinsicID; + SetHWIntrinsicId(hwIntrinsicID); - if (OperIsMemoryStore()) + if ((sizeof...(Operands) > 0) && OperIsMemoryStore()) { gtFlags |= (GTF_GLOB_REF | GTF_ASG); } @@ -5326,9 +5520,11 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic } } - // Note that HW Intrinsic instructions are a sub class of GenTreeOp which only supports two operands - // However there are HW Intrinsic instructions that have 3 or even 4 operands and this is - // supported using a single op1 and using an ArgList for it: gtNewArgList(op1, op2, op3) +#if DEBUGGABLE_GENTREE + GenTreeHWIntrinsic() : GenTreeJitIntrinsic() + { + } +#endif bool OperIsMemoryLoad() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad semantics, // false otherwise @@ -5336,17 +5532,99 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic // false otherwise bool OperIsMemoryLoadOrStore() const; // Returns true for the HW Intrinsic instructions that have MemoryLoad or // MemoryStore semantics, false otherwise - bool IsSimdAsHWIntrinsic() const { return (gtFlags & GTF_SIMDASHW_OP) != 0; } -#if DEBUGGABLE_GENTREE - GenTreeHWIntrinsic() : GenTreeJitIntrinsic() + NamedIntrinsic GetHWIntrinsicId() const; + + //--------------------------------------------------------------------------------------- + // ChangeHWIntrinsicId: Change the intrinsic id for this node. + // + // This method just sets the intrinsic id, asserting that the new intrinsic + // has the same number of operands as the old one, optionally setting some of + // the new operands. Intrinsics with an unknown number of operands are exempt + // from the "do I have the same number of operands" check however, so this method must + // be used with care. Use "ResetHWIntrinsicId" if you need to fully reconfigure + // the node for a different intrinsic, with a possibly different number of operands. + // + // Arguments: + // intrinsicId - the new intrinsic id for the node + // operands - optional operands to set while changing the id + // + // Notes: + // It is the caller's responsibility to update side effect flags. + // + template <typename... Operands> + void ChangeHWIntrinsicId(NamedIntrinsic intrinsicId, Operands... operands) { + const size_t OperandCount = sizeof...(Operands); + assert(OperandCount <= GetOperandCount()); + + SetHWIntrinsicId(intrinsicId); + + GenTree* operandsArray[OperandCount + 1]{operands...}; + GenTree** operandsStore = GetOperandArray(); + + for (size_t i = 0; i < OperandCount; i++) + { + operandsStore[i] = operandsArray[i]; + } } -#endif + + //--------------------------------------------------------------------------------------- + // ResetHWIntrinsicId: Reset the intrinsic id for this node. + // + // This method resets the intrinsic id, fully reconfiguring the node. It must + // be supplied with all the operands the new node needs, and can allocate a + // new dynamic array if the operands do not fit into an inline one, in which + // case a compiler argument is used to get the memory allocator. + // + // This method is similar to "ChangeHWIntrinsicId" but is more versatile and + // thus more expensive. Use it when you need to bash to an intrinsic id with + // a different number of operands than what the original node had, or, which + // is equivalent, when you do not know the original number of operands.
+ // + // Arguments: + // intrinsicId - the new intrinsic id for the node + // compiler - compiler to allocate memory with, can be "nullptr" if the + // number of new operands does not exceed the length of the + // inline array (so, there are 2 or fewer of them) + // operands - *all* operands for the new node + // + // Notes: + // It is the caller's responsibility to update side effect flags. + // + template <typename... Operands> + void ResetHWIntrinsicId(NamedIntrinsic intrinsicId, Compiler* compiler, Operands... operands) + { + const size_t NewOperandCount = sizeof...(Operands); + assert((compiler != nullptr) || (NewOperandCount <= ArrLen(gtInlineOperands))); + + ResetOperandArray(NewOperandCount, compiler, gtInlineOperands, ArrLen(gtInlineOperands)); + ChangeHWIntrinsicId(intrinsicId, operands...); + } + + void ResetHWIntrinsicId(NamedIntrinsic intrinsicId, GenTree* op1, GenTree* op2) + { + ResetHWIntrinsicId(intrinsicId, static_cast<Compiler*>(nullptr), op1, op2); + } + + void ResetHWIntrinsicId(NamedIntrinsic intrinsicId, GenTree* op1) + { + ResetHWIntrinsicId(intrinsicId, static_cast<Compiler*>(nullptr), op1); + } + + void ResetHWIntrinsicId(NamedIntrinsic intrinsicId) + { + ResetHWIntrinsicId(intrinsicId, static_cast<Compiler*>(nullptr)); + } + + static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2); + +private: + void SetHWIntrinsicId(NamedIntrinsic intrinsicId); }; #endif // FEATURE_HW_INTRINSICS @@ -7396,11 +7674,11 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal) const #ifdef FEATURE_SIMD // SIMDIntrinsicInit intrinsic with a const value as initializer // represents a const vector. - if ((gtOper == GT_SIMD) && (AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInit) && - gtGetOp1()->IsIntegralConst(constVal)) + if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit) && + AsSIMD()->Op(1)->IsIntegralConst(constVal)) { assert(varTypeIsIntegral(AsSIMD()->GetSimdBaseType())); - assert(gtGetOp2IfPresent() == nullptr); + assert(AsSIMD()->GetOperandCount() == 1); return true; } #endif // FEATURE_SIMD @@ -7416,34 +7694,23 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal) const return false; } - GenTree* op1 = gtGetOp1(); - GenTree* op2 = gtGetOp2(); + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - - if (op1 == nullptr) + if ((node->GetOperandCount() == 0) && (constVal == 0)) { - assert(op2 == nullptr); - - if (constVal == 0) - { #if defined(TARGET_XARCH) - return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); + return (intrinsicId == NI_Vector128_get_Zero) || (intrinsicId == NI_Vector256_get_Zero); #elif defined(TARGET_ARM64) - return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); + return (intrinsicId == NI_Vector64_get_Zero) || (intrinsicId == NI_Vector128_get_Zero); #endif // !TARGET_XARCH && !TARGET_ARM64 - } } - else if ((op2 == nullptr) && !op1->OperIsList()) + else if ((node->GetOperandCount() == 1) && node->Op(1)->IsIntegralConst(constVal)) { - if (op1->IsIntegralConst(constVal)) - { #if defined(TARGET_XARCH) - return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create); + return (intrinsicId == NI_Vector128_Create) || (intrinsicId == NI_Vector256_Create); #elif defined(TARGET_ARM64) - return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create); + return (intrinsicId == NI_Vector64_Create) || (intrinsicId == NI_Vector128_Create); #endif // !TARGET_XARCH && !TARGET_ARM64 - } } } #endif //
FEATURE_HW_INTRINSICS @@ -7461,9 +7728,9 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal) const inline bool GenTree::IsSIMDZero() const { #ifdef FEATURE_SIMD - if ((gtOper == GT_SIMD) && (AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInit)) + if ((gtOper == GT_SIMD) && (AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInit)) { - return (gtGetOp1()->IsIntegralConst(0) || gtGetOp1()->IsFPZero()); + return (AsSIMD()->Op(1)->IsIntegralConst(0) || AsSIMD()->Op(1)->IsFPZero()); } #endif @@ -7502,10 +7769,6 @@ inline bool GenTree::IsBoxedValue() inline bool GenTree::IsValidCallArgument() { - if (OperIsList()) - { - return false; - } if (OperIs(GT_FIELD_LIST)) { #if !FEATURE_MULTIREG_ARGS && !FEATURE_PUT_STRUCT_ARG_STK @@ -7575,7 +7838,7 @@ inline GenTree* GenTree::gtGetOp2() const GenTree* op2 = AsOp()->gtOp2; - // Only allow null op2 if the node type allows it, e.g. GT_LIST. + // Only allow null op2 if the node type allows it, e.g. GT_LEA. assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper)); return op2; diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index 1c5a9ff0bc37e..f756a3b7bd470 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -193,11 +193,11 @@ GTNODE(RSH_LO , GenTreeOp ,0,GTK_BINOP) #endif // !defined(TARGET_64BIT) #ifdef FEATURE_SIMD -GTNODE(SIMD , GenTreeSIMD ,0,(GTK_BINOP|GTK_EXOP)) // SIMD functions/operators/intrinsics +GTNODE(SIMD , GenTreeSIMD ,0,GTK_SPECIAL) // SIMD functions/operators/intrinsics #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS -GTNODE(HWINTRINSIC , GenTreeHWIntrinsic ,0,(GTK_BINOP|GTK_EXOP)) // hardware intrinsics +GTNODE(HWINTRINSIC , GenTreeHWIntrinsic ,0,GTK_SPECIAL) // hardware intrinsics #endif // FEATURE_HW_INTRINSICS //----------------------------------------------------------------------------- @@ -246,8 +246,6 @@ GTNODE(BT , GenTreeOp ,0,(GTK_BINOP|GTK_NOVALUE)) // The GTNODE(JTRUE , GenTreeOp ,0,(GTK_UNOP|GTK_NOVALUE)) -GTNODE(LIST , GenTreeArgList ,0,(GTK_BINOP|GTK_NOVALUE)) - //----------------------------------------------------------------------------- // Other nodes that have special structure: //----------------------------------------------------------------------------- diff --git a/src/coreclr/jit/gtstructs.h b/src/coreclr/jit/gtstructs.h index b4bad947fd90f..70a7901b7aca2 100644 --- a/src/coreclr/jit/gtstructs.h +++ b/src/coreclr/jit/gtstructs.h @@ -67,7 +67,6 @@ GTSTRUCT_1(Cast , GT_CAST) GTSTRUCT_1(Box , GT_BOX) GTSTRUCT_1(Field , GT_FIELD) GTSTRUCT_1(Call , GT_CALL) -GTSTRUCT_1(ArgList , GT_LIST) GTSTRUCT_1(FieldList , GT_FIELD_LIST) GTSTRUCT_1(Colon , GT_COLON) GTSTRUCT_1(FptrVal , GT_FTN_ADDR) @@ -76,10 +75,13 @@ GTSTRUCT_1(Index , GT_INDEX) GTSTRUCT_1(IndexAddr , GT_INDEX_ADDR) #if defined(FEATURE_HW_INTRINSICS) && defined(FEATURE_SIMD) GTSTRUCT_3(BoundsChk , GT_ARR_BOUNDS_CHECK, GT_SIMD_CHK, GT_HW_INTRINSIC_CHK) +GTSTRUCT_N(MultiOp , GT_SIMD, GT_HWINTRINSIC) #elif defined(FEATURE_SIMD) GTSTRUCT_2(BoundsChk , GT_ARR_BOUNDS_CHECK, GT_SIMD_CHK) +GTSTRUCT_N(MultiOp , GT_SIMD) #elif defined(FEATURE_HW_INTRINSICS) GTSTRUCT_2(BoundsChk , GT_ARR_BOUNDS_CHECK, GT_HW_INTRINSIC_CHK) +GTSTRUCT_N(MultiOp , GT_HWINTRINSIC) #else // !FEATURE_SIMD && !FEATURE_HW_INTRINSICS GTSTRUCT_1(BoundsChk , GT_ARR_BOUNDS_CHECK) #endif // !FEATURE_SIMD && !FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 908dac27c60ff..70ea91dc4032c 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -366,92 +366,6 @@ unsigned 
HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI return simdSize; } -//------------------------------------------------------------------------ -// lookupNumArgs: Gets the number of args for a given HWIntrinsic node -// -// Arguments: -// node -- The HWIntrinsic node to get the number of args for -// -// Return Value: -// The number of args for the HWIntrinsic associated with node -int HWIntrinsicInfo::lookupNumArgs(const GenTreeHWIntrinsic* node) -{ - assert(node != nullptr); - - NamedIntrinsic id = node->gtHWIntrinsicId; - int numArgs = lookupNumArgs(id); - - if (numArgs >= 0) - { - return numArgs; - } - - assert(numArgs == -1); - - GenTree* op1 = node->gtGetOp1(); - - if (op1 == nullptr) - { - return 0; - } - - if (op1->OperIsList()) - { - GenTreeArgList* list = op1->AsArgList(); - numArgs = 0; - - do - { - numArgs++; - list = list->Rest(); - } while (list != nullptr); - - return numArgs; - } - - GenTree* op2 = node->gtGetOp2(); - - return (op2 == nullptr) ? 1 : 2; -} - -//------------------------------------------------------------------------ -// lookupLastOp: Gets the last operand for a given HWIntrinsic node -// -// Arguments: -// node -- The HWIntrinsic node to get the last operand for -// -// Return Value: -// The last operand for node -GenTree* HWIntrinsicInfo::lookupLastOp(const GenTreeHWIntrinsic* node) -{ - assert(node != nullptr); - - GenTree* op1 = node->gtGetOp1(); - - if (op1 == nullptr) - { - return nullptr; - } - - if (op1->OperIsList()) - { - GenTreeArgList* list = op1->AsArgList(); - GenTree* last; - - do - { - last = list->Current(); - list = list->Rest(); - } while (list != nullptr); - - return last; - } - - GenTree* op2 = node->gtGetOp2(); - - return (op2 == nullptr) ? op1 : op2; -} - //------------------------------------------------------------------------ // isImmOp: Checks whether the HWIntrinsic node has an imm operand // diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 186b64463d820..43bd543c599f1 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -309,8 +309,6 @@ struct HWIntrinsicInfo static CORINFO_InstructionSet lookupIsa(const char* className, const char* enclosingClassName); static unsigned lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORINFO_SIG_INFO* sig); - static int lookupNumArgs(const GenTreeHWIntrinsic* node); - static GenTree* lookupLastOp(const GenTreeHWIntrinsic* node); #if defined(TARGET_XARCH) static int lookupImmUpperBound(NamedIntrinsic intrinsic); @@ -725,7 +723,7 @@ struct HWIntrinsic final { assert(node != nullptr); - id = node->gtHWIntrinsicId; + id = node->GetHWIntrinsicId(); category = HWIntrinsicInfo::lookupCategory(id); assert(HWIntrinsicInfo::RequiresCodegen(id)); @@ -749,53 +747,34 @@ struct HWIntrinsic final GenTree* op2; GenTree* op3; GenTree* op4; - int numOperands; + size_t numOperands; var_types baseType; private: void InitializeOperands(const GenTreeHWIntrinsic* node) { - op1 = node->gtGetOp1(); - op2 = node->gtGetOp2(); + numOperands = node->GetOperandCount(); - if (op1 == nullptr) + switch (numOperands) { - numOperands = 0; - } - else if (op1->OperIsList()) - { - assert(op2 == nullptr); - - GenTreeArgList* list = op1->AsArgList(); - op1 = list->Current(); - list = list->Rest(); - op2 = list->Current(); - list = list->Rest(); - op3 = list->Current(); - list = list->Rest(); - - if (list != nullptr) - { - op4 = list->Current(); - assert(list->Rest() == nullptr); + case 4: + op4 = node->Op(4); + FALLTHROUGH; + case 3: + op3 = 
node->Op(3); + FALLTHROUGH; + case 2: + op2 = node->Op(2); + FALLTHROUGH; + case 1: + op1 = node->Op(1); + FALLTHROUGH; + case 0: + break; - numOperands = 4; - } - else - { - numOperands = 3; - } - } - else if (op2 != nullptr) - { - numOperands = 2; - } - else - { - numOperands = 1; + default: + unreached(); } - - assert(HWIntrinsicInfo::lookupNumArgs(id) == numOperands); } void InitializeBaseType(const GenTreeHWIntrinsic* node) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index f72fbaf1df7e1..7fea81a34e2c1 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -498,31 +498,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // We shouldn't handle this as an intrinsic if the // respective ISAs have been disabled by the user. - if (sig->numArgs == 1) - { - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } - else if (sig->numArgs == 2) + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); + + for (int i = sig->numArgs - 1; i >= 0; i--) { - op2 = impPopStack().val; - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + nodeBuilder.AddOperand(i, impPopStack().val); } - else - { - assert(sig->numArgs >= 3); - GenTreeArgList* tmp = nullptr; - - for (unsigned i = 0; i < sig->numArgs; i++) - { - tmp = gtNewListNode(impPopStack().val, tmp); - } - - op1 = tmp; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } + retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); break; } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 8604d0db811d2..706b988f049e5 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -51,7 +51,7 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTre } else { - const HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrin->gtHWIntrinsicId); + const HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrin->GetHWIntrinsicId()); if (category == HW_Category_SIMDByIndexedElement) { @@ -71,13 +71,13 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTre assert(varTypeIsSIMD(indexedElementOpType)); const unsigned int indexedElementSimdSize = genTypeSize(indexedElementOpType); - HWIntrinsicInfo::lookupImmBounds(intrin->gtHWIntrinsicId, indexedElementSimdSize, intrin->GetSimdBaseType(), - &immLowerBound, &immUpperBound); + HWIntrinsicInfo::lookupImmBounds(intrin->GetHWIntrinsicId(), indexedElementSimdSize, + intrin->GetSimdBaseType(), &immLowerBound, &immUpperBound); } else { - HWIntrinsicInfo::lookupImmBounds(intrin->gtHWIntrinsicId, intrin->GetSimdSize(), intrin->GetSimdBaseType(), - &immLowerBound, &immUpperBound); + HWIntrinsicInfo::lookupImmBounds(intrin->GetHWIntrinsicId(), intrin->GetSimdSize(), + intrin->GetSimdBaseType(), &immLowerBound, &immUpperBound); } nonConstImmReg = immOp->GetRegNum(); @@ -95,7 +95,7 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTre // these by // using the same approach as in hwintrinsicxarch.cpp - adding an additional indirection level in form of a // branch table. 
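
The descending switch with FALLTHROUGH above is the idiom this change uses wherever a fixed number of named operands is unpacked from a GenTreeMultiOp. A minimal sketch of the shape (simplified: an array stands in for the named op1..op4 fields; only the Op/GetOperandCount accessors introduced by this change are assumed):

    // Populate up to four operands from a multi-op node.
    // Op(i) is 1-based; each case falls through so that a count
    // of N fills slot N and everything below it.
    GenTree* ops[4] = {};
    switch (node->GetOperandCount())
    {
        case 4:
            ops[3] = node->Op(4);
            FALLTHROUGH;
        case 3:
            ops[2] = node->Op(3);
            FALLTHROUGH;
        case 2:
            ops[1] = node->Op(2);
            FALLTHROUGH;
        case 1:
            ops[0] = node->Op(1);
            FALLTHROUGH;
        case 0:
            break;
        default:
            unreached();
    }
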
- assert(!HWIntrinsicInfo::GeneratesMultipleIns(intrin->gtHWIntrinsicId)); + assert(!HWIntrinsicInfo::GeneratesMultipleIns(intrin->GetHWIntrinsicId())); branchTargetReg = intrin->GetSingleTempReg(); } @@ -271,7 +271,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) const bool isRMW = node->isRMWHWIntrinsic(compiler); const bool hasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrin.id); - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); if (intrin.IsTableDriven()) { diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 0dc565d65575a..2e6f018ddaf6c 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -78,10 +78,10 @@ static bool genIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicC // void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CORINFO_InstructionSet isa = HWIntrinsicInfo::lookupIsa(intrinsicId); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); - int numArgs = HWIntrinsicInfo::lookupNumArgs(node); + size_t numArgs = node->GetOperandCount(); int ival = HWIntrinsicInfo::lookupIval(intrinsicId, compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)); @@ -89,11 +89,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (genIsTableDrivenHWIntrinsic(intrinsicId, category)) { - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + regNumber op1Reg = REG_NA; regNumber op2Reg = REG_NA; emitter* emit = GetEmitter(); @@ -109,6 +111,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { case 1: { + op1 = node->Op(1); + if (node->OperIsMemoryLoad()) { genConsumeAddress(op1); @@ -150,6 +154,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case 2: { + op1 = node->Op(1); + op2 = node->Op(2); + if (category == HW_Category_MemoryStore) { genConsumeAddress(op1); @@ -158,13 +165,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { GenTreeHWIntrinsic* extract = op2->AsHWIntrinsic(); - assert((extract->gtHWIntrinsicId == NI_AVX_ExtractVector128) || - (extract->gtHWIntrinsicId == NI_AVX2_ExtractVector128)); + assert((extract->GetHWIntrinsicId() == NI_AVX_ExtractVector128) || + (extract->GetHWIntrinsicId() == NI_AVX2_ExtractVector128)); - regNumber regData = genConsumeReg(extract->gtGetOp1()); + regNumber regData = genConsumeReg(extract->Op(1)); - ins = HWIntrinsicInfo::lookupIns(extract->gtHWIntrinsicId, extract->GetSimdBaseType()); - ival = static_cast<int>(extract->gtGetOp2()->AsIntCon()->IconValue()); + ins = HWIntrinsicInfo::lookupIns(extract->GetHWIntrinsicId(), extract->GetSimdBaseType()); + ival = static_cast<int>(extract->Op(2)->AsIntCon()->IconValue()); GenTreeIndir indir = indirForm(TYP_SIMD16, op1); emit->emitIns_A_R_I(ins, EA_32BYTE, &indir, regData, ival); @@ -258,18 +265,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case 3: { - GenTreeArgList* argList = op1->AsArgList(); - op1 = argList->Current(); + op1 = node->Op(1); + op2 = node->Op(2); + op3 = node->Op(3); + genConsumeRegs(op1); op1Reg = op1->GetRegNum(); - argList = argList->Rest(); - op2 = argList->Current(); genConsumeRegs(op2); op2Reg = op2->GetRegNum(); - argList = argList->Rest(); - GenTree* op3 = 
argList->Current(); genConsumeRegs(op3); regNumber op3Reg = op3->GetRegNum(); @@ -432,7 +437,7 @@ void CodeGen::genHWIntrinsic_R_RM( if (rmOp->isContained() || rmOp->isUsedFromSpillTemp()) { - assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId())); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, rmOp); TempDsc* tmpDsc = nullptr; @@ -462,8 +467,8 @@ void CodeGen::genHWIntrinsic_R_RM( else { assert(rmOp->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(rmOp->AsHWIntrinsic()) == 1); - addr = rmOp->gtGetOp1(); + assert(rmOp->AsHWIntrinsic()->GetOperandCount() == 1); + addr = rmOp->AsHWIntrinsic()->Op(1); } switch (addr->OperGet()) @@ -554,7 +559,7 @@ void CodeGen::genHWIntrinsic_R_RM( void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, int8_t ival) { regNumber targetReg = node->GetRegNum(); - GenTree* op1 = node->gtGetOp1(); + GenTree* op1 = node->Op(1); // TODO-XArch-CQ: Commutative operations can have op1 be contained // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained @@ -564,7 +569,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, e if (op1->isContained() || op1->isUsedFromSpillTemp()) { - assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId())); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op1); } inst_RV_TT_IV(ins, simdSize, targetReg, op1, ival); @@ -582,8 +587,8 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, e void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr) { regNumber targetReg = node->GetRegNum(); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); regNumber op1Reg = op1->GetRegNum(); assert(targetReg != REG_NA); @@ -612,7 +617,7 @@ void CodeGen::genHWIntrinsic_R_R_RM( if (op2->isContained() || op2->isUsedFromSpillTemp()) { - assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId())); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); } @@ -632,29 +637,13 @@ void CodeGen::genHWIntrinsic_R_R_RM( void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, int8_t ival) { regNumber targetReg = node->GetRegNum(); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); emitter* emit = GetEmitter(); // TODO-XArch-CQ: Commutative operations can have op1 be contained // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained - if (op1->OperIsList()) - { - assert(op2 == nullptr); - - GenTreeArgList* argList = op1->AsArgList(); - - op1 = argList->Current(); - argList = argList->Rest(); - - op2 = argList->Current(); - argList = argList->Rest(); - - assert(argList->Current() != nullptr); - assert(argList->Rest() == nullptr); - } - regNumber op1Reg = op1->GetRegNum(); assert(targetReg != REG_NA); @@ -662,7 +651,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, if (op2->isContained() || op2->isUsedFromSpillTemp()) { - assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + 
assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId())); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); TempDsc* tmpDsc = nullptr; @@ -692,8 +681,8 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, else { assert(op2->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(op2->AsHWIntrinsic()) == 1); - addr = op2->gtGetOp1(); + assert(op2->AsHWIntrinsic()->GetOperandCount() == 1); + addr = op2->AsHWIntrinsic()->Op(1); } switch (addr->OperGet()) @@ -795,25 +784,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize) { regNumber targetReg = node->GetRegNum(); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - GenTree* op3 = nullptr; + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); emitter* emit = GetEmitter(); - assert(op1->OperIsList()); - assert(op2 == nullptr); - - GenTreeArgList* argList = op1->AsArgList(); - - op1 = argList->Current(); - argList = argList->Rest(); - - op2 = argList->Current(); - argList = argList->Rest(); - - op3 = argList->Current(); - assert(argList->Rest() == nullptr); - regNumber op1Reg = op1->GetRegNum(); regNumber op3Reg = op3->GetRegNum(); @@ -823,7 +798,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, if (op2->isContained() || op2->isUsedFromSpillTemp()) { - assert(HWIntrinsicInfo::SupportsContainment(node->gtHWIntrinsicId)); + assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId())); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op2); TempDsc* tmpDsc = nullptr; @@ -855,8 +830,8 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, else { assert(op2->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(op2->AsHWIntrinsic()) == 1); - addr = op2->gtGetOp1(); + assert(op2->AsHWIntrinsic()->GetOperandCount() == 1); + addr = op2->AsHWIntrinsic()->Op(1); } switch (addr->OperGet()) @@ -982,8 +957,8 @@ void CodeGen::genHWIntrinsic_R_R_R_RM( else { assert(op3->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(op3->AsHWIntrinsic()) == 1); - addr = op3->gtGetOp1(); + assert(op3->AsHWIntrinsic()->GetOperandCount() == 1); + addr = op3->AsHWIntrinsic()->Op(1); } switch (addr->OperGet()) @@ -1138,17 +1113,17 @@ void CodeGen::genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsi // void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE)); assert((baseType >= TYP_BYTE) && (baseType <= TYP_DOUBLE)); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = (node->GetOperandCount() >= 1) ? node->Op(1) : nullptr; + GenTree* op2 = (node->GetOperandCount() >= 2) ? node->Op(2) : nullptr; - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); regNumber op1Reg = (op1 == nullptr) ? 
REG_NA : op1->GetRegNum(); emitter* emit = GetEmitter(); @@ -1395,13 +1370,11 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector128_get_Zero: case NI_Vector256_get_Zero: { - assert(op1 == nullptr); emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg); break; } case NI_Vector128_get_AllBitsSet: - assert(op1 == nullptr); if (varTypeIsFloating(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX)) { // The following corresponds to vcmptrueps pseudo-op and not available without VEX prefix. @@ -1414,7 +1387,6 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Vector256_get_AllBitsSet: - assert(op1 == nullptr); if (varTypeIsIntegral(baseType) && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX2)) { emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, targetReg, targetReg); @@ -1445,9 +1417,9 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); - genConsumeOperands(node); + genConsumeMultiOpOperands(node); switch (intrinsicId) { @@ -1456,7 +1428,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) case NI_X86Base_X64_BitScanForward: case NI_X86Base_X64_BitScanReverse: { - GenTree* op1 = node->gtGetOp1(); + GenTree* op1 = node->Op(1); regNumber targetReg = node->GetRegNum(); var_types targetType = node->TypeGet(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); @@ -1468,8 +1440,6 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) case NI_X86Base_Pause: { assert(node->GetSimdBaseType() == TYP_UNKNOWN); - assert(node->gtGetOp1() == nullptr); - assert(node->gtGetOp2() == nullptr); GetEmitter()->emitIns(INS_pause); break; } @@ -1490,17 +1460,13 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); var_types targetType = node->TypeGet(); var_types baseType = node->GetSimdBaseType(); + emitter* emit = GetEmitter(); - regNumber op1Reg = REG_NA; - emitter* emit = GetEmitter(); - - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); switch (intrinsicId) { @@ -1508,18 +1474,14 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_X64_ConvertToInt64WithTruncation: { assert(targetType == TYP_LONG); - assert(op1 != nullptr); - assert(op2 == nullptr); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, EA_8BYTE, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, EA_8BYTE, targetReg, node->Op(1)); break; } case NI_SSE_X64_ConvertScalarToVector128Single: { assert(baseType == TYP_LONG); - assert(op1 != nullptr); - assert(op2 != nullptr); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE); break; @@ -1531,21 +1493,17 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE_PrefetchNonTemporal: { assert(baseType == TYP_UBYTE); - assert(op2 == nullptr); // These do not support containment. 
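
Contained memory-load operands follow a single pattern in these hunks: a contained HWINTRINSIC for which OperIsMemoryLoad() holds is unary here, and its address is simply Op(1), where the old code had to pair gtGetOp1 with a lookupNumArgs check. A sketch of the recurring shape (the helper name is hypothetical; the asserts mirror the ones in the hunks above):

    // Extract the address operand from a contained hardware-intrinsic load.
    GenTree* GetContainedLoadAddress(GenTreeHWIntrinsic* load)
    {
        assert(load->OperIsMemoryLoad());
        assert(load->GetOperandCount() == 1);
        return load->Op(1); // the sole operand is the address
    }
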
- assert(!op1->isContained()); + assert(!node->Op(1)->isContained()); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, node->GetSimdBaseType()); - op1Reg = op1->GetRegNum(); - emit->emitIns_AR(ins, emitTypeSize(baseType), op1Reg, 0); + emit->emitIns_AR(ins, emitTypeSize(baseType), node->Op(1)->GetRegNum(), 0); break; } case NI_SSE_StoreFence: { assert(baseType == TYP_UNKNOWN); - assert(op1 == nullptr); - assert(op2 == nullptr); emit->emitIns(INS_sfence); break; } @@ -1566,24 +1524,19 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); var_types targetType = node->TypeGet(); var_types baseType = node->GetSimdBaseType(); - regNumber op1Reg = REG_NA; emitter* emit = GetEmitter(); - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); switch (intrinsicId) { case NI_SSE2_X64_ConvertScalarToVector128Double: { assert(baseType == TYP_LONG); - assert(op1 != nullptr); - assert(op2 != nullptr); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); genHWIntrinsic_R_R_RM(node, ins, EA_8BYTE); break; @@ -1593,10 +1546,8 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_X64_ConvertScalarToVector128UInt64: { assert(baseType == TYP_LONG || baseType == TYP_ULONG); - assert(op1 != nullptr); - assert(op2 == nullptr); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, op1); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, node->Op(1)); break; } @@ -1607,9 +1558,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_X64_ConvertToInt64WithTruncation: case NI_SSE2_X64_ConvertToUInt64: { - assert(op2 == nullptr); emitAttr attr; - if (varTypeIsIntegral(baseType)) { assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); @@ -1622,15 +1571,13 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) } instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, node->Op(1)); break; } case NI_SSE2_LoadFence: { assert(baseType == TYP_UNKNOWN); - assert(op1 == nullptr); - assert(op2 == nullptr); emit->emitIns(INS_lfence); break; } @@ -1638,8 +1585,6 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_MemoryFence: { assert(baseType == TYP_UNKNOWN); - assert(op1 == nullptr); - assert(op2 == nullptr); emit->emitIns(INS_mfence); break; } @@ -1648,11 +1593,8 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE2_X64_StoreNonTemporal: { assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG); - assert(op1 != nullptr); - assert(op2 != nullptr); - instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - GenTreeStoreInd store = storeIndirForm(node->TypeGet(), op1, op2); + GenTreeStoreInd store = storeIndirForm(node->TypeGet(), node->Op(1), node->Op(2)); emit->emitInsStoreInd(ins, emitTypeSize(baseType), &store); break; } @@ -1673,15 +1615,14 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = 
node->gtHWIntrinsicId; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + GenTree* op1 = node->Op(1); regNumber targetReg = node->GetRegNum(); var_types baseType = node->GetSimdBaseType(); emitter* emit = GetEmitter(); - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); switch (intrinsicId) { @@ -1710,6 +1651,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) { assert(!varTypeIsFloating(baseType)); + GenTree* op2 = node->Op(2); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); emitAttr attr = emitActualTypeSize(node->TypeGet()); @@ -1749,20 +1691,19 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); var_types baseType = node->GetSimdBaseType(); var_types targetType = node->TypeGet(); emitter* emit = GetEmitter(); - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); regNumber op1Reg = op1->GetRegNum(); assert(targetReg != REG_NA); assert(op1Reg != REG_NA); - assert(op2 != nullptr); assert(!node->OperIsCommutative()); switch (intrinsicId) @@ -1806,20 +1747,18 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); var_types targetType = node->TypeGet(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - int numArgs = HWIntrinsicInfo::lookupNumArgs(node); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + size_t numArgs = node->GetOperandCount(); + GenTree* op1 = node->Op(1); regNumber op1Reg = REG_NA; - regNumber op2Reg = REG_NA; regNumber targetReg = node->GetRegNum(); emitter* emit = GetEmitter(); - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); switch (intrinsicId) { @@ -1827,7 +1766,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_ConvertToUInt32: { op1Reg = op1->GetRegNum(); - assert(numArgs == 1); assert((baseType == TYP_INT) || (baseType == TYP_UINT)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); emit->emitIns_Mov(ins, emitActualTypeSize(baseType), targetReg, op1Reg, /* canSkip */ false); @@ -1859,24 +1797,13 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_GatherMaskVector128: case NI_AVX2_GatherMaskVector256: { - GenTreeArgList* list = op1->AsArgList(); - op1 = list->Current(); - op1Reg = op1->GetRegNum(); - - list = list->Rest(); - op2 = list->Current(); - op2Reg = op2->GetRegNum(); - - list = list->Rest(); - GenTree* op3 = list->Current(); - - list = list->Rest(); - GenTree* op4 = nullptr; + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); GenTree* lastOp = nullptr; GenTree* indexOp = nullptr; - regNumber op3Reg = REG_NA; - regNumber op4Reg = REG_NA; + op1Reg = op1->GetRegNum(); + regNumber op2Reg = op2->GetRegNum(); regNumber addrBaseReg = REG_NA; regNumber addrIndexReg = REG_NA; regNumber maskReg = 
node->ExtractTempReg(RBM_ALLFLOAT); @@ -1884,11 +1811,13 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node) if (numArgs == 5) { assert(intrinsicId == NI_AVX2_GatherMaskVector128 || intrinsicId == NI_AVX2_GatherMaskVector256); - op4 = list->Current(); - list = list->Rest(); - lastOp = list->Current(); - op3Reg = op3->GetRegNum(); - op4Reg = op4->GetRegNum(); + + GenTree* op4 = node->Op(4); + lastOp = node->Op(5); + + regNumber op3Reg = op3->GetRegNum(); + regNumber op4Reg = op4->GetRegNum(); + addrBaseReg = op2Reg; addrIndexReg = op3Reg; indexOp = op3; @@ -1985,18 +1914,15 @@ void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); var_types targetType = node->TypeGet(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); emitter* emit = GetEmitter(); assert(targetReg != REG_NA); - assert(op1 != nullptr); - genConsumeHWIntrinsicOperands(node); + genConsumeMultiOpOperands(node); switch (intrinsicId) { @@ -2011,7 +1937,6 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node) case NI_BMI2_ZeroHighBits: case NI_BMI2_X64_ZeroHighBits: { - assert(op2 != nullptr); assert((targetType == TYP_INT) || (targetType == TYP_LONG)); genHWIntrinsic_R_R_RM(node, ins, emitTypeSize(node->TypeGet())); break; @@ -2024,16 +1949,14 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node) case NI_BMI1_X64_GetMaskUpToLowestSetBit: case NI_BMI1_X64_ResetLowestSetBit: { - assert(op2 == nullptr); assert((targetType == TYP_INT) || (targetType == TYP_LONG)); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, op1); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, node->Op(1)); break; } case NI_BMI1_TrailingZeroCount: case NI_BMI1_X64_TrailingZeroCount: { - assert(op2 == nullptr); assert((targetType == TYP_INT) || (targetType == TYP_LONG)); genXCNTIntrinsic(node, ins); break; @@ -2042,32 +1965,26 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node) case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: { - int numArgs = HWIntrinsicInfo::lookupNumArgs(node); + size_t numArgs = node->GetOperandCount(); assert(numArgs == 2 || numArgs == 3); - regNumber op1Reg = REG_NA; - regNumber op2Reg = REG_NA; + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + + regNumber op1Reg = op1->GetRegNum(); + regNumber op2Reg = op2->GetRegNum(); regNumber op3Reg = REG_NA; regNumber lowReg = REG_NA; if (numArgs == 2) { - op1Reg = op1->GetRegNum(); - op2Reg = op2->GetRegNum(); lowReg = targetReg; } else { - GenTreeArgList* argList = op1->AsArgList(); - op1 = argList->Current(); - op1Reg = op1->GetRegNum(); - argList = argList->Rest(); - op2 = argList->Current(); - op2Reg = op2->GetRegNum(); - argList = argList->Rest(); - GenTree* op3 = argList->Current(); - op3Reg = op3->GetRegNum(); - assert(!op3->isContained()); + op3Reg = node->Op(3)->GetRegNum(); + + assert(!node->Op(3)->isContained()); assert(op3Reg != op1Reg); assert(op3Reg != targetReg); assert(op3Reg != REG_EDX); @@ -2114,24 +2031,16 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); 
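
genBMI1OrBMI2Intrinsic above shows the variable-arity pattern after this change: intrinsics such as MultiplyNoFlags accept two or three operands, and the count now comes straight off the node rather than from lookupNumArgs plus GT_LIST walking. Distilled from the hunk:

    // Dispatch on the operand count carried by the node itself.
    size_t numArgs = node->GetOperandCount();
    assert((numArgs == 2) || (numArgs == 3));

    regNumber op3Reg = REG_NA;
    if (numArgs == 3)
    {
        // The third operand exists only in the three-argument form,
        // so Op(3) is accessed only on this path.
        op3Reg = node->Op(3)->GetRegNum();
    }
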
var_types baseType = node->GetSimdBaseType(); emitAttr attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(node->GetSimdSize())); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - GenTree* op1 = node->gtGetOp1(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); regNumber targetReg = node->GetRegNum(); - assert(HWIntrinsicInfo::lookupNumArgs(node) == 3); - - genConsumeHWIntrinsicOperands(node); - GenTreeArgList* argList = op1->AsArgList(); - op1 = argList->Current(); - - argList = argList->Rest(); - GenTree* op2 = argList->Current(); - - argList = argList->Rest(); - GenTree* op3 = argList->Current(); + genConsumeMultiOpOperands(node); regNumber op1Reg; regNumber op2Reg; @@ -2197,10 +2106,10 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node) { - assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount || - node->gtHWIntrinsicId == NI_LZCNT_X64_LeadingZeroCount); + assert((node->GetHWIntrinsicId() == NI_LZCNT_LeadingZeroCount) || + (node->GetHWIntrinsicId() == NI_LZCNT_X64_LeadingZeroCount)); - genConsumeOperands(node); + genConsumeMultiOpOperands(node); genXCNTIntrinsic(node, INS_lzcnt); genProduceReg(node); } @@ -2224,9 +2133,9 @@ void CodeGen::genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node) // void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node) { - assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount || node->gtHWIntrinsicId == NI_POPCNT_X64_PopCount); + assert(node->GetHWIntrinsicId() == NI_POPCNT_PopCount || node->GetHWIntrinsicId() == NI_POPCNT_X64_PopCount); - genConsumeOperands(node); + genConsumeMultiOpOperands(node); genXCNTIntrinsic(node, INS_popcnt); genProduceReg(node); } @@ -2245,7 +2154,7 @@ void CodeGen::genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins) // (POPCNT only) processors, so insert a `XOR target, target` to break the dependency via XOR triggering register // renaming, but only if it's not an actual dependency. 
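
Every non-table-driven handler in this file now funnels operand consumption through genConsumeMultiOpOperands. Its definition is not part of this hunk; a plausible minimal shape, assuming it mirrors the per-operand genConsumeRegs behavior of the helper it replaces:

    // Hypothetical sketch of the new helper: consume every operand of a
    // multi-op node in operand order. The real definition lives elsewhere
    // in this change.
    void CodeGen::genConsumeMultiOpOperands(GenTreeMultiOp* node)
    {
        for (GenTree* operand : node->Operands())
        {
            genConsumeRegs(operand);
        }
    }
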
- GenTree* op1 = node->gtGetOp1(); + GenTree* op1 = node->Op(1); regNumber sourceReg1 = REG_NA; regNumber sourceReg2 = REG_NA; diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index a565d7b385544..72c9734afc48b 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -864,31 +864,14 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, } #endif // TARGET_X86 - if (sig->numArgs == 1) - { - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } - else if (sig->numArgs == 2) + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs); + + for (int i = sig->numArgs - 1; i >= 0; i--) { - op2 = impPopStack().val; - op1 = impPopStack().val; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + nodeBuilder.AddOperand(i, impPopStack().val); } - else - { - assert(sig->numArgs >= 3); - - GenTreeArgList* tmp = nullptr; - for (unsigned i = 0; i < sig->numArgs; i++) - { - tmp = gtNewListNode(impPopStack().val, tmp); - } - - op1 = tmp; - retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize); - } + retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize); break; } @@ -1767,9 +1750,11 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic, CORINFO_METHO op1 = getArgForHWIntrinsic(argType, argClass); SetOpLclRelatedToSIMDIntrinsic(op1); - GenTree* opList = new (this, GT_LIST) GenTreeArgList(op1, gtNewArgList(op2, op3, op4, op5)); - retNode = new (this, GT_HWINTRINSIC) GenTreeHWIntrinsic(retType, opList, intrinsic, simdBaseJitType, - simdSize, /* isSimdAsHWIntrinsic */ false); + const bool isSimdAsHWIntrinsic = false; + + retNode = new (this, GT_HWINTRINSIC) + GenTreeHWIntrinsic(retType, getAllocator(CMK_ASTNode), intrinsic, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic, op1, op2, op3, op4, op5); retNode->AsHWIntrinsic()->SetAuxiliaryJitType(indexBaseJitType); break; } diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 0435cee9cbacd..5dc6234a55ba1 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -20639,7 +20639,7 @@ GenTree* Compiler::impInlineFetchArg(unsigned lclNum, InlArgInfo* inlArgInfo, In // Enable for all parameterless (=invariant) hw intrinsics such as // Vector128<>.Zero and Vector256<>.AllBitSets. We might consider // doing that for Vector.Create(cns) as well. 
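
Both importers (impSpecialIntrinsic on ARM64 and impBaseIntrinsic on XARCH, above) now build variadic intrinsic nodes the same way: pop the IL stack in reverse and slot each argument by index. Distilled from the hunks, with comments added:

    // Arguments come off the stack last-to-first, so slot i receives the
    // i-th argument in source order. The builder's operand storage is then
    // moved into the new node instead of threading a GT_LIST chain.
    IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), sig->numArgs);

    for (int i = sig->numArgs - 1; i >= 0; i--)
    {
        nodeBuilder.AddOperand(i, impPopStack().val);
    }

    retNode = gtNewSimdHWIntrinsicNode(retType, std::move(nodeBuilder), intrinsic, simdBaseJitType, simdSize);
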
- if ((argNode->gtGetOp1() == nullptr) && (argNode->gtGetOp2() == nullptr)) + if (argNode->AsHWIntrinsic()->GetOperandCount() == 0) { substitute = true; } diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 06ee07c405b6b..cd24aeca1fa97 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1049,8 +1049,8 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT { #if defined(FEATURE_HW_INTRINSICS) assert(rmOp->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(rmOp->AsHWIntrinsic()) == 1); - addr = rmOp->gtGetOp1(); + assert(rmOp->AsHWIntrinsic()->GetOperandCount() == 1); + addr = rmOp->AsHWIntrinsic()->Op(1); #else unreached(); #endif // FEATURE_HW_INTRINSICS @@ -1178,8 +1178,8 @@ void CodeGen::inst_RV_RV_TT( { #if defined(FEATURE_HW_INTRINSICS) assert(op2->AsHWIntrinsic()->OperIsMemoryLoad()); - assert(HWIntrinsicInfo::lookupNumArgs(op2->AsHWIntrinsic()) == 1); - addr = op2->gtGetOp1(); + assert(op2->AsHWIntrinsic()->GetOperandCount() == 1); + addr = op2->AsHWIntrinsic()->Op(1); #else unreached(); #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp index e72d834d2c593..97f9e4f4807d2 100644 --- a/src/coreclr/jit/lir.cpp +++ b/src/coreclr/jit/lir.cpp @@ -1486,7 +1486,7 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const // Some nodes should never be marked unused, as they must be contained in the backend. // These may be marked as unused during dead code elimination traversal, but they *must* be subsequently // removed. - assert(!node->IsUnusedValue() || !node->OperIs(GT_FIELD_LIST, GT_LIST, GT_INIT_VAL)); + assert(!node->IsUnusedValue() || !node->OperIs(GT_FIELD_LIST, GT_INIT_VAL)); // Verify that the REVERSE_OPS flag is not set. 
NOTE: if we ever decide to reuse the bit assigned to // GTF_REVERSE_OPS for an LIR-only flag we will need to move this check to the points at which we diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b524efa4790b8..345edc402cc3f 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -609,27 +609,25 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) // void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node) { - assert(node->gtHWIntrinsicId == NI_AdvSimd_FusedMultiplyAddScalar); + assert(node->GetHWIntrinsicId() == NI_AdvSimd_FusedMultiplyAddScalar); - const HWIntrinsic intrin(node); - - GenTree* op1 = intrin.op1; - GenTree* op2 = intrin.op2; - GenTree* op3 = intrin.op3; + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); auto lowerOperand = [this](GenTree* op) { bool wasNegated = false; if (op->OperIsHWIntrinsic() && - ((op->AsHWIntrinsic()->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || - (op->AsHWIntrinsic()->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe))) + ((op->AsHWIntrinsic()->GetHWIntrinsicId() == NI_AdvSimd_Arm64_DuplicateToVector64) || + (op->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe))) { GenTreeHWIntrinsic* createVector64 = op->AsHWIntrinsic(); - GenTree* valueOp = createVector64->gtGetOp1(); + GenTree* valueOp = createVector64->Op(1); if (valueOp->OperIs(GT_NEG)) { - createVector64->gtOp1 = valueOp->gtGetOp1(); + createVector64->Op(1) = valueOp->gtGetOp1(); BlockRange().Remove(valueOp); wasNegated = true; } @@ -646,16 +644,16 @@ void Lowering::LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node) { if (op2WasNegated != op3WasNegated) { - node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplyAddNegatedScalar; + node->ChangeHWIntrinsicId(NI_AdvSimd_FusedMultiplyAddNegatedScalar); } else { - node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplySubtractNegatedScalar; + node->ChangeHWIntrinsicId(NI_AdvSimd_FusedMultiplySubtractNegatedScalar); } } else if (op2WasNegated != op3WasNegated) { - node->gtHWIntrinsicId = NI_AdvSimd_FusedMultiplySubtractScalar; + node->ChangeHWIntrinsicId(NI_AdvSimd_FusedMultiplySubtractScalar); } } @@ -676,7 +674,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) node->gtType = TYP_SIMD16; } - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); switch (intrinsicId) { @@ -689,7 +687,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // the same intrinsic as when it came in. 
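
Raw writes to gtHWIntrinsicId are replaced throughout these lowering hunks by a small mutation API, which (judging by its uses here) keeps the intrinsic id and the operand set consistent. The three shapes that appear in this diff:

    // Same arity: only the id changes.
    node->ChangeHWIntrinsicId(NI_AdvSimd_FusedMultiplySubtractScalar);

    // New operand set: drop the old operands and install new ones in one
    // step; the compiler argument presumably allocates a larger array.
    node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN);

    // Nullary form: the node becomes parameterless.
    node->ResetHWIntrinsicId(NI_Vector128_get_Zero);
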
LowerHWIntrinsicCreate(node); - assert(!node->OperIsHWIntrinsic() || (node->gtHWIntrinsicId != intrinsicId)); + assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId)); LowerNode(node); return; } @@ -739,18 +737,19 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // This check may end up modifying node->gtOp1 if it is a cast node that can be removed bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) { - assert((node->gtHWIntrinsicId == NI_Vector64_Create) || (node->gtHWIntrinsicId == NI_Vector128_Create) || - (node->gtHWIntrinsicId == NI_Vector64_CreateScalarUnsafe) || - (node->gtHWIntrinsicId == NI_Vector128_CreateScalarUnsafe) || - (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector64) || - (node->gtHWIntrinsicId == NI_AdvSimd_DuplicateToVector128) || - (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector64) || - (node->gtHWIntrinsicId == NI_AdvSimd_Arm64_DuplicateToVector128)); - assert(HWIntrinsicInfo::lookupNumArgs(node) == 1); - - GenTree* op1 = node->gtOp1; + assert((node->GetHWIntrinsicId() == NI_Vector64_Create) || (node->GetHWIntrinsicId() == NI_Vector128_Create) || + (node->GetHWIntrinsicId() == NI_Vector64_CreateScalarUnsafe) || + (node->GetHWIntrinsicId() == NI_Vector128_CreateScalarUnsafe) || + (node->GetHWIntrinsicId() == NI_AdvSimd_DuplicateToVector64) || + (node->GetHWIntrinsicId() == NI_AdvSimd_DuplicateToVector128) || + (node->GetHWIntrinsicId() == NI_AdvSimd_Arm64_DuplicateToVector64) || + (node->GetHWIntrinsicId() == NI_AdvSimd_Arm64_DuplicateToVector128)); + assert(node->GetOperandCount() == 1); + + GenTree* op1 = node->Op(1); GenTree* castOp = nullptr; + // TODO-Casts: why don't we fold the casts? MinOpts? if (varTypeIsIntegral(node->GetSimdBaseType()) && op1->OperIs(GT_CAST)) { // We will sometimes get a cast around a constant value (such as for @@ -773,8 +772,8 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // We found a containable immediate under // a cast, so remove the cast from the LIR. 
- BlockRange().Remove(node->gtOp1); - node->gtOp1 = op1; + BlockRange().Remove(node->Op(1)); + node->Op(1) = op1; } return true; } @@ -800,7 +799,7 @@ bool Lowering::IsValidConstForMovImm(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -820,8 +819,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // /--* op1 simd // node = * HWINTRINSIC simd T op_Equality - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); NamedIntrinsic cmpIntrinsic; @@ -895,9 +894,9 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) node->ChangeOper(cmpOp); - node->gtType = TYP_INT; - node->gtOp1 = val; - node->gtOp2 = zroCns; + node->gtType = TYP_INT; + node->AsOp()->gtOp1 = val; + node->AsOp()->gtOp2 = zroCns; // The CompareEqual will set (condition is true) or clear (condition is false) all bits of the respective element // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) @@ -915,13 +914,20 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) //---------------------------------------------------------------------------------------------- // Lowering::LowerHWIntrinsicCreate: Lowers a Vector64 or Vector128 Create call // +// Performs the following transformations: +// 1. If all the arguments are constant (including the broadcast case), the vector +// will be loaded from the data section, or turned into Zero/AllBitsSet, if possible. +// 2. Non-constant broadcasts (argCnt == 1) are turned into DuplicateToVector intrinsics. +// 3. Remaining cases get a chain of "Insert"s, from the second element to the last, where +// the vector to be inserted into is created with CreateUnsafeScalar from the first element. +// // Arguments: // node - The hardware intrinsic node. // void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - var_types simdType = node->gtType; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + var_types simdType = node->TypeGet(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -938,109 +944,47 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - GenTreeArgList* argList = nullptr; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - // Spare GenTrees to be used for the lowering logic below - // Defined upfront to avoid naming conflicts, etc... - GenTree* idx = nullptr; - GenTree* tmp1 = nullptr; - GenTree* tmp2 = nullptr; - GenTree* tmp3 = nullptr; - - assert(op1 != nullptr); - - unsigned argCnt = 0; - unsigned cnsArgCnt = 0; + size_t argCnt = node->GetOperandCount(); + size_t cnsArgCnt = 0; - if (op1->OperIsList()) + // These intrinsics are meant to set the same value to every element. 
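
IsValidConstForMovImm, above, is also where a constant-bearing cast gets peeled: when the value under a GT_CAST is an encodable immediate, the cast is unlinked from the LIR range and Op(1) is re-pointed at the constant. A condensed sketch (the encodability predicate name is hypothetical; the real check is the one performed by this function):

    // Peel CAST(cns) down to cns when the constant is encodable as mov-imm.
    // Note that Op(1) is re-assigned, so callers must re-read it afterwards.
    GenTree* op1 = node->Op(1);
    if (varTypeIsIntegral(node->GetSimdBaseType()) && op1->OperIs(GT_CAST))
    {
        GenTree* castOp = op1->AsCast()->CastOp();
        if (isEncodableAsMovImm(castOp)) // hypothetical predicate
        {
            BlockRange().Remove(op1); // unlink the cast from LIR
            node->Op(1) = castOp;     // the constant becomes the operand
        }
    }
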
+ if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType)) { - assert(op2 == nullptr); - - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + // Now assign the rest of the arguments. + for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) { - if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; + HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType); } + + cnsArgCnt = 1; } else { - if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - - if (op2 != nullptr) - { - if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - } - else if (cnsArgCnt == 1) + for (unsigned i = 1; i <= argCnt; i++) { - // These intrinsics are meant to set the same value to every element - // so we'll just specially handle it here and copy it into the remaining - // indices. - - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) + if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType)) { - HandleArgForHWIntrinsicCreate(op1, i, vecCns, simdBaseType); + cnsArgCnt++; } } } assert((argCnt == 1) || (argCnt == (simdSize / genTypeSize(simdBaseType)))); - if ((argCnt == cnsArgCnt) && (argCnt == 1)) + // Check if we have a cast that we can remove. Note that "IsValidConstForMovImm" + // will reset Op(1) if it finds such a cast, so we do not need to handle it here. + // TODO-Casts: why are casts from constants checked for here? + if ((argCnt == cnsArgCnt) && (argCnt == 1) && IsValidConstForMovImm(node)) { - GenTree* castOp = nullptr; - - if (varTypeIsIntegral(simdBaseType) && op1->OperIs(GT_CAST)) - { - // We will sometimes get a cast around a constant value (such as for - // certain long constants) which would block the below containment. - // So we will temporarily check what the cast is from instead so we - // can catch those cases as well. - - castOp = op1->AsCast()->CastOp(); - op1 = castOp; - } - - if (IsValidConstForMovImm(node)) - { - // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector - // intrinsic, which will itself mark the node as contained. - cnsArgCnt = 0; - - // Reacquire op1 as the above check may have removed a cast node and - // changed op1. - op1 = node->gtOp1; - } + // Set the cnsArgCnt to zero so we get lowered to a DuplicateToVector + // intrinsic, which will itself mark the node as contained. + cnsArgCnt = 0; } if (argCnt == cnsArgCnt) { - if (op1->OperIsList()) + for (GenTree* arg : node->Operands()) { - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) - { - BlockRange().Remove(argList->Current()); - } - } - else - { - BlockRange().Remove(op1); - - if (op2 != nullptr) - { - BlockRange().Remove(op2); - } + BlockRange().Remove(arg); } assert((simdSize == 8) || (simdSize == 16)); @@ -1052,16 +996,12 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (vecCns.i64[0] == 0) { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero; + node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_Zero : NI_Vector128_get_Zero); return; } else if (vecCns.i64[0] == -1) { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 8) ? 
NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet; + node->ResetHWIntrinsicId((simdSize == 8) ? NI_Vector64_get_AllBitsSet : NI_Vector128_get_AllBitsSet); return; } } @@ -1076,7 +1016,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, clsVarAddr); node->ChangeOper(GT_IND); - node->gtOp1 = clsVarAddr; + node->AsOp()->gtOp1 = clsVarAddr; // TODO-ARM64-CQ: We should be able to modify at least the paths that use Insert to trivially support partial // vector constants. With this, we can create a constant if say 50% of the inputs are also constant and just @@ -1099,13 +1039,13 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (varTypeIsLong(simdBaseType) || (simdBaseType == TYP_DOUBLE)) { - node->gtHWIntrinsicId = - (simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 : NI_AdvSimd_Arm64_DuplicateToVector128; + node->ChangeHWIntrinsicId((simdType == TYP_SIMD8) ? NI_AdvSimd_Arm64_DuplicateToVector64 + : NI_AdvSimd_Arm64_DuplicateToVector128); } else { - node->gtHWIntrinsicId = - (simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 : NI_AdvSimd_DuplicateToVector128; + node->ChangeHWIntrinsicId((simdType == TYP_SIMD8) ? NI_AdvSimd_DuplicateToVector64 + : NI_AdvSimd_DuplicateToVector128); } return; } @@ -1116,13 +1056,6 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // +--* opN T // node = * HWINTRINSIC simd T Create - if (op1->OperIsList()) - { - argList = op1->AsArgList(); - op1 = argList->Current(); - argList = argList->Rest(); - } - // We will be constructing the following parts: // /--* op1 T // tmp1 = * HWINTRINSIC simd8 T CreateScalarUnsafe @@ -1132,67 +1065,50 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp1 = Vector64.CreateScalarUnsafe(op1); // ... - NamedIntrinsic createScalarUnsafe = + NamedIntrinsic createScalar = (simdType == TYP_SIMD8) ? NI_Vector64_CreateScalarUnsafe : NI_Vector128_CreateScalarUnsafe; - tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op1, createScalarUnsafe, simdBaseJitType, simdSize); - BlockRange().InsertAfter(op1, tmp1); + GenTree* tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, node->Op(1), createScalar, simdBaseJitType, simdSize); + BlockRange().InsertAfter(node->Op(1), tmp1); LowerNode(tmp1); + // We will be constructing the following parts: + // ... + // idx = CNS_INT int N + // /--* tmp1 simd + // +--* idx int + // +--* opN T + // tmp1 = * HWINTRINSIC simd T Insert + // ... + + // This is roughly the following managed code: + // ... + // tmp1 = AdvSimd.Insert(tmp1, N, opN); + // ... + unsigned N = 0; GenTree* opN = nullptr; + GenTree* idx = nullptr; for (N = 1; N < argCnt - 1; N++) { - // We will be constructing the following parts: - // ... - // idx = CNS_INT int N - // /--* tmp1 simd - // +--* idx int - // +--* opN T - // tmp1 = * HWINTRINSIC simd T Insert - // ... - - // This is roughly the following managed code: - // ... - // tmp1 = AdvSimd.Insert(tmp1, N, opN); - // ... 
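
The rewritten loop below walks the node's own operand array: elements 2..argCnt-1 each get a fresh AdvSimd.Insert node, while the final insert re-purposes the Create node in place via ResetHWIntrinsicId, so no extra node has to be linked into the LIR range for the last step. In outline:

    // Elements [2, argCnt) become a chain of new Insert nodes...
    for (N = 1; N < argCnt - 1; N++)
    {
        opN  = node->Op(N + 1);
        idx  = comp->gtNewIconNode(N);
        BlockRange().InsertBefore(opN, idx);
        tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize);
        BlockRange().InsertAfter(opN, tmp1);
        LowerNode(tmp1);
    }

    // ...and the last element reuses `node` itself.
    opN = node->Op(argCnt);
    idx = comp->gtNewIconNode(N);
    BlockRange().InsertBefore(opN, idx);
    node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN);
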
- - opN = argList->Current(); - - idx = comp->gtNewIconNode(N, TYP_INT); + opN = node->Op(N + 1); + idx = comp->gtNewIconNode(N); BlockRange().InsertBefore(opN, idx); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, tmp1, idx, opN, NI_AdvSimd_Insert, simdBaseJitType, simdSize); BlockRange().InsertAfter(opN, tmp1); LowerNode(tmp1); - - argList = argList->Rest(); } assert(N == (argCnt - 1)); - // We will be constructing the following parts: - // idx = CNS_INT int N - // /--* tmp1 simd - // +--* idx int - // +--* opN T - // node = * HWINTRINSIC simd T Insert - - // This is roughly the following managed code: - // ... - // tmp1 = AdvSimd.Insert(tmp1, N, opN); - // ... - - opN = (argCnt == 2) ? op2 : argList->Current(); - - idx = comp->gtNewIconNode(N, TYP_INT); + // For the last insert, we will reuse the existing node and so handle it here, outside the loop. + opN = node->Op(argCnt); + idx = comp->gtNewIconNode(N); BlockRange().InsertBefore(opN, idx); - node->gtOp1 = comp->gtNewArgList(tmp1, idx, opN); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AdvSimd_Insert; + node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp1, idx, opN); } //---------------------------------------------------------------------------------------------- @@ -1203,7 +1119,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -1214,12 +1130,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - assert(op1 != nullptr); - assert(op2 != nullptr); - assert(!op1->OperIsList()); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); // Spare GenTrees to be used for the lowering logic below // Defined upfront to avoid naming conflicts, etc... @@ -1306,10 +1218,10 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // ... - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -1387,10 +1299,10 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // var tmp1 = AdvSimd.Arm64.AddPairwise(tmp1, tmp2); // ... - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -1433,12 +1345,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // ... // return tmp2.ToScalar(); - node->gtOp1 = tmp2; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = (simdSize == 8) ? NI_Vector64_ToScalar : NI_Vector128_ToScalar; + node->ResetHWIntrinsicId((simdSize == 8) ? 
NI_Vector64_ToScalar : NI_Vector128_ToScalar, tmp2); LowerNode(node); - return; } #endif // FEATURE_HW_INTRINSICS @@ -1706,7 +1614,7 @@ void Lowering::ContainCheckStoreLoc(GenTreeLclVarCommon* storeLoc) const MakeSrcContained(storeLoc, op1); if (op1->IsSIMDZero()) { - MakeSrcContained(op1, op1->gtGetOp1()); + MakeSrcContained(op1, op1->AsSIMD()->Op(1)); } } return; @@ -1790,11 +1698,11 @@ void Lowering::ContainCheckBoundsChk(GenTreeBoundsChk* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { - switch (simdNode->gtSIMDIntrinsicID) + switch (simdNode->GetSIMDIntrinsicId()) { case SIMDIntrinsicInit: { - GenTree* op1 = simdNode->AsOp()->gtOp1; + GenTree* op1 = simdNode->Op(1); if (op1->IsIntegralConst(0)) { MakeSrcContained(simdNode, op1); @@ -1804,7 +1712,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) case SIMDIntrinsicInitArray: // We have an array and an index, which may be contained. - CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); + CheckImmedAndMakeContained(simdNode, simdNode->Op(2)); break; default: @@ -1929,10 +1837,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_Arm64_DuplicateToVector128: if (IsValidConstForMovImm(node)) { - // Use node->gtOp1 as the above check may - // have removed a cast node and changed op1 - - MakeSrcContained(node, node->gtOp1); + MakeSrcContained(node, node->Op(1)); } break; diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 8f15e9a04fb8e..19a12f920ad66 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -750,35 +750,30 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) simdNode->gtType = TYP_SIMD16; } - if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN) + if (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN) { assert(simdNode->GetSimdBaseType() == TYP_FLOAT); - int argCount = 0; - int constArgCount = 0; - float constArgValues[4]{0, 0, 0, 0}; + size_t argCount = simdNode->GetOperandCount(); + size_t constArgCount = 0; + float constArgValues[4]{0, 0, 0, 0}; - for (GenTreeArgList* list = simdNode->gtGetOp1()->AsArgList(); list != nullptr; list = list->Rest()) + for (GenTree* arg : simdNode->Operands()) { - GenTree* arg = list->Current(); - - assert(arg->TypeGet() == simdNode->GetSimdBaseType()); - assert(argCount < (int)_countof(constArgValues)); + assert(arg->TypeIs(simdNode->GetSimdBaseType())); if (arg->IsCnsFltOrDbl()) { constArgValues[constArgCount] = static_cast<float>(arg->AsDblCon()->gtDconVal); constArgCount++; } - - argCount++; } if (constArgCount == argCount) { - for (GenTreeArgList* list = simdNode->gtGetOp1()->AsArgList(); list != nullptr; list = list->Rest()) + for (GenTree* arg : simdNode->Operands()) { - BlockRange().Remove(list->Current()); + BlockRange().Remove(arg); } assert(sizeof(constArgValues) == 16); @@ -791,7 +786,7 @@ void Lowering::LowerSIMD(GenTreeSIMD* simdNode) GenTree* clsVarAddr = new (comp, GT_CLS_VAR_ADDR) GenTreeClsVar(GT_CLS_VAR_ADDR, TYP_I_IMPL, hnd, nullptr); BlockRange().InsertBefore(simdNode, clsVarAddr); simdNode->ChangeOper(GT_IND); - simdNode->gtOp1 = clsVarAddr; + simdNode->AsOp()->gtOp1 = clsVarAddr; ContainCheckIndir(simdNode->AsIndir()); return; @@ -817,8 +812,9 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn { GenTreeCC* cc = LowerNodeCC(node, condition); - node->gtHWIntrinsicId = newIntrinsicId; - node->gtType = TYP_VOID; + assert(HWIntrinsicInfo::lookupNumArgs(newIntrinsicId) == 2); + node->ChangeHWIntrinsicId(newIntrinsicId); + 
node->gtType = TYP_VOID; node->ClearUnusedValue(); bool swapOperands = false; @@ -867,8 +863,8 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn bool op1SupportsRegOptional = false; bool op2SupportsRegOptional = false; - if (!IsContainableHWIntrinsicOp(node, node->gtGetOp2(), &op2SupportsRegOptional) && - IsContainableHWIntrinsicOp(node, node->gtGetOp1(), &op1SupportsRegOptional)) + if (!IsContainableHWIntrinsicOp(node, node->Op(2), &op2SupportsRegOptional) && + IsContainableHWIntrinsicOp(node, node->Op(1), &op1SupportsRegOptional)) { // Swap operands if op2 cannot be contained but op1 can. swapOperands = true; @@ -877,7 +873,7 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn if (swapOperands) { - std::swap(node->gtOp1, node->gtOp2); + std::swap(node->Op(1), node->Op(2)); if (cc != nullptr) { @@ -908,53 +904,44 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn // void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node) { - assert(node->gtHWIntrinsicId == NI_FMA_MultiplyAddScalar); - GenTreeArgList* argList = node->gtGetOp1()->AsArgList(); + assert(node->GetHWIntrinsicId() == NI_FMA_MultiplyAddScalar); GenTreeHWIntrinsic* createScalarOps[3]; - for (GenTreeHWIntrinsic*& createScalarOp : createScalarOps) + for (size_t i = 1; i <= 3; i++) { - GenTree*& current = argList->Current(); - assert(current != nullptr); - if (!current->OperIsHWIntrinsic()) - { - return; // Math(F).FusedMultiplyAdd is expected to emit three NI_Vector128_CreateScalarUnsafe - // but it's also possible to use NI_FMA_MultiplyAddScalar directly with any operands - } - GenTreeHWIntrinsic* hwArg = current->AsHWIntrinsic(); - if (hwArg->gtHWIntrinsicId != NI_Vector128_CreateScalarUnsafe) + GenTree* arg = node->Op(i); + if (!arg->OperIsHWIntrinsic() || (arg->AsHWIntrinsic()->GetHWIntrinsicId() != NI_Vector128_CreateScalarUnsafe)) { return; } - createScalarOp = hwArg; - argList = argList->Rest(); + + createScalarOps[i - 1] = arg->AsHWIntrinsic(); } - assert(argList == nullptr); - GenTree* argX = createScalarOps[0]->gtGetOp1(); - GenTree* argY = createScalarOps[1]->gtGetOp1(); - GenTree* argZ = createScalarOps[2]->gtGetOp1(); + GenTree* argX = createScalarOps[0]->Op(1); + GenTree* argY = createScalarOps[1]->Op(1); + GenTree* argZ = createScalarOps[2]->Op(1); const bool negMul = argX->OperIs(GT_NEG) != argY->OperIs(GT_NEG); if (argX->OperIs(GT_NEG)) { - createScalarOps[0]->gtOp1 = argX->gtGetOp1(); + createScalarOps[0]->Op(1) = argX->gtGetOp1(); BlockRange().Remove(argX); } if (argY->OperIs(GT_NEG)) { - createScalarOps[1]->gtOp1 = argY->gtGetOp1(); + createScalarOps[1]->Op(1) = argY->gtGetOp1(); BlockRange().Remove(argY); } if (argZ->OperIs(GT_NEG)) { - createScalarOps[2]->gtOp1 = argZ->gtGetOp1(); + createScalarOps[2]->Op(1) = argZ->gtGetOp1(); BlockRange().Remove(argZ); - node->gtHWIntrinsicId = negMul ? NI_FMA_MultiplySubtractNegatedScalar : NI_FMA_MultiplySubtractScalar; + node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplySubtractNegatedScalar : NI_FMA_MultiplySubtractScalar); } else { - node->gtHWIntrinsicId = negMul ? NI_FMA_MultiplyAddNegatedScalar : NI_FMA_MultiplyAddScalar; + node->ChangeHWIntrinsicId(negMul ? 
NI_FMA_MultiplyAddNegatedScalar : NI_FMA_MultiplyAddScalar); } } @@ -973,7 +960,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) node->gtType = TYP_SIMD16; } - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); switch (intrinsicId) { @@ -988,7 +975,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // intrinsics that are not Vector*.Create LowerHWIntrinsicCreate(node); - assert(!node->OperIsHWIntrinsic() || (node->gtHWIntrinsicId != intrinsicId)); + assert(!node->OperIsHWIntrinsic() || (node->GetHWIntrinsicId() != intrinsicId)); LowerNode(node); return; } @@ -1005,8 +992,8 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) { LowerHWIntrinsicGetElement(node); - if ((node->gtHWIntrinsicId == NI_Vector128_GetElement) || - (node->gtHWIntrinsicId == NI_Vector256_GetElement)) + if ((node->GetHWIntrinsicId() == NI_Vector128_GetElement) || + (node->GetHWIntrinsicId() == NI_Vector256_GetElement)) { // Most NI_Vector*_GetElement intrinsics are lowered to // alternative nodes, such as the Extract intrinsics, @@ -1053,11 +1040,9 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) if (varTypeIsFloating(node->GetSimdBaseType())) { assert(node->GetSimdBaseType() == TYP_FLOAT); - assert(node->gtOp1 != nullptr); - assert(node->gtOp2 != nullptr); assert(node->GetSimdSize() == 16); - GenTree* op2 = node->gtGetOp2(); + GenTree* op2 = node->Op(2); if (!op2->OperIsConst()) { @@ -1071,10 +1056,10 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(msk, tmp); LowerNode(tmp); - node->gtOp2 = tmp; + node->Op(2) = tmp; } - node->gtHWIntrinsicId = NI_Vector128_GetElement; + node->ChangeHWIntrinsicId(NI_Vector128_GetElement); LowerNode(node); } break; @@ -1084,29 +1069,26 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_SSE41_Insert: case NI_SSE41_X64_Insert: { - assert(HWIntrinsicInfo::lookupNumArgs(node) == 3); - - GenTreeArgList* argList = node->gtOp1->AsArgList(); + assert(node->GetOperandCount() == 3); // Insert takes either a 32-bit register or a memory operand. // In either case, only SimdBaseType bits are read and so // widening or narrowing the operand may be unnecessary and it // can just be used directly. - - argList->Rest()->gtOp1 = TryRemoveCastIfPresent(node->GetSimdBaseType(), argList->Rest()->gtOp1); + node->Op(2) = TryRemoveCastIfPresent(node->GetSimdBaseType(), node->Op(2)); break; } case NI_SSE42_Crc32: { - assert(HWIntrinsicInfo::lookupNumArgs(node) == 2); + assert(node->GetOperandCount() == 2); // Crc32 takes either a bit register or a memory operand. // In either case, only gtType bits are read and so widening // or narrowing the operand may be unnecessary and it can // just be used directly. 
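Both the Insert and Crc32 hunks here lean on TryRemoveCastIfPresent dropping a widening cast whose extra bits the consumer never reads. One plausible shape of that idea, as a standalone sketch over stand-in types (not the helper's actual body):

struct TreeSketch // stand-in for GenTree
{
    bool        isWideningCast;
    unsigned    sourceBits; // width of the cast's source value
    TreeSketch* source;
};

// If 'op' only widens a value and the consumer reads at most 'readBits'
// of it, the bits that matter are unchanged: use the source directly.
TreeSketch* TryRemoveWideningCast(unsigned readBits, TreeSketch* op)
{
    if (op->isWideningCast && (op->sourceBits <= readBits))
    {
        return op->source; // the caller also unlinks the cast from the LIR range
    }
    return op;
}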
- node->gtOp2 = TryRemoveCastIfPresent(node->gtType, node->gtOp2); + node->Op(2) = TryRemoveCastIfPresent(node->TypeGet(), node->Op(2)); break; } @@ -1137,7 +1119,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } // pre-AVX doesn't actually support these intrinsics in hardware so we need to swap the operands around - std::swap(node->gtOp1, node->gtOp2); + std::swap(node->Op(1), node->Op(2)); break; } @@ -1152,7 +1134,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) assert(varTypeIsIntegral(node->GetSimdBaseType())); // this isn't actually supported in hardware so we need to swap the operands around - std::swap(node->gtOp1, node->gtOp2); + std::swap(node->Op(1), node->Op(2)); break; } @@ -1272,7 +1254,7 @@ void Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -1292,8 +1274,8 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // /--* op1 simd // node = * HWINTRINSIC simd T op_Equality - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; @@ -1303,34 +1285,35 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // just use PTEST. We can't support it for floating-point, however, // as it has both +0.0 and -0.0 where +0.0 == -0.0 - node->gtOp1 = op1; + node->Op(1) = op1; BlockRange().Remove(op2); - op2 = op2->AsOp()->gtGetOp1(); - - if (op2 != nullptr) + if (op2->AsMultiOp()->GetOperandCount() == 1) { // Some zero vectors are Create/Initialization nodes with a constant zero operand // We should also remove this to avoid dead code - BlockRange().Remove(op2); + assert(op2->AsMultiOp()->Op(1)->IsIntegralConst(0)); + BlockRange().Remove(op2->AsMultiOp()->Op(1)); } - LIR::Use op1Use(BlockRange(), &node->gtOp1, node); + LIR::Use op1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(op1Use); - op1 = node->gtOp1; + op1 = node->Op(1); op2 = comp->gtClone(op1); BlockRange().InsertAfter(op1, op2); - node->gtOp2 = op2; + node->Op(2) = op2; if (simdSize == 32) { - node->gtHWIntrinsicId = NI_AVX_TestZ; + // TODO-Review: LowerHWIntrinsicCC resets the id again, so why is this needed? + node->ChangeHWIntrinsicId(NI_AVX_TestZ); LowerHWIntrinsicCC(node, NI_AVX_PTEST, cmpCnd); } else { - node->gtHWIntrinsicId = NI_SSE41_TestZ; + // TODO-Review: LowerHWIntrinsicCC resets the id again, so why is this needed? 
+ node->ChangeHWIntrinsicId(NI_SSE41_TestZ); LowerHWIntrinsicCC(node, NI_SSE41_PTEST, cmpCnd); } @@ -1492,10 +1475,9 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) } node->ChangeOper(cmpOp); - - node->gtType = TYP_INT; - node->gtOp1 = msk; - node->gtOp2 = mskCns; + node->ChangeType(TYP_INT); + node->AsOp()->gtOp1 = msk; + node->AsOp()->gtOp2 = mskCns; GenTree* cc = LowerNodeCC(node, cmpCnd); @@ -1513,7 +1495,7 @@ void Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp) // void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types simdType = node->gtType; CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); @@ -1531,9 +1513,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - GenTreeArgList* argList = nullptr; - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); + GenTree* op1 = node->Op(1); // Spare GenTrees to be used for the lowering logic below // Defined upfront to avoid naming conflicts, etc... @@ -1542,49 +1522,27 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) GenTree* tmp2 = nullptr; GenTree* tmp3 = nullptr; - assert(op1 != nullptr); - - unsigned argCnt = 0; - unsigned cnsArgCnt = 0; + size_t argCnt = node->GetOperandCount(); + size_t cnsArgCnt = 0; - if (op1->OperIsList()) + // These intrinsics are meant to set the same value to every element. + if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, vecCns, simdBaseType)) { - assert(op2 == nullptr); - - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) + // Now assign the rest of the arguments. + for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) { - if (HandleArgForHWIntrinsicCreate(argList->Current(), argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; + HandleArgForHWIntrinsicCreate(node->Op(1), i, vecCns, simdBaseType); } + + cnsArgCnt = 1; } else { - if (HandleArgForHWIntrinsicCreate(op1, argCnt, vecCns, simdBaseType)) - { - cnsArgCnt += 1; - } - argCnt += 1; - - if (op2 != nullptr) + for (unsigned i = 1; i <= argCnt; i++) { - if (HandleArgForHWIntrinsicCreate(op2, argCnt, vecCns, simdBaseType)) + if (HandleArgForHWIntrinsicCreate(node->Op(i), i - 1, vecCns, simdBaseType)) { - cnsArgCnt += 1; - } - argCnt += 1; - } - else if (cnsArgCnt == 1) - { - // These intrinsics are meant to set the same value to every element - // so we'll just specially handle it here and copy it into the remaining - // indices. 
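For the single-operand Create handled above, the one constant is recorded for lane 0 and then copied into every remaining lane. A self-contained model of that fill (a 16-byte vector with int lanes is assumed purely for illustration):

#include <cstdint>

union Simd16Sketch // stand-in for the JIT's vector-constant buffer
{
    int32_t i32[4];
    int64_t i64[2];
};

Simd16Sketch BroadcastLanes(int32_t cns)
{
    Simd16Sketch vecCns;
    vecCns.i32[0] = cns; // lane 0: the Create operand itself
    for (unsigned i = 1; i < 4; i++)
    {
        vecCns.i32[i] = vecCns.i32[0]; // lanes 1..3: copies of lane 0
    }
    return vecCns;
}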
- - for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++) - { - HandleArgForHWIntrinsicCreate(op1, i, vecCns, simdBaseType); + cnsArgCnt++; } } } @@ -1592,47 +1550,16 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (argCnt == cnsArgCnt) { - if (op1->OperIsList()) + for (GenTree* arg : node->Operands()) { - for (argList = op1->AsArgList(); argList != nullptr; argList = argList->Rest()) - { - GenTree* arg = argList->Current(); - #if !defined(TARGET_64BIT) - if (arg->OperIsLong()) - { - BlockRange().Remove(arg->AsOp()->gtOp1); - BlockRange().Remove(arg->AsOp()->gtOp2); - } -#endif // !TARGET_64BIT - - BlockRange().Remove(arg); - } - } - else - { -#if !defined(TARGET_64BIT) - if (op1->OperIsLong()) + if (arg->OperIsLong()) { - BlockRange().Remove(op1->AsOp()->gtOp1); - BlockRange().Remove(op1->AsOp()->gtOp2); + BlockRange().Remove(arg->AsOp()->gtGetOp1()); + BlockRange().Remove(arg->AsOp()->gtGetOp2()); } #endif // !TARGET_64BIT - - BlockRange().Remove(op1); - - if (op2 != nullptr) - { -#if !defined(TARGET_64BIT) - if (op2->OperIsLong()) - { - BlockRange().Remove(op2->AsOp()->gtOp1); - BlockRange().Remove(op2->AsOp()->gtOp2); - } -#endif // !TARGET_64BIT - - BlockRange().Remove(op2); - } + BlockRange().Remove(arg); } assert((simdSize == 8) || (simdSize == 12) || (simdSize == 16) || (simdSize == 32)); @@ -1644,16 +1571,12 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) if (vecCns.i64[0] == 0) { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 16) ? NI_Vector128_get_Zero : NI_Vector256_get_Zero; + node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_Zero : NI_Vector256_get_Zero); return; } else if (vecCns.i64[0] == -1) { - node->gtOp1 = nullptr; - node->gtOp2 = nullptr; - node->gtHWIntrinsicId = (simdSize == 16) ? NI_Vector128_get_AllBitsSet : NI_Vector256_get_AllBitsSet; + node->ResetHWIntrinsicId((simdSize == 16) ? NI_Vector128_get_AllBitsSet : NI_Vector256_get_AllBitsSet); return; } } @@ -1669,7 +1592,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertBefore(node, clsVarAddr); node->ChangeOper(GT_IND); - node->gtOp1 = clsVarAddr; + node->AsOp()->gtOp1 = clsVarAddr; // TODO-XARCH-CQ: We should be able to modify at least the paths that use Insert to trivially support partial
With this, we can create a constant if say 50% of the inputs are also constant and just @@ -1702,10 +1625,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op1, tmp1); LowerNode(tmp1); - node->gtOp1 = tmp1; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AVX2_BroadcastScalarToVector256; + node->ResetHWIntrinsicId(NI_AVX2_BroadcastScalarToVector256, tmp1); return; } @@ -1736,10 +1656,10 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op1, tmp1); LowerNode(tmp1); - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -1752,10 +1672,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x01, TYP_INT); BlockRange().InsertAfter(tmp3, idx); - node->gtOp1 = comp->gtNewArgList(tmp3, tmp1, idx); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AVX_InsertVector128; + node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, tmp3, tmp1, idx); return; } @@ -1783,10 +1700,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... // return Avx2.BroadcastScalarToVector128(tmp1); - node->gtOp1 = tmp1; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AVX2_BroadcastScalarToVector128; + node->ChangeHWIntrinsicId(NI_AVX2_BroadcastScalarToVector128, tmp1); return; } @@ -1814,10 +1728,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(tmp1, tmp2); LowerNode(tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSSE3_Shuffle; + node->ResetHWIntrinsicId(NI_SSSE3_Shuffle, tmp1, tmp2); break; } @@ -1840,10 +1751,10 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse2.UnpackLow(tmp1, tmp2); // ... 
- node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -1878,10 +1789,10 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -1913,12 +1824,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); - node->gtOp1 = tmp1; - node->gtOp2 = idx; - - node->gtHWIntrinsicId = NI_SSE2_Shuffle; + node->ResetHWIntrinsicId(NI_SSE2_Shuffle, tmp1, idx); node->SetSimdBaseJitType(CORINFO_TYPE_UINT); - break; } @@ -1943,18 +1850,15 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE2)); - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSE2_UnpackLow; + node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); break; } #endif // TARGET_AMD64 @@ -1977,10 +1881,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp1, idx); - node->gtOp1 = tmp1; - node->gtOp2 = idx; - - node->gtHWIntrinsicId = NI_AVX_Permute; + node->ResetHWIntrinsicId(NI_AVX_Permute, tmp1, idx); break; } @@ -2003,10 +1904,10 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) assert(comp->compIsaSupportedDebugOnly(InstructionSet_SSE)); - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -2014,10 +1915,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x00, TYP_INT); BlockRange().InsertAfter(tmp2, idx); - node->gtOp1 = comp->gtNewArgList(tmp1, tmp2, idx); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_SSE_Shuffle; + node->ResetHWIntrinsicId(NI_SSE_Shuffle, comp, tmp1, tmp2, idx); break; } @@ -2034,10 +1932,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // ... 
// return Sse3.MoveAndDuplicate(tmp1); - node->gtOp1 = tmp1; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_SSE3_MoveAndDuplicate; + node->ChangeHWIntrinsicId(NI_SSE3_MoveAndDuplicate, tmp1); break; } @@ -2058,20 +1953,16 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // var tmp2 = tmp1; // return Sse.MoveLowToHigh(tmp1, tmp2); - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSE_MoveLowToHigh; + node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); node->SetSimdBaseJitType(CORINFO_TYPE_FLOAT); - break; } @@ -2084,6 +1975,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) return; } + GenTree* op2 = node->Op(2); + // We have the following (where simd is simd16 or simd32): // /--* op1 T // +--* ... T @@ -2117,99 +2010,36 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // lo = Vector128.Create(op1, op2); // hi = Vector128.Create(op3, op4); // -or- - // lo = Vector128.Create(op1, ..., op3); - // hi = Vector128.Create(op4, ..., op7); + // lo = Vector128.Create(op1, ..., op4); + // hi = Vector128.Create(op5, ..., op8); // -or- - // lo = Vector128.Create(op1, ..., op7); - // hi = Vector128.Create(op8, ..., op15); + // lo = Vector128.Create(op1, ..., op8); + // hi = Vector128.Create(op9, ..., op16); // -or- - // lo = Vector128.Create(op1, ..., op15); - // hi = Vector128.Create(op16, ..., op31); + // lo = Vector128.Create(op1, ..., op16); + // hi = Vector128.Create(op17, ..., op32); - unsigned halfArgCnt = argCnt / 2; + size_t halfArgCnt = argCnt / 2; assert((halfArgCnt * 2) == argCnt); - argList = op1->AsArgList(); - - for (unsigned i = 0; i < halfArgCnt; i++) - { - op2 = argList; - argList = argList->Rest(); - } - - op2->AsArgList()->gtOp2 = nullptr; - op2 = argList; + GenTree* lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(), halfArgCnt, + NI_Vector128_Create, simdBaseJitType, 16); + BlockRange().InsertAfter(node->Op(halfArgCnt), lo); + LowerNode(lo); - // The above for loop splits the operand count into exactly half. - // Once it exits, op1 will point to op1 and op2 will point to the - // last operand that will be passed to the first Vector128.Create - // We will set its op2 to null, terminating the chain and then - // assign op2 to be argList, which is the first operand that will - // get passed to the second Vector128.Create - - GenTree* lo = nullptr; - GenTree* hi = nullptr; - - if (halfArgCnt == 2) - { - // The Vector256.Create calls that take 4 operands are special - // because the half argument count is 2, which means we can't - // actually use the GT_LIST anymore and need to pass them as - // explicit operands instead. 
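The lo/hi split above works because GetOperandArray(start) exposes the node's operands as one contiguous array, so each half is just an offset pointer and a count. A sketch of the idea (the real method lives on GenTreeMultiOp):

#include <cstddef>

struct NodeSketch; // stand-in for GenTree

NodeSketch** GetOperandArraySketch(NodeSketch** operands, size_t startIndex = 0)
{
    return operands + startIndex; // no list re-chaining required
}
// lo is built from operands [0, halfArgCnt), hi from [halfArgCnt, argCnt).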
- - argList = op1->AsArgList(); - - tmp1 = argList->Current(); - tmp2 = argList->Rest()->Current(); - - lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertAfter(tmp2, lo); - LowerNode(lo); - - argList = op2->AsArgList(); - - tmp1 = argList->Current(); - tmp2 = argList->Rest()->Current(); - - hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertAfter(tmp2, hi); - LowerNode(hi); - } - else - { - // The rest of the Vector256.Create calls take at least 8 operands - // and so the half count is at least 4 and we have to continue - // passing around GT_LIST nodes in op1 with a null op2 - assert(halfArgCnt >= 4); - - tmp1 = op2->AsArgList()->Current(); - - lo = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertBefore(tmp1, lo); - LowerNode(lo); - - hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_Create, simdBaseJitType, 16); - BlockRange().InsertBefore(node, hi); - LowerNode(hi); - } + GenTree* hi = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, node->GetOperandArray(halfArgCnt), halfArgCnt, + NI_Vector128_Create, simdBaseJitType, 16); + BlockRange().InsertAfter(node->Op(argCnt), hi); + LowerNode(hi); idx = comp->gtNewIconNode(0x01, TYP_INT); BlockRange().InsertAfter(hi, idx); - node->gtOp1 = comp->gtNewArgList(lo, hi, idx); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_AVX_InsertVector128; + assert(argCnt >= 3); + node->ResetHWIntrinsicId(NI_AVX_InsertVector128, comp, lo, hi, idx); return; } - if (op1->OperIsList()) - { - argList = op1->AsArgList(); - op1 = argList->Current(); - argList = argList->Rest(); - } - // We will be constructing the following parts: // /--* op1 T // tmp1 = * HWINTRINSIC simd16 T CreateScalarUnsafe @@ -2264,7 +2094,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse?.Insert(tmp1, opN, N); // ... - opN = argList->Current(); + opN = node->Op(N + 1); idx = comp->gtNewIconNode(N, TYP_INT); BlockRange().InsertAfter(opN, idx); @@ -2273,8 +2103,6 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) simdSize); BlockRange().InsertAfter(idx, tmp1); LowerNode(tmp1); - - argList = argList->Rest(); } assert(N == (argCnt - 1)); @@ -2291,15 +2119,12 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse?.Insert(tmp1, opN, N); // ... 
- opN = argList->Current(); + opN = node->Op(argCnt); idx = comp->gtNewIconNode(N, TYP_INT); BlockRange().InsertAfter(opN, idx); - node->gtOp1 = comp->gtNewArgList(tmp1, opN, idx); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = insIntrinsic; + node->ResetHWIntrinsicId(insIntrinsic, comp, tmp1, opN, idx); break; } @@ -2311,16 +2136,13 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) for (N = 1; N < argCnt; N++) { - opN = argList->Current(); + opN = node->Op(N + 1); op[N] = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, opN, NI_Vector128_CreateScalarUnsafe, simdBaseJitType, 16); BlockRange().InsertAfter(opN, op[N]); LowerNode(op[N]); - - argList = argList->Rest(); } - assert(argList == nullptr); if ((simdBaseType == TYP_BYTE) || (simdBaseType == TYP_UBYTE)) { @@ -2417,10 +2239,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op[3], tmp2); LowerNode(tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSE2_UnpackLow; + node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); node->SetSimdBaseJitType(CORINFO_TYPE_ULONG); break; } @@ -2446,10 +2265,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode(0x01, TYP_INT); BlockRange().InsertBefore(node, idx); - node->gtOp1 = comp->gtNewArgList(tmp1, op2, idx); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_SSE41_X64_Insert; + node->ResetHWIntrinsicId(NI_SSE41_X64_Insert, comp, tmp1, op2, idx); break; } @@ -2473,10 +2289,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op2, tmp2); LowerNode(tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSE2_UnpackLow; + node->ResetHWIntrinsicId(NI_SSE2_UnpackLow, tmp1, tmp2); break; } #endif // TARGET_AMD64 @@ -2508,7 +2321,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp1 = Sse41.Insert(tmp1, tmp2, N << 4); // ... 
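The N << 4 immediates in this float path follow the SSE4.1 insertps encoding: bits [5:4] of imm8 pick the destination lane, bits [7:6] pick the source lane, and bits [3:0] are a zero mask. As a small helper sketch:

#include <cstdint>

// imm8 for insertps; the lowering above always sources lane 0 of tmp2.
uint8_t InsertPsImm(uint8_t destLane, uint8_t srcLane = 0, uint8_t zeroMask = 0)
{
    return static_cast<uint8_t>((srcLane << 6) | (destLane << 4) | zeroMask);
}
// e.g. InsertPsImm(3) == (3 << 4), matching the (argCnt - 1) << 4 icon node.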
- opN = argList->Current(); + opN = node->Op(N + 1); tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, opN, NI_Vector128_CreateScalarUnsafe, simdBaseJitType, 16); @@ -2522,8 +2335,6 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) simdSize); BlockRange().InsertAfter(idx, tmp1); LowerNode(tmp1); - - argList = argList->Rest(); } // We will be constructing the following parts: @@ -2542,7 +2353,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // tmp2 = Vector128.CreateScalarUnsafe(opN); // return Sse41.Insert(tmp1, tmp2, N << 4); - opN = argList->Current(); + opN = node->Op(argCnt); tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, opN, NI_Vector128_CreateScalarUnsafe, simdBaseJitType, 16); @@ -2552,10 +2363,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) idx = comp->gtNewIconNode((argCnt - 1) << 4, TYP_INT); BlockRange().InsertAfter(tmp2, idx); - node->gtOp1 = comp->gtNewArgList(tmp1, tmp2, idx); - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_SSE41_Insert; + node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, tmp2, idx); break; } @@ -2592,16 +2400,13 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) for (N = 1; N < argCnt; N++) { - opN = argList->Current(); + opN = node->Op(N + 1); op[N] = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, opN, NI_Vector128_CreateScalarUnsafe, simdBaseJitType, 16); BlockRange().InsertAfter(opN, op[N]); LowerNode(op[N]); - - argList = argList->Rest(); } - assert(argList == nullptr); tmp1 = comp->gtNewSimdHWIntrinsicNode(simdType, op[0], op[1], NI_SSE_UnpackLow, simdBaseJitType, simdSize); BlockRange().InsertAfter(op[1], tmp1); @@ -2611,10 +2416,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op[3], tmp2); LowerNode(tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSE_MoveLowToHigh; + node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); break; } @@ -2640,12 +2442,8 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(op2, tmp2); LowerNode(tmp2); - node->gtOp1 = tmp1; - node->gtOp2 = tmp2; - - node->gtHWIntrinsicId = NI_SSE_MoveLowToHigh; + node->ResetHWIntrinsicId(NI_SSE_MoveLowToHigh, tmp1, tmp2); node->SetSimdBaseJitType(CORINFO_TYPE_FLOAT); - break; } @@ -2664,7 +2462,7 @@ void Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); var_types simdType = node->gtType; CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); @@ -2674,11 +2472,8 @@ void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - assert(op1 != nullptr); - assert(op2 != nullptr); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); if (op1->OperIs(GT_IND)) { @@ -2828,7 +2623,7 @@ void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) unreached(); } - op2 = nullptr; + node->ResetHWIntrinsicId(resIntrinsic, op1); } else { @@ -2871,18 +2666,15 @@ void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) default: unreached(); } - } - assert(resIntrinsic != NI_Illegal); + node->ResetHWIntrinsicId(resIntrinsic, op1, op2); + } - node->gtHWIntrinsicId = resIntrinsic; - node->gtOp1 = 
op1; - node->gtOp2 = op2; node->SetSimdSize(16); if (!varTypeIsFloating(simdBaseType)) { - assert(node->gtHWIntrinsicId != intrinsicId); + assert(node->GetHWIntrinsicId() != intrinsicId); LowerNode(node); } @@ -2913,8 +2705,8 @@ void Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; - var_types simdType = node->gtType; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); + var_types simdType = node->TypeGet(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -2923,30 +2715,11 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = nullptr; - GenTree* op3 = nullptr; - - assert(op1->OperIsList()); - assert(node->gtGetOp2() == nullptr); - - GenTreeArgList* argList = op1->AsArgList(); - - op1 = argList->Current(); - argList = argList->Rest(); - - op2 = argList->Current(); - argList = argList->Rest(); - - op3 = argList->Current(); - argList = argList->Rest(); - - assert(op1 != nullptr); - assert(op2 != nullptr); - assert(op3 != nullptr); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); + GenTree* op3 = node->Op(3); assert(op2->OperIsConst()); - assert(argList == nullptr); ssize_t imm8 = op2->AsIntCon()->IconValue(); ssize_t cachedImm8 = imm8; @@ -2985,29 +2758,36 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // Spare GenTrees to be used for the lowering logic below // Defined upfront to avoid naming conflicts, etc... - GenTree* idx = nullptr; - GenTree* tmp1 = nullptr; - GenTree* tmp2 = nullptr; - GenTree* tmp3 = nullptr; - GenTree* tmpv = nullptr; - + GenTree* idx = nullptr; + GenTree* tmp1 = nullptr; + GenTree* tmp2 = nullptr; + GenTreeHWIntrinsic* result = node; + + // If we have a simd32 WithElement, we will spill the original + // simd32 source into a local, extract the lower/upper half from + // it and then operate on that. At the end, we will insert the simd16 + // result back into the simd32 local, producing our final value. if (intrinsicId == NI_Vector256_WithElement) { assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); + // This copy of "node" will have the simd16 value we need. + result = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, intrinsicId, simdBaseJitType, 16); + BlockRange().InsertBefore(node, result); + // We will be constructing the following parts: // ... // /--* op1 simd32 // * STORE_LCL_VAR simd32 - // tmpv = LCL_VAR simd32 - // op1 = LCL_VAR simd32 + // tmp32 = LCL_VAR simd32 + // op1 = LCL_VAR simd32 - node->gtOp1 = op1; - LIR::Use op1Use(BlockRange(), &node->gtOp1, node); + // TODO-CQ: move the tmp32 node closer to the final InsertVector128. + LIR::Use op1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(op1Use); - tmpv = node->gtOp1; + GenTree* tmp32 = node->Op(1); - op1 = comp->gtClone(tmpv); + op1 = comp->gtClone(tmp32); BlockRange().InsertBefore(op3, op1); if (imm8 >= count / 2) @@ -3050,9 +2830,12 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) } op1 = tmp1; - } - NamedIntrinsic resIntrinsic = NI_Illegal; + // Now we will insert our "result" into our simd32 temporary. + idx = comp->gtNewIconNode((cachedImm8 >= count / 2) ? 
1 : 0); + BlockRange().InsertBefore(node, idx); + node->ChangeHWIntrinsicId(NI_AVX_InsertVector128, tmp32, result, idx); + } switch (simdBaseType) { @@ -3060,11 +2843,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_ULONG: { idx = comp->gtNewIconNode(imm8); - BlockRange().InsertBefore(node, idx); - - op1 = comp->gtNewArgList(op1, op3, idx); - op2 = nullptr; - resIntrinsic = NI_SSE41_X64_Insert; + BlockRange().InsertBefore(result, idx); + result->ChangeHWIntrinsicId(NI_SSE41_X64_Insert, op1, op3, idx); break; } @@ -3081,7 +2861,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_FLOAT, 16); - BlockRange().InsertBefore(node, tmp1); + BlockRange().InsertBefore(result, tmp1); LowerNode(tmp1); if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41)) @@ -3098,8 +2878,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // ... // node = Sse.MoveScalar(op1, op2); - op2 = tmp1; - resIntrinsic = NI_SSE_MoveScalar; + result->ResetHWIntrinsicId(NI_SSE_MoveScalar, op1, tmp1); } else { @@ -3125,10 +2904,10 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // tmp2 = Sse.Shuffle(tmp1, op1, 0 or 48 or 32); // node = Sse.Shuffle(tmp2, op1, 226 or 132 or 36); - node->gtOp1 = op1; - LIR::Use op1Use(BlockRange(), &node->gtOp1, node); + result->Op(1) = op1; + LIR::Use op1Use(BlockRange(), &result->Op(1), result); ReplaceWithLclVar(op1Use); - op2 = node->gtOp1; + op2 = result->Op(1); tmp2 = comp->gtClone(op2); BlockRange().InsertAfter(tmp1, tmp2); @@ -3194,9 +2973,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) std::swap(op1, op2); } - op1 = comp->gtNewArgList(op1, op2, idx); - op2 = nullptr; - resIntrinsic = NI_SSE_Shuffle; + result->ChangeHWIntrinsicId(NI_SSE_Shuffle, op1, op2, idx); } break; } @@ -3214,11 +2991,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_UINT: { idx = comp->gtNewIconNode(imm8); - BlockRange().InsertBefore(node, idx); - - op1 = comp->gtNewArgList(op1, op3, idx); - op2 = nullptr; - resIntrinsic = NI_SSE41_Insert; + BlockRange().InsertBefore(result, idx); + result->ChangeHWIntrinsicId(NI_SSE41_Insert, op1, op3, idx); break; } @@ -3226,11 +3000,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) case TYP_USHORT: { idx = comp->gtNewIconNode(imm8); - BlockRange().InsertBefore(node, idx); - - op1 = comp->gtNewArgList(op1, op3, idx); - op2 = nullptr; - resIntrinsic = NI_SSE2_Insert; + BlockRange().InsertBefore(result, idx); + result->ChangeHWIntrinsicId(NI_SSE2_Insert, op1, op3, idx); break; } @@ -3247,11 +3018,10 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_DOUBLE, 16); - BlockRange().InsertBefore(node, tmp1); + BlockRange().InsertBefore(result, tmp1); LowerNode(tmp1); - op2 = tmp1; - resIntrinsic = (imm8 == 0) ? NI_SSE2_MoveScalar : NI_SSE2_UnpackLow; + result->ResetHWIntrinsicId((imm8 == 0) ? 
NI_SSE2_MoveScalar : NI_SSE2_UnpackLow, op1, tmp1); break; } @@ -3259,28 +3029,16 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) unreached(); } - assert(resIntrinsic != NI_Illegal); + assert(result->GetHWIntrinsicId() != intrinsicId); - if (tmpv != nullptr) + LowerNode(result); + if (intrinsicId == NI_Vector256_WithElement) { - tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, resIntrinsic, simdBaseJitType, 16); - BlockRange().InsertBefore(node, tmp1); - LowerNode(tmp1); - - idx = comp->gtNewIconNode((cachedImm8 >= count / 2) ? 1 : 0); - BlockRange().InsertAfter(tmp1, idx); - - op1 = comp->gtNewArgList(tmpv, tmp1, idx); - op2 = nullptr; - resIntrinsic = NI_AVX_InsertVector128; + // Now that we have finalized the shape of the tree, lower the insertion node as well. + assert(node->GetHWIntrinsicId() == NI_AVX_InsertVector128); + assert(node != result); + LowerNode(node); } - - node->gtHWIntrinsicId = resIntrinsic; - node->gtOp1 = op1; - node->gtOp2 = op2; - - assert(node->gtHWIntrinsicId != intrinsicId); - LowerNode(node); } //---------------------------------------------------------------------------------------------- @@ -3291,7 +3049,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -3303,12 +3061,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) assert(varTypeIsArithmetic(simdBaseType)); assert(simdSize != 0); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - - assert(op1 != nullptr); - assert(op2 != nullptr); - assert(!op1->OperIsList()); + GenTree* op1 = node->Op(1); + GenTree* op2 = node->Op(2); // Spare GenTrees to be used for the lowering logic below // Defined upfront to avoid naming conflicts, etc... @@ -3375,10 +3129,10 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(idx, tmp1); LowerNode(tmp1); - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -3397,12 +3151,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) node->SetSimdSize(16); - node->gtOp1 = tmp3; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_Vector128_ToScalar; + node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); LowerNode(node); - return; } @@ -3488,12 +3238,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(idx, tmp3); LowerNode(tmp3); - node->gtOp1 = tmp3; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_Vector128_ToScalar; + node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); LowerNode(node); - return; } @@ -3533,12 +3279,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) BlockRange().InsertAfter(idx, tmp3); LowerNode(tmp3); - node->gtOp1 = tmp3; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_Vector128_ToScalar; + node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp3); LowerNode(node); - return; } @@ -3646,10 +3388,10 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // tmp2 = tmp1; // ... 
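A recurring idiom in these hunks is worth calling out: to read a LIR value twice, the value is first spilled to a local, and the second read is then a clone of the first. In outline, straight from the surrounding code:

node->Op(1) = tmp1;
LIR::Use tmp1Use(BlockRange(), &node->Op(1), node);
ReplaceWithLclVar(tmp1Use); // stores tmp1 to a new local and rewrites the use
tmp1 = node->Op(1);         // now a LCL_VAR read of that local
tmp2 = comp->gtClone(tmp1); // an independent second read
BlockRange().InsertAfter(tmp1, tmp2);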
- node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -3748,10 +3490,10 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // tmp2 = Isa.Shuffle(tmp2, tmp3, shuffleConst); // ... - node->gtOp1 = tmp2; - LIR::Use tmp2Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp2; + LIR::Use tmp2Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp2Use); - tmp2 = node->gtOp1; + tmp2 = node->Op(1); tmp3 = comp->gtClone(tmp2); BlockRange().InsertAfter(tmp2, tmp3); @@ -3860,10 +3602,10 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // var tmp1 = Isa.Add(tmp1, tmp2); // ... - node->gtOp1 = tmp1; - LIR::Use tmp1Use(BlockRange(), &node->gtOp1, node); + node->Op(1) = tmp1; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); ReplaceWithLclVar(tmp1Use); - tmp1 = node->gtOp1; + tmp1 = node->Op(1); tmp2 = comp->gtClone(tmp1); BlockRange().InsertAfter(tmp1, tmp2); @@ -3892,13 +3634,8 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // ... // return tmp1.ToScalar(); - node->gtOp1 = tmp1; - node->gtOp2 = nullptr; - - node->gtHWIntrinsicId = NI_Vector128_ToScalar; + node->ResetHWIntrinsicId(NI_Vector128_ToScalar, tmp1); LowerNode(node); - - return; } //---------------------------------------------------------------------------------------------- @@ -3909,7 +3646,7 @@ void Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) // void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); unsigned simdSize = node->GetSimdSize(); @@ -3928,7 +3665,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) { node->gtType = TYP_INT; node->SetSimdBaseJitType(CORINFO_TYPE_INT); - node->gtHWIntrinsicId = NI_SSE2_ConvertToInt32; + node->ChangeHWIntrinsicId(NI_SSE2_ConvertToInt32); break; } @@ -3938,20 +3675,20 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) { node->gtType = TYP_UINT; node->SetSimdBaseJitType(CORINFO_TYPE_UINT); - node->gtHWIntrinsicId = NI_SSE2_ConvertToUInt32; + node->ChangeHWIntrinsicId(NI_SSE2_ConvertToUInt32); break; } #if defined(TARGET_AMD64) case TYP_LONG: { - node->gtHWIntrinsicId = NI_SSE2_X64_ConvertToInt64; + node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertToInt64); break; } case TYP_ULONG: { - node->gtHWIntrinsicId = NI_SSE2_X64_ConvertToUInt64; + node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertToUInt64); break; } #endif // TARGET_AMD64 @@ -3983,6 +3720,7 @@ void Lowering::LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node) { use.ReplaceWith(cast); } + LowerNode(cast); } } @@ -5395,11 +5133,11 @@ void Lowering::ContainCheckIntrinsic(GenTreeOp* node) // void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) { - switch (simdNode->gtSIMDIntrinsicID) + switch (simdNode->GetSIMDIntrinsicId()) { case SIMDIntrinsicInit: { - GenTree* op1 = simdNode->AsOp()->gtOp1; + GenTree* op1 = simdNode->Op(1); #ifndef TARGET_64BIT if (op1->OperGet() == GT_LONG) { @@ -5435,13 +5173,13 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) case SIMDIntrinsicInitArray: // We have an array and an index, which may be contained. 
- CheckImmedAndMakeContained(simdNode, simdNode->gtGetOp2()); + CheckImmedAndMakeContained(simdNode, simdNode->Op(2)); break; case SIMDIntrinsicShuffleSSE2: // Second operand is an integer constant and marked as contained. - assert(simdNode->AsOp()->gtOp2->IsCnsIntOrI()); - MakeSrcContained(simdNode, simdNode->AsOp()->gtOp2); + assert(simdNode->Op(2)->IsCnsIntOrI()); + MakeSrcContained(simdNode, simdNode->Op(2)); break; default: @@ -5465,7 +5203,7 @@ void Lowering::ContainCheckSIMD(GenTreeSIMD* simdNode) // bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, GenTree* node, bool* supportsRegOptional) { - NamedIntrinsic containingIntrinsicId = containingNode->gtHWIntrinsicId; + NamedIntrinsic containingIntrinsicId = containingNode->GetHWIntrinsicId(); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(containingIntrinsicId); // We shouldn't have called in here if containingNode doesn't support containment @@ -5632,25 +5370,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge assert(supportsGeneralLoads == false); assert(supportsSIMDScalarLoads == false); - GenTree* op1 = containingNode->gtGetOp1(); - GenTree* op2 = nullptr; - GenTree* op3 = nullptr; - - assert(op1->OperIsList()); - assert(containingNode->gtGetOp2() == nullptr); - - GenTreeArgList* argList = op1->AsArgList(); - - op1 = argList->Current(); - argList = argList->Rest(); - - op2 = argList->Current(); - argList = argList->Rest(); + GenTree* op1 = containingNode->Op(1); + GenTree* op2 = containingNode->Op(2); + GenTree* op3 = containingNode->Op(3); assert(node == op2); - op3 = argList->Current(); - // The upper two bits of the immediate value are ignored if // op2 comes from memory. In order to support using the upper // bits, we need to disable containment support if op3 is not @@ -5825,7 +5550,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge // TODO-XArch: Update this to be table driven, if possible. 
- NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->AsHWIntrinsic()->GetHWIntrinsicId(); switch (intrinsicId) { @@ -5891,23 +5616,18 @@ void Lowering::ContainCheckHWIntrinsicAddr(GenTreeHWIntrinsic* node, GenTree* ad // void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { - NamedIntrinsic intrinsicId = node->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); - int numArgs = HWIntrinsicInfo::lookupNumArgs(node); + size_t numArgs = node->GetOperandCount(); CorInfoType simdBaseJitType = node->GetSimdBaseJitType(); var_types simdBaseType = node->GetSimdBaseType(); - GenTree* op1 = node->gtGetOp1(); - GenTree* op2 = node->gtGetOp2(); - GenTree* op3 = nullptr; - if (!HWIntrinsicInfo::SupportsContainment(intrinsicId)) { // AVX2 gather are not containable and always have constant IMM argument if (HWIntrinsicInfo::isAVX2GatherIntrinsic(intrinsicId)) { - GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); - assert(lastOp != nullptr); + GenTree* lastOp = node->Op(numArgs); MakeSrcContained(node, lastOp); } // Exit early if containment isn't supported @@ -5916,8 +5636,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (HWIntrinsicInfo::lookupCategory(intrinsicId) == HW_Category_IMM) { - GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); - assert(lastOp != nullptr); + GenTree* lastOp = node->Op(numArgs); if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI()) { @@ -5940,18 +5659,21 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) const bool isCommutative = HWIntrinsicInfo::IsCommutative(intrinsicId); + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + if (numArgs == 1) { // One argument intrinsics cannot be commutative assert(!isCommutative); - assert(!op1->OperIsList()); - assert(op2 == nullptr); + op1 = node->Op(1); switch (category) { case HW_Category_MemoryLoad: - ContainCheckHWIntrinsicAddr(node, node->gtGetOp1()); + ContainCheckHWIntrinsicAddr(node, op1); break; case HW_Category_SimpleSIMD: @@ -6004,9 +5726,9 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_ConvertToVector256Int16: case NI_AVX2_ConvertToVector256Int32: case NI_AVX2_ConvertToVector256Int64: - if (!varTypeIsSIMD(op1->gtType)) + if (!varTypeIsSIMD(op1)) { - ContainCheckHWIntrinsicAddr(node, node->gtGetOp1()); + ContainCheckHWIntrinsicAddr(node, op1); return; } break; @@ -6041,29 +5763,28 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { if (numArgs == 2) { - assert(!op1->OperIsList()); - assert(op2 != nullptr); - assert(!op2->OperIsList()); + op1 = node->Op(1); + op2 = node->Op(2); switch (category) { case HW_Category_MemoryLoad: if ((intrinsicId == NI_AVX_MaskLoad) || (intrinsicId == NI_AVX2_MaskLoad)) { - ContainCheckHWIntrinsicAddr(node, node->gtGetOp1()); + ContainCheckHWIntrinsicAddr(node, op1); } else { - ContainCheckHWIntrinsicAddr(node, node->gtGetOp2()); + ContainCheckHWIntrinsicAddr(node, op2); } break; case HW_Category_MemoryStore: - ContainCheckHWIntrinsicAddr(node, node->gtGetOp1()); + ContainCheckHWIntrinsicAddr(node, op1); if (((intrinsicId == NI_SSE_Store) || (intrinsicId == NI_SSE2_Store)) && op2->OperIsHWIntrinsic() && - ((op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_AVX_ExtractVector128) || - (op2->AsHWIntrinsic()->gtHWIntrinsicId == NI_AVX2_ExtractVector128)) && + 
((op2->AsHWIntrinsic()->GetHWIntrinsicId() == NI_AVX_ExtractVector128) || + (op2->AsHWIntrinsic()->GetHWIntrinsicId() == NI_AVX2_ExtractVector128)) && op2->gtGetOp2()->IsIntegralConst()) { MakeSrcContained(node, op2); @@ -6087,8 +5808,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) MakeSrcContained(node, op1); // Swap the operands here to make the containment checks in codegen significantly simpler - node->gtOp1 = op2; - node->gtOp2 = op1; + node->Op(1) = op2; + node->Op(2) = op1; } else if (supportsRegOptional) { @@ -6219,15 +5940,13 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // These intrinsics should have been marked contained by the general-purpose handling // earlier in the method. - GenTree* lastOp = HWIntrinsicInfo::lookupLastOp(node); - assert(lastOp != nullptr); + GenTree* lastOp = node->Op(numArgs); if (HWIntrinsicInfo::isImmOp(intrinsicId, lastOp) && lastOp->IsCnsIntOrI()) { assert(lastOp->isContained()); } #endif - break; } @@ -6296,25 +6015,14 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // three argument intrinsics should not be marked commutative assert(!isCommutative); - assert(op1->OperIsList()); - assert(op2 == nullptr); - - GenTreeArgList* argList = op1->AsArgList(); - GenTreeArgList* originalArgList = argList; - - op1 = argList->Current(); - argList = argList->Rest(); - - op2 = argList->Current(); - argList = argList->Rest(); - - op3 = argList->Current(); - assert(argList->Rest() == nullptr); + op1 = node->Op(1); + op2 = node->Op(2); + op3 = node->Op(3); switch (category) { case HW_Category_MemoryStore: - ContainCheckHWIntrinsicAddr(node, node->gtGetOp1()->AsOp()->gtGetOp1()); + ContainCheckHWIntrinsicAddr(node, op1); break; case HW_Category_SimpleSIMD: @@ -6403,8 +6111,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) MakeSrcContained(node, op1); // MultiplyNoFlags is a Commutative operation, so swap the first two operands here // to make the containment checks in codegen significantly simpler - originalArgList->Current() = op2; - originalArgList->Rest()->Current() = op1; + node->Op(1) = op2; + node->Op(2) = op1; } else if (supportsRegOptional) { diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 39bd3a0cb783c..ba7d750de3666 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -6297,9 +6297,8 @@ void LinearScan::insertUpperVectorSave(GenTree* tree, saveLcl->SetRegNum(lclVarReg); SetLsraAdded(saveLcl); - GenTreeSIMD* simdNode = - new (compiler, GT_SIMD) GenTreeSIMD(LargeVectorSaveType, saveLcl, nullptr, SIMDIntrinsicUpperSave, - varDsc->GetSimdBaseJitType(), genTypeSize(varDsc->lvType)); + GenTreeSIMD* simdNode = compiler->gtNewSIMDNode(LargeVectorSaveType, saveLcl, SIMDIntrinsicUpperSave, + varDsc->GetSimdBaseJitType(), genTypeSize(varDsc)); if (simdNode->GetSimdBaseJitType() == CORINFO_TYPE_UNDEF) { @@ -6364,9 +6363,8 @@ void LinearScan::insertUpperVectorRestore(GenTree* tree, restoreLcl->SetRegNum(lclVarReg); SetLsraAdded(restoreLcl); - GenTreeSIMD* simdNode = - new (compiler, GT_SIMD) GenTreeSIMD(varDsc->lvType, restoreLcl, nullptr, SIMDIntrinsicUpperRestore, - varDsc->GetSimdBaseJitType(), genTypeSize(varDsc->lvType)); + GenTreeSIMD* simdNode = compiler->gtNewSIMDNode(varDsc->TypeGet(), restoreLcl, SIMDIntrinsicUpperRestore, + varDsc->GetSimdBaseJitType(), genTypeSize(varDsc->lvType)); if (simdNode->GetSimdBaseJitType() == CORINFO_TYPE_UNDEF) { diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index ce94501a39671..d5700e1432d35 
100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1804,13 +1804,13 @@ class LinearScan : public LinearScanInterface void setDelayFree(RefPosition* use); int BuildBinaryUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); #ifdef TARGET_XARCH - int BuildRMWUses(GenTreeOp* node, regMaskTP candidates = RBM_NONE); + int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates = RBM_NONE); #endif // !TARGET_XARCH // This is the main entry point for building the RefPositions for a node. // These methods return the number of sources. int BuildNode(GenTree* tree); - void getTgtPrefOperands(GenTreeOp* tree, bool& prefOp1, bool& prefOp2); + void getTgtPrefOperands(GenTree* tree, GenTree* op1, GenTree* op2, bool* prefOp1, bool* prefOp2); bool supportsSpecialPutArg(); int BuildSimple(GenTree* tree); diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 0cea6e1764e6a..73c87786ae445 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -433,7 +433,6 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; break; - case GT_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 8044043ba58fd..132480b6f42a2 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -121,7 +121,6 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; break; - case GT_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: @@ -802,17 +801,14 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // Only SIMDIntrinsicInit can be contained if (simdTree->isContained()) { - assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); + assert(simdTree->GetSIMDIntrinsicId() == SIMDIntrinsicInit); } int dstCount = simdTree->IsValue() ? 1 : 0; assert(dstCount == 1); bool buildUses = true; - GenTree* op1 = simdTree->gtGetOp1(); - GenTree* op2 = simdTree->gtGetOp2(); - - switch (simdTree->gtSIMDIntrinsicID) + switch (simdTree->GetSIMDIntrinsicId()) { case SIMDIntrinsicInit: case SIMDIntrinsicCast: @@ -834,25 +830,22 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { var_types baseType = simdTree->GetSimdBaseType(); srcCount = (short)(simdTree->GetSimdSize() / genTypeSize(baseType)); + assert(simdTree->GetOperandCount() == static_cast<size_t>(srcCount)); if (varTypeIsFloating(simdTree->GetSimdBaseType())) { // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
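The new assert records the InitN invariant that there is exactly one operand per lane; with illustrative numbers, a TYP_SIMD16 node with a TYP_FLOAT base type carries 16 / 4 = 4 operands, each of which gets its own BuildUse in the loop below:

#include <cstddef>

// The relationship the assert encodes (sizes in bytes).
size_t InitNOperandCount(size_t simdSize, size_t laneSize)
{
    return simdSize / laneSize; // e.g. InitNOperandCount(16, 4) == 4
}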
buildInternalFloatRegisterDefForNode(simdTree); } - int initCount = 0; - for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2()) + for (GenTree* operand : simdTree->Operands()) { - assert(list->OperGet() == GT_LIST); - GenTree* listItem = list->gtGetOp1(); - assert(listItem->TypeGet() == baseType); - assert(!listItem->isContained()); - BuildUse(listItem); - initCount++; + assert(operand->TypeIs(baseType)); + assert(!operand->isContained()); + + BuildUse(operand); } - assert(initCount == srcCount); - buildUses = false; + buildUses = false; break; } @@ -876,12 +869,12 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) } if (buildUses) { - assert(!op1->OperIs(GT_LIST)); assert(srcCount == 0); - srcCount = BuildOperandUses(op1); - if ((op2 != nullptr) && !op2->isContained()) + srcCount = BuildOperandUses(simdTree->Op(1)); + + if ((simdTree->GetOperandCount() == 2) && !simdTree->Op(2)->isContained()) { - srcCount += BuildOperandUses(op2); + srcCount += BuildOperandUses(simdTree->Op(2)); } } assert(internalCount <= MaxInternalCount); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index c286a71406938..6482a2436555c 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -985,7 +985,7 @@ regMaskTP LinearScan::getKillSetForHWIntrinsic(GenTreeHWIntrinsic* node) { regMaskTP killMask = RBM_NONE; #ifdef TARGET_XARCH - switch (node->gtHWIntrinsicId) + switch (node->GetHWIntrinsicId()) { case NI_SSE2_MaskMove: // maskmovdqu uses edi as the implicit address register. @@ -1683,10 +1683,9 @@ int LinearScan::ComputeAvailableSrcCount(GenTree* node) // void LinearScan::buildRefPositionsForNode(GenTree* tree, LsraLocation currentLoc) { - // The LIR traversal doesn't visit GT_LIST or GT_ARGPLACE nodes. + // The LIR traversal doesn't visit GT_ARGPLACE nodes. // GT_CLS_VAR nodes should have been eliminated by rationalizer. 
assert(tree->OperGet() != GT_ARGPLACE); - assert(tree->OperGet() != GT_LIST); assert(tree->OperGet() != GT_CLS_VAR); // The set of internal temporary registers used by this node are stored in the @@ -3089,9 +3088,11 @@ int LinearScan::BuildOperandUses(GenTree* node, regMaskTP candidates) { if (node->AsHWIntrinsic()->OperIsMemoryLoad()) { - return BuildAddrUses(node->gtGetOp1()); + return BuildAddrUses(node->AsHWIntrinsic()->Op(1)); } - BuildUse(node->gtGetOp1(), candidates); + + assert(node->AsHWIntrinsic()->GetOperandCount() == 1); + BuildUse(node->AsHWIntrinsic()->Op(1), candidates); return 1; } #endif // FEATURE_HW_INTRINSICS @@ -3153,10 +3154,13 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP ca { use = BuildUse(node, candidates); } +#ifdef FEATURE_HW_INTRINSICS else if (node->OperIsHWIntrinsic()) { - use = BuildUse(node->gtGetOp1(), candidates); + assert(node->AsHWIntrinsic()->GetOperandCount() == 1); + use = BuildUse(node->AsHWIntrinsic()->Op(1), candidates); } +#endif else if (!node->OperIsIndir()) { return 0; @@ -3229,15 +3233,17 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, GenTree* rmwNode, regMaskTP ca // int LinearScan::BuildBinaryUses(GenTreeOp* node, regMaskTP candidates) { + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2IfPresent(); + #ifdef TARGET_XARCH if (node->OperIsBinary() && isRMWRegOper(node)) { - return BuildRMWUses(node, candidates); + assert(op2 != nullptr); + return BuildRMWUses(node, op1, op2, candidates); } #endif // TARGET_XARCH - int srcCount = 0; - GenTree* op1 = node->gtOp1; - GenTree* op2 = node->gtGetOp2IfPresent(); + int srcCount = 0; if (op1 != nullptr) { srcCount += BuildOperandUses(op1, candidates); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 9b23c3a57d223..68a5c3147d8eb 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -124,7 +124,6 @@ int LinearScan::BuildNode(GenTree* tree) srcCount = 0; break; - case GT_LIST: case GT_ARGPLACE: case GT_NO_OP: case GT_START_NONGC: @@ -701,6 +700,8 @@ int LinearScan::BuildNode(GenTree* tree) // // Arguments: // tree - the node of interest. +// op1 - its first operand +// op2 - its second operand // prefOp1 - a bool "out" parameter indicating, on return, whether op1 should be preferenced to the target. // prefOp2 - a bool "out" parameter indicating, on return, whether op2 should be preferenced to the target. // @@ -710,27 +711,24 @@ int LinearScan::BuildNode(GenTree* tree) // Notes: // The caller is responsible for initializing the two "out" parameters to false. // -void LinearScan::getTgtPrefOperands(GenTreeOp* tree, bool& prefOp1, bool& prefOp2) +void LinearScan::getTgtPrefOperands(GenTree* tree, GenTree* op1, GenTree* op2, bool* prefOp1, bool* prefOp2) { // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1. // Even then we would like to set isTgtPref on Op1. - if (tree->OperIsBinary() && isRMWRegOper(tree)) + if (isRMWRegOper(tree)) { - GenTree* op1 = tree->gtGetOp1(); - GenTree* op2 = tree->gtGetOp2(); - // If we have a read-modify-write operation, we want to preference op1 to the target, // if it is not contained. - if (!op1->isContained() && !op1->OperIs(GT_LIST)) + if (!op1->isContained()) { - prefOp1 = true; + *prefOp1 = true; } // Commutative opers like add/mul/and/or/xor could reverse the order of operands if it is safe to do so. // In that case we will preference both, to increase the chance of getting a match. 
- if (tree->OperIsCommutative() && op2 != nullptr && !op2->isContained()) + if (tree->OperIsCommutative() && (op2 != nullptr) && !op2->isContained()) { - prefOp2 = true; + *prefOp2 = true; } } } @@ -751,7 +749,13 @@ bool LinearScan::isRMWRegOper(GenTree* tree) { // TODO-XArch-CQ: Make this more accurate. // For now, We assume that most binary operators are of the RMW form. + CLANG_FORMAT_COMMENT_ANCHOR; + +#ifdef FEATURE_HW_INTRINSICS + assert(tree->OperIsBinary() || (tree->OperIsMultiOp() && (tree->AsMultiOp()->GetOperandCount() <= 2))); +#else assert(tree->OperIsBinary()); +#endif if (tree->OperIsCompare() || tree->OperIs(GT_CMP) || tree->OperIs(GT_BT)) { @@ -801,11 +805,9 @@ bool LinearScan::isRMWRegOper(GenTree* tree) } // Support for building RefPositions for RMW nodes. -int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) +int LinearScan::BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, regMaskTP candidates) { int srcCount = 0; - GenTree* op1 = node->gtOp1; - GenTree* op2 = node->gtGetOp2IfPresent(); regMaskTP op1Candidates = candidates; regMaskTP op2Candidates = candidates; @@ -828,7 +830,7 @@ int LinearScan::BuildRMWUses(GenTreeOp* node, regMaskTP candidates) bool prefOp1 = false; bool prefOp2 = false; - getTgtPrefOperands(node, prefOp1, prefOp2); + getTgtPrefOperands(node, op1, op2, &prefOp1, &prefOp2); assert(!prefOp2 || node->OperIsCommutative()); // Determine which operand, if any, should be delayRegFree. Normally, this would be op2, @@ -1873,14 +1875,12 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) if (simdTree->isContained()) { // Only SIMDIntrinsicInit can be contained - assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicInit); + assert(simdTree->GetSIMDIntrinsicId() == SIMDIntrinsicInit); } SetContainsAVXFlags(simdTree->GetSimdSize()); - GenTree* op1 = simdTree->gtGetOp1(); - GenTree* op2 = simdTree->gtGetOp2(); - int srcCount = 0; + int srcCount = 0; - switch (simdTree->gtSIMDIntrinsicID) + switch (simdTree->GetSIMDIntrinsicId()) { case SIMDIntrinsicInit: { @@ -1893,6 +1893,8 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) CLANG_FORMAT_COMMENT_ANCHOR; #if !defined(TARGET_64BIT) + GenTree* op1 = simdTree->Op(1); + if (op1->OperGet() == GT_LONG) { assert(op1->isContained()); @@ -1928,19 +1930,19 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) { var_types baseType = simdTree->GetSimdBaseType(); srcCount = (short)(simdTree->GetSimdSize() / genTypeSize(baseType)); + assert(simdTree->GetOperandCount() == static_cast<size_t>(srcCount)); + // Need an internal register to stitch together all the values into a single vector in a SIMD reg. buildInternalFloatRegisterDefForNode(simdTree); - int initCount = 0; - for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2()) + + for (GenTree* operand : simdTree->Operands()) { - assert(list->OperGet() == GT_LIST); - GenTree* listItem = list->gtGetOp1(); - assert(listItem->TypeGet() == baseType); - assert(!listItem->isContained()); - BuildUse(listItem); - initCount++; + assert(operand->TypeIs(baseType)); + assert(!operand->isContained()); + + BuildUse(operand); } - assert(initCount == srcCount); + buildUses = false; } break; @@ -2010,7 +2012,7 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) case SIMDIntrinsicShuffleSSE2: // Second operand is an integer constant and marked as contained.
- assert(simdTree->gtGetOp2()->isContainedIntOrIImmed()); + assert(simdTree->Op(2)->isContainedIntOrIImmed()); break; default: @@ -2019,10 +2021,11 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) } if (buildUses) { - assert(!op1->OperIs(GT_LIST)); assert(srcCount == 0); // This is overly conservative, but is here for zero diffs. - srcCount = BuildRMWUses(simdTree); + GenTree* op1 = simdTree->Op(1); + GenTree* op2 = (simdTree->GetOperandCount() == 2) ? simdTree->Op(2) : nullptr; + srcCount = BuildRMWUses(simdTree, op1, op2); } buildInternalRegisterUses(); BuildDef(simdTree, dstCandidates); @@ -2042,10 +2045,10 @@ int LinearScan::BuildSIMD(GenTreeSIMD* simdTree) // int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { - NamedIntrinsic intrinsicId = intrinsicTree->gtHWIntrinsicId; + NamedIntrinsic intrinsicId = intrinsicTree->GetHWIntrinsicId(); var_types baseType = intrinsicTree->GetSimdBaseType(); + size_t numArgs = intrinsicTree->GetOperandCount(); HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(intrinsicId); - int numArgs = HWIntrinsicInfo::lookupNumArgs(intrinsicTree); // Set the AVX Flags if this instruction may use VEX encoding for SIMD operations. // Note that this may be true even if the ISA is not AVX (e.g. for platform-agnostic intrinsics @@ -2055,60 +2058,21 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) SetContainsAVXFlags(intrinsicTree->GetSimdSize()); } - GenTree* op1 = intrinsicTree->gtGetOp1(); - GenTree* op2 = intrinsicTree->gtGetOp2(); - GenTree* op3 = nullptr; - GenTree* lastOp = nullptr; - int srcCount = 0; int dstCount = intrinsicTree->IsValue() ? 1 : 0; regMaskTP dstCandidates = RBM_NONE; - if (op1 == nullptr) + if (intrinsicTree->GetOperandCount() == 0) { - assert(op2 == nullptr); assert(numArgs == 0); } else { - if (op1->OperIsList()) - { - assert(op2 == nullptr); - assert(numArgs >= 3); - - GenTreeArgList* argList = op1->AsArgList(); - - op1 = argList->Current(); - argList = argList->Rest(); - - op2 = argList->Current(); - argList = argList->Rest(); - - op3 = argList->Current(); - - while (argList->Rest() != nullptr) - { - argList = argList->Rest(); - } - - lastOp = argList->Current(); - argList = argList->Rest(); - - assert(argList == nullptr); - } - else if (op2 != nullptr) - { - assert(numArgs == 2); - lastOp = op2; - } - else - { - assert(numArgs == 1); - lastOp = op1; - } - - assert(lastOp != nullptr); + GenTree* op1 = intrinsicTree->Op(1); + GenTree* op2 = (numArgs >= 2) ? intrinsicTree->Op(2) : nullptr; + GenTree* op3 = (numArgs >= 3) ? 
intrinsicTree->Op(3) : nullptr; + GenTree* lastOp = intrinsicTree->Op(numArgs); bool buildUses = true; @@ -2393,12 +2357,10 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) case NI_AVX2_GatherMaskVector128: case NI_AVX2_GatherMaskVector256: { - assert(numArgs == 5); assert(!isRMW); - assert(intrinsicTree->gtGetOp1()->OperIsList()); - GenTreeArgList* argList = intrinsicTree->gtGetOp1()->AsArgList()->Rest()->Rest()->Rest(); - GenTree* op4 = argList->Current(); + GenTree* op4 = intrinsicTree->Op(4); + GenTree* op5 = intrinsicTree->Op(5); // Any pair of the index, mask, or destination registers should be different srcCount += BuildOperandUses(op1); @@ -2407,7 +2369,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) srcCount += BuildDelayFreeUses(op4); // op5 should always be contained - assert(argList->Rest()->Current()->isContained()); + assert(op5->isContained()); // get a tmp register for mask that will be cleared by gather instructions buildInternalFloatRegisterDefForNode(intrinsicTree, allSIMDRegs()); @@ -2446,7 +2408,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) { if (op2->OperIs(GT_HWINTRINSIC) && op2->AsHWIntrinsic()->OperIsMemoryLoad() && op2->isContained()) { - srcCount += BuildAddrUses(op2->gtGetOp1()); + srcCount += BuildAddrUses(op2->AsHWIntrinsic()->Op(1)); } else if (isRMW) { diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 3840aec920826..53a4c29bc6667 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5864,7 +5864,7 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac) GenTree* newTree = fgMorphFieldToSimdGetElement(tree); if (newTree != tree) { - newTree = fgMorphSmpOp(newTree); + newTree = fgMorphTree(newTree); return newTree; } } @@ -10054,15 +10054,14 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree) } else // InitBlk { -#if FEATURE_SIMD +#ifdef FEATURE_SIMD if (varTypeIsSIMD(asgType)) { assert(!isCopyBlock); // Else we would have returned the tree above. noway_assert(src->IsIntegralConst(0)); noway_assert(destVarDsc != nullptr); - src = new (this, GT_SIMD) - GenTreeSIMD(asgType, src, SIMDIntrinsicInit, destVarDsc->GetSimdBaseJitType(), size); + src = gtNewSIMDNode(asgType, src, SIMDIntrinsicInit, destVarDsc->GetSimdBaseJitType(), size); } else #endif @@ -10753,9 +10752,6 @@ GenTree* Compiler::fgMorphFieldToSimdGetElement(GenTree* tree) tree = gtNewSimdGetElementNode(simdBaseType, simdStructNode, op2, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true); -#ifdef DEBUG - tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED; -#endif } return tree; } @@ -11571,9 +11567,6 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) } break; #endif - case GT_LIST: - // Special handling for the arg list. - return fgMorphArgList(tree->AsArgList(), mac); case GT_PUTARG_TYPE: return fgMorphTree(tree->AsUnOp()->gtGetOp1()); @@ -13276,7 +13269,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac) // Propagate comma throws. // If we are in the Valuenum CSE phase then don't morph away anything as these // nodes may have CSE defs/uses in them. 
- if (fgGlobalMorph && (oper != GT_ASG) && (oper != GT_COLON) && !tree->OperIsAnyList()) + if (fgGlobalMorph && (oper != GT_ASG) && (oper != GT_COLON)) { if ((op1 != nullptr) && fgIsCommaThrow(op1, true)) { @@ -14197,52 +14190,72 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree) } break; + default: + break; + } + return tree; +} + +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +//------------------------------------------------------------------------ +// fgMorphMultiOp: Morph a GenTreeMultiOp (SIMD/HWINTRINSIC) tree. +// +// Arguments: +// multiOp - The tree to morph +// +// Return Value: +// The fully morphed tree. +// +GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) +{ + gtUpdateNodeOperSideEffects(multiOp); + for (GenTree** use : multiOp->UseEdges()) + { + *use = fgMorphTree(*use); + multiOp->gtFlags |= ((*use)->gtFlags & GTF_ALL_EFFECT); + } + #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - case GT_HWINTRINSIC: + if (opts.OptimizationEnabled() && multiOp->OperIs(GT_HWINTRINSIC)) + { + GenTreeHWIntrinsic* hw = multiOp->AsHWIntrinsic(); + switch (hw->GetHWIntrinsicId()) { - GenTreeHWIntrinsic* hw = tree->AsHWIntrinsic(); - switch (hw->gtHWIntrinsicId) + case NI_SSE_Xor: + case NI_SSE2_Xor: + case NI_AVX_Xor: + case NI_AVX2_Xor: { - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: + // Transform XOR(X, 0) to X for vectors + GenTree* op1 = hw->Op(1); + GenTree* op2 = hw->Op(2); + if (!gtIsActiveCSE_Candidate(hw)) { - // Transform XOR(X, 0) to X for vectors - GenTree* op1 = hw->gtGetOp1(); - GenTree* op2 = hw->gtGetOp2(); - if (!gtIsActiveCSE_Candidate(tree)) + if (op1->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op1)) { - if (op1->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op1)) - { - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op1); - INDEBUG(op2->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op2; - } - if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2)) - { - DEBUG_DESTROY_NODE(tree); - DEBUG_DESTROY_NODE(op2); - INDEBUG(op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); - return op1; - } + DEBUG_DESTROY_NODE(hw); + DEBUG_DESTROY_NODE(op1); + return op2; + } + if (op2->IsIntegralConstVector(0) && !gtIsActiveCSE_Candidate(op2)) + { + DEBUG_DESTROY_NODE(hw); + DEBUG_DESTROY_NODE(op2); + return op1; } - break; } - - default: - break; + break; } - break; + + default: + break; } + } #endif // defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - default: - break; - } - return tree; + return multiOp; } +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) //------------------------------------------------------------------------ // fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b @@ -14813,6 +14826,17 @@ GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac) tree = fgMorphCall(tree->AsCall()); break; +#if defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) +#if defined(FEATURE_SIMD) + case GT_SIMD: +#endif +#if defined(FEATURE_HW_INTRINSICS) + case GT_HWINTRINSIC: +#endif + tree = fgMorphMultiOp(tree->AsMultiOp()); + break; +#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS) + case GT_ARR_ELEM: tree->AsArrElem()->gtArrObj = fgMorphTree(tree->AsArrElem()->gtArrObj); @@ -18070,64 +18094,3 @@ bool Compiler::fgCanTailCallViaJitHelper() return true; #endif } - -static const int numberOfTrackedFlags = 5; -static const GenTreeFlags trackedFlags[numberOfTrackedFlags] = {GTF_ASG, GTF_CALL, GTF_EXCEPT, GTF_GLOB_REF, 
- GTF_ORDER_SIDEEFF}; - -//------------------------------------------------------------------------ -// fgMorphArgList: morph argument list tree without recursion. -// -// Arguments: -// args - argument list tree to morph; -// mac - morph address context, used to morph children. -// -// Return Value: -// morphed argument list. -// -GenTreeArgList* Compiler::fgMorphArgList(GenTreeArgList* args, MorphAddrContext* mac) -{ - // Use a non-recursive algorithm that morphs all actual list values, - // memorizes the last node for each effect flag and resets - // them during the second iteration. - assert((trackedFlags[0] | trackedFlags[1] | trackedFlags[2] | trackedFlags[3] | trackedFlags[4]) == GTF_ALL_EFFECT); - - GenTree* memorizedLastNodes[numberOfTrackedFlags] = {nullptr}; - - for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) - { - // Morph actual list values. - GenTree*& arg = listNode->Current(); - arg = fgMorphTree(arg, mac); - - // Remember the last list node with each flag. - for (int i = 0; i < numberOfTrackedFlags; ++i) - { - if ((arg->gtFlags & trackedFlags[i]) != 0) - { - memorizedLastNodes[i] = listNode; - } - } - } - - for (GenTreeArgList* listNode = args; listNode != nullptr; listNode = listNode->Rest()) - { - // Clear all old effects from the list node. - listNode->gtFlags &= ~GTF_ALL_EFFECT; - - // Spread each flag to all list nodes (to the prefix) before the memorized last node. - for (int i = 0; i < numberOfTrackedFlags; ++i) - { - if (memorizedLastNodes[i] != nullptr) - { - listNode->gtFlags |= trackedFlags[i]; - } - if (listNode == memorizedLastNodes[i]) - { - memorizedLastNodes[i] = nullptr; - } - } - } - - return args; -} diff --git a/src/coreclr/jit/optcse.cpp index 9aee78d68efca..77eec23841181 100644 --- a/src/coreclr/jit/optcse.cpp +++ b/src/coreclr/jit/optcse.cpp @@ -3635,7 +3635,7 @@ bool Compiler::optIsCSEcandidate(GenTree* tree) { GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); assert(hwIntrinsicNode != nullptr); - HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwIntrinsicNode->gtHWIntrinsicId); + HWIntrinsicCategory category = HWIntrinsicInfo::lookupCategory(hwIntrinsicNode->GetHWIntrinsicId()); switch (category) { diff --git a/src/coreclr/jit/rationalize.cpp index 8602034567652..9eac36c87402b 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -582,26 +582,9 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge const bool isLateArg = (node->gtFlags & GTF_LATE_ARG) != 0; #endif - // First, remove any preceding list nodes, which are not otherwise visited by the tree walk. - // - // NOTE: GT_LIST nodes used by GT_HWINTRINSIC nodes will in fact be visited. - for (GenTree* prev = node->gtPrev; (prev != nullptr) && prev->OperIs(GT_LIST); prev = node->gtPrev) - { - prev->gtFlags &= ~GTF_REVERSE_OPS; - BlockRange().Remove(prev); - } - - // Now clear the REVERSE_OPS flag on the current node. + // Clear the REVERSE_OPS flag on the current node. node->gtFlags &= ~GTF_REVERSE_OPS; - // In addition, remove the current node if it is a GT_LIST node that is not an aggregate.
- if (node->OperIs(GT_LIST)) - { - GenTreeArgList* list = node->AsArgList(); - BlockRange().Remove(list); - return Compiler::WALK_CONTINUE; - } - LIR::Use use; if (parentStack.Height() < 2) { @@ -754,13 +737,16 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge simdNode->gtType = TYP_SIMD8; } // Certain SIMD trees require rationalizing. - if (simdNode->AsSIMD()->gtSIMDIntrinsicID == SIMDIntrinsicInitArray) + if (simdNode->AsSIMD()->GetSIMDIntrinsicId() == SIMDIntrinsicInitArray) { // Rewrite this as an explicit load. JITDUMP("Rewriting GT_SIMD array init as an explicit load:\n"); unsigned int baseTypeSize = genTypeSize(simdNode->GetSimdBaseType()); - GenTree* address = new (comp, GT_LEA) GenTreeAddrMode(TYP_BYREF, simdNode->gtOp1, simdNode->gtOp2, - baseTypeSize, OFFSETOF__CORINFO_Array__data); + + GenTree* base = simdNode->Op(1); + GenTree* index = (simdNode->GetOperandCount() == 2) ? simdNode->Op(2) : nullptr; + GenTree* address = new (comp, GT_LEA) + GenTreeAddrMode(TYP_BYREF, base, index, baseTypeSize, OFFSETOF__CORINFO_Array__data); GenTree* ind = comp->gtNewOperNode(GT_IND, simdType, address); BlockRange().InsertBefore(simdNode, address, ind); @@ -776,16 +762,15 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge // of a different width. If that assumption changes, we will EITHER have to make these type // transformations during importation, and plumb the types all the way through the JIT, // OR add a lot of special handling here. - GenTree* op1 = simdNode->gtGetOp1(); - if (op1 != nullptr && op1->gtType == TYP_STRUCT) - { - op1->gtType = simdType; - } - GenTree* op2 = simdNode->gtGetOp2IfPresent(); - if (op2 != nullptr && op2->gtType == TYP_STRUCT) + // TODO-Review: the comment above seems outdated. TYP_SIMDs have been "plumbed through" the Jit. + // It may be that this code is actually dead. + for (GenTree* operand : simdNode->Operands()) { - op2->gtType = simdType; + if (operand->TypeIs(TYP_STRUCT)) + { + operand->ChangeType(simdType); + } } } } @@ -812,8 +797,8 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, Compiler::Ge #ifdef TARGET_ARM64 // Special case for GetElement/ToScalar because they take Vector64 and return T // and T can be long or ulong. - if (!(hwIntrinsicNode->gtHWIntrinsicId == NI_Vector64_GetElement || - hwIntrinsicNode->gtHWIntrinsicId == NI_Vector64_ToScalar)) + if (!((hwIntrinsicNode->GetHWIntrinsicId() == NI_Vector64_GetElement) || + (hwIntrinsicNode->GetHWIntrinsicId() == NI_Vector64_ToScalar))) #endif { // This happens when it is consumed by a GT_RET_EXPR. 
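// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the core shape this change moves
// to. Operands of SIMD/HW-intrinsic nodes now live in a flat array owned by
// the new GenTreeMultiOp, with a 1-based Op(i) accessor and a range-based
// Operands() iterator, replacing chains of GT_LIST cons cells. The types
// below are illustrative stand-ins, not the JIT's real classes.
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

struct Node
{
    int type; // stand-in for var_types
};

class MultiOpNode
{
    std::vector<Node*> m_operands; // the real change uses inline/external arrays

public:
    explicit MultiOpNode(std::vector<Node*> operands) : m_operands(std::move(operands))
    {
    }

    // 1-based accessor, mirroring GenTreeMultiOp::Op(i).
    Node* Op(size_t i)
    {
        assert((i >= 1) && (i <= m_operands.size()));
        return m_operands[i - 1];
    }

    size_t GetOperandCount() const
    {
        return m_operands.size();
    }

    // Enables "for (Node* operand : node->Operands())", as used by the
    // rewritten BuildSIMD/RewriteNode code above.
    std::vector<Node*>& Operands()
    {
        return m_operands;
    }
};

int main()
{
    Node a{1}, b{1}, c{1}, d{1};
    MultiOpNode init({&a, &b, &c, &d});

    // New-style walk: no GT_LIST cells and no manual initCount bookkeeping.
    for (Node* operand : init.Operands())
    {
        assert(operand->type == 1);
    }

    std::printf("%zu operands\n", init.GetOperandCount());
    return 0;
}
// ---------------------------------------------------------------------------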
diff --git a/src/coreclr/jit/simd.cpp index d60fb49a5d9b0..89a880f1980f9 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -1463,9 +1463,8 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId, tempBaseJitType = CORINFO_TYPE_INT; initVal = gtNewIconNode((ssize_t)constVal); } - initVal->gtType = JITtype2varType(tempBaseJitType); - GenTree* constVector = - gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, tempBaseJitType, size); + initVal->gtType = JITtype2varType(tempBaseJitType); + GenTree* constVector = gtNewSIMDNode(simdType, initVal, SIMDIntrinsicInit, tempBaseJitType, size); // Assign constVector to a temp, since we intend to use it more than once // TODO-CQ: We have quite a few such constant vectors constructed during @@ -1983,10 +1982,13 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, // SIMDIntrinsicInitN // op2 - list of initializer values stitched into a list // op1 - byref of vector - bool initFromFirstArgIndir = false; + IntrinsicNodeBuilder nodeBuilder(getAllocator(CMK_ASTNode), argCount - 1); + bool initFromFirstArgIndir = false; + if (simdIntrinsicID == SIMDIntrinsicInit) { op2 = impSIMDPopStack(simdBaseType); + nodeBuilder.AddOperand(0, op2); } else { @@ -1997,21 +1999,19 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, unsigned elementCount = getSIMDVectorLength(size, simdBaseType); noway_assert(initCount == elementCount); - // Build a GT_LIST with the N values. + // Build an array with the N values. // We must maintain left-to-right order of the args, but we will pop // them off in reverse order (the Nth arg was pushed onto the stack last). - GenTree* list = nullptr; - GenTree* firstArg = nullptr; GenTree* prevArg = nullptr; bool areArgsContiguous = true; for (unsigned i = 0; i < initCount; i++) { - GenTree* nextArg = impSIMDPopStack(simdBaseType); + GenTree* arg = impSIMDPopStack(simdBaseType); + if (areArgsContiguous) { - GenTree* curArg = nextArg; - firstArg = curArg; + GenTree* curArg = arg; if (prevArg != nullptr) { @@ -2021,7 +2021,8 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, prevArg = curArg; } - list = new (this, GT_LIST) GenTreeOp(GT_LIST, simdBaseType, nextArg, list); + assert(genActualType(arg) == genActualType(simdBaseType)); + nodeBuilder.AddOperand(initCount - i - 1, arg); } if (areArgsContiguous && simdBaseType == TYP_FLOAT) { @@ -2030,20 +2031,15 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, // we initialize the vector from first argument address, only when // the simdBaseType is TYP_FLOAT and the arguments are located contiguously in memory initFromFirstArgIndir = true; - GenTree* op2Address = createAddressNodeForSIMDInit(firstArg, size); + GenTree* op2Address = createAddressNodeForSIMDInit(nodeBuilder.GetOperand(0), size); var_types simdType = getSIMDTypeForSize(size); op2 = gtNewOperNode(GT_IND, simdType, op2Address); } - else - { - op2 = list; - } } op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); assert(op1->TypeGet() == TYP_BYREF); - assert(genActualType(op2->TypeGet()) == genActualType(simdBaseType) || initFromFirstArgIndir); // For integral base types of size less than TYP_INT, expand the initializer // to fill size of TYP_INT bytes. @@ -2083,6 +2079,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } else { + // TODO-Casts: this cast is useless.
assert(simdBaseType == TYP_UBYTE || simdBaseType == TYP_USHORT); t1 = gtNewCastNode(TYP_INT, op2, false, TYP_INT); } @@ -2092,8 +2089,8 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, op2 = gtNewOperNode(GT_MUL, TYP_INT, t1, t2); // Construct a vector of TYP_INT with the new initializer and cast it back to vector of simdBaseType - simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, CORINFO_TYPE_INT, size); - simdTree = gtNewSIMDNode(simdType, simdTree, nullptr, SIMDIntrinsicCast, simdBaseJitType, size); + simdTree = gtNewSIMDNode(simdType, op2, simdIntrinsicID, CORINFO_TYPE_INT, size); + simdTree = gtNewSIMDNode(simdType, simdTree, SIMDIntrinsicCast, simdBaseJitType, size); } else { @@ -2110,7 +2107,8 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, } else { - simdTree = gtNewSIMDNode(simdType, op2, nullptr, simdIntrinsicID, simdBaseJitType, size); + simdTree = new (this, GT_SIMD) + GenTreeSIMD(simdType, std::move(nodeBuilder), simdIntrinsicID, simdBaseJitType, size); } } @@ -2227,8 +2225,10 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, if (simdIntrinsicID == SIMDIntrinsicInitArray || simdIntrinsicID == SIMDIntrinsicInitArrayX) { - op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); - simdTree = gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, simdBaseJitType, size); + op1 = getOp1ForConstructor(opcode, newobjThis, clsHnd); + simdTree = (op3 != nullptr) + ? gtNewSIMDNode(simdType, op2, op3, SIMDIntrinsicInitArray, simdBaseJitType, size) + : gtNewSIMDNode(simdType, op2, SIMDIntrinsicInitArray, simdBaseJitType, size); copyBlkDst = op1; doCopyBlk = true; } @@ -2337,7 +2337,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, { op1 = impSIMDPopStack(simdType, instMethod); - simdTree = gtNewSIMDNode(simdType, op1, nullptr, simdIntrinsicID, simdBaseJitType, size); + simdTree = gtNewSIMDNode(simdType, op1, simdIntrinsicID, simdBaseJitType, size); retVal = simdTree; } break; @@ -2347,7 +2347,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode, #ifdef TARGET_64BIT op1 = impSIMDPopStack(simdType, instMethod); - simdTree = gtNewSIMDNode(simdType, op1, nullptr, simdIntrinsicID, simdBaseJitType, size); + simdTree = gtNewSIMDNode(simdType, op1, simdIntrinsicID, simdBaseJitType, size); retVal = simdTree; #else JITDUMP("SIMD Conversion to Int64 is not supported on this platform\n"); diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index 07d70e20d503d..8388f1ef3bc61 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -42,7 +42,7 @@ enum SIMDLevel extern const char* const simdIntrinsicNames[]; #endif -enum SIMDIntrinsicID +enum SIMDIntrinsicID : uint16_t { #define SIMD_INTRINSIC(m, i, id, n, r, ac, arg1, arg2, arg3, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10) SIMDIntrinsic##id, #include "simdintrinsiclist.h" diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index 5a3f0296d1813..9362aa05f521a 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -387,9 +387,9 @@ void CodeGen::genSIMDZero(var_types targetType, var_types baseType, regNumber ta // void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInit); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInit); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); @@ -501,7 +501,7 @@ void 
CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) srcReg = targetReg; } - ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); GetEmitter()->emitIns_R_R(ins, emitActualTypeSize(targetType), targetReg, srcReg); } else @@ -569,7 +569,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitN); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicInitN); // Right now this intrinsic is supported only on TYP_FLOAT vectors var_types baseType = simdNode->GetSimdBaseType(); @@ -598,19 +598,17 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // We will first consume the list items in execution (left to right) order, // and record the registers. regNumber operandRegs[SIMD_INTRINSIC_MAX_PARAM_COUNT]; - unsigned initCount = 0; - for (GenTree* list = simdNode->gtGetOp1(); list != nullptr; list = list->gtGetOp2()) + size_t initCount = simdNode->GetOperandCount(); + for (size_t i = 1; i <= initCount; i++) { - assert(list->OperGet() == GT_LIST); - GenTree* listItem = list->gtGetOp1(); - assert(listItem->TypeGet() == baseType); - assert(!listItem->isContained()); - regNumber operandReg = genConsumeReg(listItem); - operandRegs[initCount] = operandReg; - initCount++; + GenTree* operand = simdNode->Op(i); + assert(operand->TypeIs(baseType)); + assert(!operand->isContained()); + + operandRegs[i - 1] = genConsumeReg(operand); } - unsigned int offset = 0; + unsigned offset = 0; for (unsigned i = 0; i < initCount; i++) { // We will now construct the vector from the list items in reverse order. @@ -649,17 +647,17 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicCast); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); regNumber op1Reg = genConsumeReg(op1); - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); - if (simdNode->gtSIMDIntrinsicID != SIMDIntrinsicCast) + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); + if (simdNode->GetSIMDIntrinsicId() != SIMDIntrinsicCast) { inst_RV_RV(ins, targetReg, op1Reg, targetType, emitActualTypeSize(targetType)); } @@ -681,17 +679,17 @@ void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode) { - SIMDIntrinsicID intrinsicID = simdNode->gtSIMDIntrinsicID; + SIMDIntrinsicID intrinsicID = simdNode->GetSIMDIntrinsicId(); assert((intrinsicID == SIMDIntrinsicConvertToSingle) || (intrinsicID == SIMDIntrinsicConvertToInt32)); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); regNumber op1Reg = genConsumeReg(op1); - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); if (intrinsicID == SIMDIntrinsicConvertToSingle && baseType == TYP_UINT) { regNumber 
tmpIntReg = simdNode->GetSingleTempReg(RBM_ALLINT); @@ -812,10 +810,10 @@ void CodeGen::genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID, // void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode) { - SIMDIntrinsicID intrinsicID = simdNode->gtSIMDIntrinsicID; + SIMDIntrinsicID intrinsicID = simdNode->GetSIMDIntrinsicId(); assert((intrinsicID == SIMDIntrinsicConvertToDouble) || (intrinsicID == SIMDIntrinsicConvertToInt64)); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); @@ -1125,22 +1123,23 @@ void CodeGen::genSIMDExtractUpperHalf(GenTreeSIMD* simdNode, regNumber srcReg, r // void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSub || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseAnd || - simdNode->gtSIMDIntrinsicID == SIMDIntrinsicBitwiseOr); + assert((simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicSub) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicBitwiseAnd) || + (simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicBitwiseOr)); - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); + GenTree* op1 = simdNode->Op(1); + GenTree* op2 = simdNode->Op(2); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); assert(targetReg != REG_NA); var_types targetType = simdNode->TypeGet(); - genConsumeOperands(simdNode); + genConsumeMultiOpOperands(simdNode); regNumber op1Reg = op1->GetRegNum(); regNumber op2Reg = op2->GetRegNum(); regNumber otherReg = op2Reg; - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); // Currently AVX doesn't support integer. // if the ins is INS_cvtsi2ss or INS_cvtsi2sd, we won't use AVX. 
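// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the 1-based operand loop the
// genSIMDIntrinsicInitN rewrite above uses in place of the old
// "for (GenTree* list = op1; list != nullptr; list = list->gtGetOp2())" walk.
// Operands are visited in execution (left-to-right) order by index, and the
// count comes from GetOperandCount() rather than being tallied while walking
// a list. All names here are illustrative stand-ins, not JIT APIs.
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <vector>

struct Operand
{
    int reg; // stand-in for the register assigned to this operand
};

// Mimics "for (size_t i = 1; i <= simdNode->GetOperandCount(); i++) ... Op(i)".
std::vector<int> consumeOperandRegs(const std::vector<Operand>& operands)
{
    std::vector<int> operandRegs;
    size_t initCount = operands.size(); // simdNode->GetOperandCount()

    for (size_t i = 1; i <= initCount; i++)
    {
        const Operand& op = operands[i - 1]; // Op(i) is 1-based
        operandRegs.push_back(op.reg);       // stands in for genConsumeReg(operand)
    }

    return operandRegs;
}

int main()
{
    std::vector<Operand> ops = {{10}, {11}, {12}, {13}};
    std::vector<int> regs = consumeOperandRegs(ops);

    assert((regs.size() == 4) && (regs.front() == 10) && (regs.back() == 13));
    std::printf("consumed %zu operand regs in order\n", regs.size());
    return 0;
}
// ---------------------------------------------------------------------------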
@@ -1178,19 +1177,19 @@ void CodeGen::genSIMDIntrinsicBinOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); + GenTree* op1 = simdNode->Op(1); + GenTree* op2 = simdNode->Op(2); var_types baseType = simdNode->GetSimdBaseType(); regNumber targetReg = simdNode->GetRegNum(); var_types targetType = simdNode->TypeGet(); SIMDLevel level = compiler->getSIMDSupportLevel(); - genConsumeOperands(simdNode); + genConsumeMultiOpOperands(simdNode); regNumber op1Reg = op1->GetRegNum(); regNumber op2Reg = op2->GetRegNum(); regNumber otherReg = op2Reg; - switch (simdNode->gtSIMDIntrinsicID) + switch (simdNode->GetSIMDIntrinsicId()) { case SIMDIntrinsicEqual: { @@ -1206,7 +1205,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) #endif unsigned ival = 0; - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType, &ival); + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType, &ival); // targetReg = op1reg > op2reg // Therefore, we can optimize if op1Reg == targetReg @@ -1215,7 +1214,7 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) { if (op2Reg == targetReg) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicEqual); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicEqual); otherReg = op1Reg; } else @@ -1255,11 +1254,11 @@ void CodeGen::genSIMDIntrinsicRelOp(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicShuffleSSE2); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicShuffleSSE2); noway_assert(compiler->getSIMDSupportLevel() == SIMD_SSE2_Supported); - GenTree* op1 = simdNode->gtGetOp1(); - GenTree* op2 = simdNode->gtGetOp2(); + GenTree* op1 = simdNode->Op(1); + GenTree* op2 = simdNode->Op(2); assert(op2->isContained()); assert(op2->IsCnsIntOrI()); ssize_t shuffleControl = op2->AsIntConCommon()->IconValue(); @@ -1271,7 +1270,7 @@ void CodeGen::genSIMDIntrinsicShuffleSSE2(GenTreeSIMD* simdNode) regNumber op1Reg = genConsumeReg(op1); inst_Mov(targetType, targetReg, op1Reg, /* canSkip */ true); - instruction ins = getOpForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID, baseType); + instruction ins = getOpForSIMDIntrinsic(simdNode->GetSIMDIntrinsicId(), baseType); assert((shuffleControl >= 0) && (shuffleControl <= 255)); GetEmitter()->emitIns_R_R_I(ins, emitTypeSize(baseType), targetReg, targetReg, (int8_t)shuffleControl); genProduceReg(simdNode); @@ -1525,9 +1524,9 @@ void CodeGen::genPutArgStkSIMD12(GenTree* treeNode) // void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperSave); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicUpperSave); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32); regNumber targetReg = simdNode->GetRegNum(); regNumber op1Reg = genConsumeReg(op1); @@ -1567,9 +1566,9 @@ void CodeGen::genSIMDIntrinsicUpperSave(GenTreeSIMD* simdNode) // void CodeGen::genSIMDIntrinsicUpperRestore(GenTreeSIMD* simdNode) { - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicUpperRestore); + assert(simdNode->GetSIMDIntrinsicId() == SIMDIntrinsicUpperRestore); - GenTree* op1 = simdNode->gtGetOp1(); + GenTree* op1 = simdNode->Op(1); assert(op1->IsLocal() && op1->TypeGet() == TYP_SIMD32); regNumber srcReg = simdNode->GetRegNum(); regNumber lclVarReg = 
genConsumeReg(op1); @@ -1612,7 +1611,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode) noway_assert(!"SIMD intrinsic with unsupported base type."); } - switch (simdNode->gtSIMDIntrinsicID) + switch (simdNode->GetSIMDIntrinsicId()) { case SIMDIntrinsicInit: genSIMDIntrinsicInit(simdNode); diff --git a/src/coreclr/jit/ssabuilder.h b/src/coreclr/jit/ssabuilder.h index 6d1a9fbd5542f..68ebb084f08db 100644 --- a/src/coreclr/jit/ssabuilder.h +++ b/src/coreclr/jit/ssabuilder.h @@ -29,8 +29,9 @@ class SsaBuilder // Requires stmt nodes to be already sequenced in evaluation order. Analyzes the graph // for introduction of phi-nodes as GT_PHI tree nodes at the beginning of each block. // Each GT_LCL_VAR is given its ssa number through its GetSsaNum() field in the node. - // Each GT_PHI node will have gtOp1 set to lhs of the phi node and the gtOp2 to be a - // GT_LIST of GT_PHI_ARG. Each use or def is denoted by the corresponding GT_LCL_VAR + // Each GT_PHI node will be under a GT_ASG node with the LHS set to the local node and + // the RHS to the GT_PHI itself. The inputs to the PHI are represented as a linked list + // of GT_PHI_ARG nodes. Each use or def is denoted by the corresponding GT_LCL_VAR // tree. For example, to get all uses of a particular variable fully defined by its // lclNum and ssaNum, one would use m_uses and look up all the uses. Similarly, a single // def of an SSA variable can be looked up similarly using m_defs member. @@ -70,7 +71,7 @@ class SsaBuilder // Requires "postOrder" to hold the blocks of the flowgraph in topologically sorted order. Requires // count to be the valid entries in the "postOrder" array. Inserts GT_PHI nodes at the beginning // of basic blocks that require them like so: - // GT_ASG(GT_LCL_VAR, GT_PHI(GT_PHI_ARG(GT_LCL_VAR, Block*), GT_LIST(GT_PHI_ARG(GT_LCL_VAR, Block*), NULL)); + // GT_ASG(GT_LCL_VAR, GT_PHI(GT_PHI_ARG(ssaNum, Block*), GT_PHI_ARG(ssaNum, Block*), ...)); void InsertPhiFunctions(BasicBlock** postOrder, int count); // Rename all definitions and uses within the compiled method. diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index d1f108d0c40ad..d0a1132540bc5 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -343,11 +343,11 @@ VNFunc GetVNFuncForNode(GenTree* node) #ifdef FEATURE_SIMD case GT_SIMD: - return VNFunc(VNF_SIMD_FIRST + node->AsSIMD()->gtSIMDIntrinsicID); + return VNFunc(VNF_SIMD_FIRST + node->AsSIMD()->GetSIMDIntrinsicId()); #endif // FEATURE_SIMD #ifdef FEATURE_HW_INTRINSICS case GT_HWINTRINSIC: - return VNFunc(VNF_HWI_FIRST + (node->AsHWIntrinsic()->gtHWIntrinsicId - NI_HW_INTRINSIC_START - 1)); + return VNFunc(VNF_HWI_FIRST + (node->AsHWIntrinsic()->GetHWIntrinsicId() - NI_HW_INTRINSIC_START - 1)); #endif // FEATURE_HW_INTRINSICS case GT_CAST: @@ -9092,21 +9092,6 @@ void Compiler::fgValueNumberTree(GenTree* tree) { fgValueNumberIntrinsic(tree); } - -#ifdef FEATURE_SIMD - else if (tree->OperGet() == GT_SIMD) - { - fgValueNumberSimd(tree); - } -#endif // FEATURE_SIMD - -#ifdef FEATURE_HW_INTRINSICS - else if (tree->OperGet() == GT_HWINTRINSIC) - { - fgValueNumberHWIntrinsic(tree); - } -#endif // FEATURE_HW_INTRINSICS - else // Look up the VNFunc for the node { VNFunc vnf = GetVNFuncForNode(tree); @@ -9161,7 +9146,7 @@ void Compiler::fgValueNumberTree(GenTree* tree) ValueNumPair op2VNPair; if (tree->AsOp()->gtOp2 == nullptr) { - // Handle any GT_LIST nodes as they can have a nullptr for op2. + // Handle any GT_LEA nodes as they can have a nullptr for op2. 
op2VNPair.SetBoth(ValueNumStore::VNForNull()); } else @@ -9319,7 +9304,6 @@ void Compiler::fgValueNumberTree(GenTree* tree) } case GT_JTRUE: - case GT_LIST: // These nodes never need to have a ValueNumber tree->gtVNPair.SetBoth(ValueNumStore::NoVN); break; @@ -9353,6 +9337,18 @@ void Compiler::fgValueNumberTree(GenTree* tree) fgValueNumberCall(tree->AsCall()); break; +#ifdef FEATURE_SIMD + case GT_SIMD: + fgValueNumberSimd(tree->AsSIMD()); + break; +#endif // FEATURE_SIMD + +#ifdef FEATURE_HW_INTRINSICS + case GT_HWINTRINSIC: + fgValueNumberHWIntrinsic(tree->AsHWIntrinsic()); + break; +#endif // FEATURE_HW_INTRINSICS + case GT_CMPXCHG: // Specialop { // For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed. @@ -9438,17 +9434,13 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) vnStore->VNPWithExc(vnStore->EvalMathFuncUnary(tree->TypeGet(), intrinsic->gtIntrinsicName, arg0VNP), arg0VNPx); } - else if (!intrinsic->AsOp()->gtOp1->OperIsList()) + else { ValueNumPair newVNP = vnStore->EvalMathFuncBinary(tree->TypeGet(), intrinsic->gtIntrinsicName, arg0VNP, arg1VNP); ValueNumPair excSet = vnStore->VNPExcSetUnion(arg0VNPx, arg1VNPx); intrinsic->gtVNPair = vnStore->VNPWithExc(newVNP, excSet); } - else - { - unreached(); - } } else { @@ -9460,30 +9452,24 @@ void Compiler::fgValueNumberIntrinsic(GenTree* tree) #ifdef FEATURE_SIMD // Does value-numbering for a GT_SIMD node. -void Compiler::fgValueNumberSimd(GenTree* tree) +void Compiler::fgValueNumberSimd(GenTreeSIMD* tree) { - assert(tree->OperGet() == GT_SIMD); - GenTreeSIMD* simdNode = tree->AsSIMD(); - assert(simdNode != nullptr); - VNFunc simdFunc = GetVNFuncForNode(tree); ValueNumPair excSetPair; ValueNumPair normalPair; // There are some SIMD operations that have zero args, i.e. NI_Vector128_Zero - if (tree->AsOp()->gtOp1 == nullptr) + if (tree->GetOperandCount() == 0) { excSetPair = ValueNumStore::VNPForEmptyExcSet(); normalPair = vnStore->VNPairForFunc(tree->TypeGet(), simdFunc); } - else if (tree->AsOp()->gtOp1->OperIs(GT_LIST)) + // TODO-List-Cleanup: the "tree->GetSIMDIntrinsicId() == SIMDIntrinsicInitN" case is a quirk + // to get zero diffs - Vector2(float, float) was imported with lists - remove it. + else if ((tree->GetOperandCount() > 2) || (tree->GetSIMDIntrinsicId() == SIMDIntrinsicInitN)) { - assert(tree->AsOp()->gtOp2 == nullptr); - - // We have a SIMD node in the GT_LIST form with 3 or more args - // For now we will generate a unique value number for this case. - - // Generate a unique VN + // We have a SIMD node with 3 or more args. To retain the + // previous behavior, we will generate a unique VN for this case. 
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); return; } @@ -9492,25 +9478,25 @@ void Compiler::fgValueNumberSimd(GenTree* tree) ValueNumPair resvnp = ValueNumPair(); ValueNumPair op1vnp; ValueNumPair op1Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + vnStore->VNPUnpackExc(tree->Op(1)->gtVNPair, &op1vnp, &op1Xvnp); ValueNum addrVN = ValueNumStore::NoVN; - bool isMemoryLoad = simdNode->OperIsMemoryLoad(); + bool isMemoryLoad = tree->OperIsMemoryLoad(); if (isMemoryLoad) { // Currently the only SIMD operation with MemoryLoad semantics is SIMDIntrinsicInitArray // and it has to be handled specially since it has an optional op2 // - assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicInitArray); + assert(tree->GetSIMDIntrinsicId() == SIMDIntrinsicInitArray); // rationalize rewrites this as an explicit load with op1 as the base address assert(tree->OperIsImplicitIndir()); ValueNumPair op2vnp; - if (tree->AsOp()->gtOp2 == nullptr) + if (tree->GetOperandCount() != 2) { - // a nullptr for op2 means that we have an impicit index of zero + // No op2 means that we have an implicit index of zero op2vnp = ValueNumPair(vnStore->VNZeroForType(TYP_INT), vnStore->VNZeroForType(TYP_INT)); excSetPair = op1Xvnp; @@ -9518,7 +9504,7 @@ else // We have an explicit index in op2 { ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + vnStore->VNPUnpackExc(tree->Op(2)->gtVNPair, &op2vnp, &op2Xvnp); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); } @@ -9530,7 +9516,7 @@ if (verbose) { printf("Treating GT_SIMD %s as a ByrefExposed load , addrVN is ", - simdIntrinsicNames[simdNode->gtSIMDIntrinsicID]); + simdIntrinsicNames[tree->GetSIMDIntrinsicId()]); vnPrint(addrVN, 0); } #endif // DEBUG @@ -9541,15 +9527,15 @@ tree->gtVNPair.SetLiberal(loadVN); tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, excSetPair); - fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + fgValueNumberAddExceptionSetForIndirection(tree, tree->Op(1)); return; } - bool encodeResultType = vnEncodesResultTypeForSIMDIntrinsic(simdNode->gtSIMDIntrinsicID); + bool encodeResultType = vnEncodesResultTypeForSIMDIntrinsic(tree->GetSIMDIntrinsicId()); if (encodeResultType) { - ValueNum simdTypeVN = vnStore->VNForSimdType(simdNode->GetSimdSize(), simdNode->GetSimdBaseType()); + ValueNum simdTypeVN = vnStore->VNForSimdType(tree->GetSimdSize(), tree->GetSimdBaseType()); resvnp.SetBoth(simdTypeVN); #ifdef DEBUG @@ -9562,9 +9548,9 @@ #endif } - if (tree->AsOp()->gtOp2 == nullptr) + if (tree->GetOperandCount() == 1) { - // Unary SIMD nodes have a nullptr for op2. + // A unary SIMD node.
excSetPair = op1Xvnp; if (encodeResultType) { @@ -9581,7 +9567,7 @@ { ValueNumPair op2vnp; ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + vnStore->VNPUnpackExc(tree->Op(2)->gtVNPair, &op2vnp, &op2Xvnp); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); if (encodeResultType) @@ -9602,33 +9588,26 @@ #ifdef FEATURE_HW_INTRINSICS // Does value-numbering for a GT_HWINTRINSIC node -void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) +void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) { - assert(tree->OperGet() == GT_HWINTRINSIC); - GenTreeHWIntrinsic* hwIntrinsicNode = tree->AsHWIntrinsic(); - assert(hwIntrinsicNode != nullptr); - // For safety/correctness we must mutate the global heap valuenumber // for any HW intrinsic that performs a memory store operation - if (hwIntrinsicNode->OperIsMemoryStore()) + if (tree->OperIsMemoryStore()) { fgMutateGcHeap(tree DEBUGARG("HWIntrinsic - MemoryStore")); } - if ((tree->AsOp()->gtOp1 != nullptr) && tree->gtGetOp1()->OperIs(GT_LIST)) + if (tree->GetOperandCount() > 2) { - // TODO-CQ: allow intrinsics with GT_LIST to be properly VN'ed, it will + // TODO-CQ: allow intrinsics with > 2 operands to be properly VN'ed, it will // allow us to process things like Vector128.Create(1,2,3,4) etc. - // Generate unique VN for now. + // Generate unique VN for now to retain previous behavior. tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet())); return; } - // We don't expect GT_LIST to be in the second op - assert((tree->AsOp()->gtOp2 == nullptr) || !tree->gtGetOp2()->OperIs(GT_LIST)); - VNFunc func = GetVNFuncForNode(tree); - bool isMemoryLoad = hwIntrinsicNode->OperIsMemoryLoad(); + bool isMemoryLoad = tree->OperIsMemoryLoad(); // If we have a MemoryLoad operation we will use the fgValueNumberByrefExposedLoad // method to assign a value number that depends upon fgCurMemoryVN[ByrefExposed] ValueNumber @@ -9637,7 +9616,7 @@ { ValueNumPair op1vnp; ValueNumPair op1Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + vnStore->VNPUnpackExc(tree->Op(1)->gtVNPair, &op1vnp, &op1Xvnp); // The addrVN incorporates both op1's ValueNumber and the func operation // The func is used because operations such as LoadLow and LoadHigh perform @@ -9652,11 +9631,11 @@ tree->gtVNPair.SetLiberal(loadVN); tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet())); tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, op1Xvnp); - fgValueNumberAddExceptionSetForIndirection(tree, tree->AsOp()->gtOp1); + fgValueNumberAddExceptionSetForIndirection(tree, tree->Op(1)); return; } - bool encodeResultType = vnEncodesResultTypeForHWIntrinsic(hwIntrinsicNode->gtHWIntrinsicId); + bool encodeResultType = vnEncodesResultTypeForHWIntrinsic(tree->GetHWIntrinsicId()); ValueNumPair excSetPair = ValueNumStore::VNPForEmptyExcSet(); ValueNumPair normalPair; @@ -9664,8 +9643,7 @@ if (encodeResultType) { - ValueNum simdTypeVN = - vnStore->VNForSimdType(hwIntrinsicNode->GetSimdSize(), hwIntrinsicNode->GetSimdBaseType()); + ValueNum simdTypeVN = vnStore->VNForSimdType(tree->GetSimdSize(), tree->GetSimdBaseType()); resvnp.SetBoth(simdTypeVN); #ifdef DEBUG @@ -9678,10 +9656,10 @@ void
Compiler::fgValueNumberHWIntrinsic(GenTree* tree) #endif } - const bool isVariableNumArgs = HWIntrinsicInfo::lookupNumArgs(hwIntrinsicNode->gtHWIntrinsicId) == -1; + const bool isVariableNumArgs = HWIntrinsicInfo::lookupNumArgs(tree->GetHWIntrinsicId()) == -1; // There are some HWINTRINSICS operations that have zero args, i.e. NI_Vector128_Zero - if (tree->AsOp()->gtOp1 == nullptr) + if (tree->GetOperandCount() == 0) { // Currently we don't have intrinsics with variable number of args with a parameter-less option. assert(!isVariableNumArgs); @@ -9702,9 +9680,9 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) { ValueNumPair op1vnp; ValueNumPair op1Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp1->gtVNPair, &op1vnp, &op1Xvnp); + vnStore->VNPUnpackExc(tree->Op(1)->gtVNPair, &op1vnp, &op1Xvnp); - if (tree->AsOp()->gtOp2 == nullptr) + if (tree->GetOperandCount() == 1) { excSetPair = op1Xvnp; @@ -9723,7 +9701,7 @@ void Compiler::fgValueNumberHWIntrinsic(GenTree* tree) { ValueNumPair op2vnp; ValueNumPair op2Xvnp; - vnStore->VNPUnpackExc(tree->AsOp()->gtOp2->gtVNPair, &op2vnp, &op2Xvnp); + vnStore->VNPUnpackExc(tree->Op(2)->gtVNPair, &op2vnp, &op2Xvnp); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); if (encodeResultType) diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index b1e840ceb5576..da8d234dd988e 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -428,8 +428,8 @@ class ValueNumStore return ValueNum(SRC_ReadOnlyHeap); } - // A special value number for "void" -- sometimes a type-void thing is an argument to a - // GT_LIST, and we want the args to be non-NoVN. + // A special value number for "void" -- sometimes a type-void thing is an argument, + // and we want the args to be non-NoVN. static ValueNum VNForVoid() { // We reserve Chunk 0 for "special" VNs. Let SRC_Void (== 4) be the value for "void".
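// ---------------------------------------------------------------------------
// Editor's sketch (not part of the patch): the operand-count dispatch that the
// rewritten fgValueNumberSimd/fgValueNumberHWIntrinsic above use in place of
// null and GT_LIST checks on op1/op2: zero operands value-number as a nullary
// func, one as unary, two as binary, and more than two currently get a unique
// VN (the zero-diff quirk the TODO comments call out). Names are illustrative.
#include <cstddef>
#include <cstdio>

enum class VNShape
{
    Nullary, // e.g. NI_Vector128_Zero
    Unary,
    Binary,
    Unique // > 2 operands: generate a unique VN for now
};

VNShape classifyForValueNumbering(size_t operandCount)
{
    if (operandCount == 0)
    {
        return VNShape::Nullary;
    }
    if (operandCount == 1)
    {
        return VNShape::Unary;
    }
    if (operandCount == 2)
    {
        return VNShape::Binary;
    }
    return VNShape::Unique;
}

int main()
{
    const size_t counts[] = {0, 1, 2, 5};
    for (size_t count : counts)
    {
        std::printf("count=%zu shape=%d\n", count, static_cast<int>(classifyForValueNumbering(count)));
    }
    return 0;
}
// ---------------------------------------------------------------------------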