From 60925bc8f3e3c22efee705a9038af78a55cee346 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 21:28:33 -0700 Subject: [PATCH 01/10] Fix AV in GatherVector --- src/coreclr/jit/gentree.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 9a899443294b5..d4332600df1e7 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27525,11 +27525,11 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend: case NI_Sve_GatherVectorUInt32ZeroExtend: addr = Op(2); - if (varTypeIsSIMD(addr->gtType)) + if (!varTypeIsI(addr)) { - // The address is a vector of addresses. - // Return true, but do not set pAddr. - return true; + // For some variants, the address is in vector. + // Return false for such cases. + return false; } break; From b7668171618261faa4f3901e38b676626504a58e Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:05:02 -0700 Subject: [PATCH 02/10] Remove INS_SCALABLE_OPTS_UNPREDICATED --- src/coreclr/jit/codegenarm64test.cpp | 324 +++++++------------- src/coreclr/jit/emitarm64.cpp | 3 +- src/coreclr/jit/emitarm64sve.cpp | 153 ++++----- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 19 +- src/coreclr/jit/instr.h | 1 - 5 files changed, 185 insertions(+), 315 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 0450bb6f1a0d3..b8afcdef0a284 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5393,54 +5393,33 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D); // MSB ., /M, ., . // IF_SVE_AT_3A - theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V0, REG_V0, REG_V0, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); // ADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V3, REG_V31, REG_V12, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // SQADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V7, REG_V0, REG_V31, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // SQSUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V19, REG_V7, REG_V13, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // SUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V23, REG_V28, REG_V29, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); // UQADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V31, REG_V31, REG_V31, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // UQSUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_V0, REG_V31, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); // MUL ., ., . - theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V0, REG_V31, REG_V5, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // SMULH ., ., . - theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V31, REG_V5, REG_V0, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // UMULH ., ., . + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V0, REG_V0, REG_V0, INS_OPTS_SCALABLE_B); // ADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V3, REG_V31, REG_V12, INS_OPTS_SCALABLE_H); // SQADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V7, REG_V0, REG_V31, INS_OPTS_SCALABLE_S); // SQSUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V19, REG_V7, REG_V13, INS_OPTS_SCALABLE_D); // SUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V23, REG_V28, REG_V29, INS_OPTS_SCALABLE_B); // UQADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V31, REG_V31, REG_V31, INS_OPTS_SCALABLE_H); // UQSUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_V0, REG_V31, INS_OPTS_SCALABLE_B); // MUL ., ., . + theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V0, REG_V31, REG_V5, INS_OPTS_SCALABLE_H); // SMULH ., ., . + theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V31, REG_V5, REG_V0, INS_OPTS_SCALABLE_D); // UMULH ., ., . theEmitter->emitIns_R_R_R(INS_sve_sqdmulh, EA_SCALABLE, REG_V7, REG_V28, REG_V0, INS_OPTS_SCALABLE_B); // SQDMULH ., ., . theEmitter->emitIns_R_R_R(INS_sve_sqrdmulh, EA_SCALABLE, REG_V23, REG_V3, REG_V31, INS_OPTS_SCALABLE_H); // SQRDMULH ., ., . theEmitter->emitIns_R_R_R(INS_sve_ftssel, EA_SCALABLE, REG_V17, REG_V16, REG_V15, INS_OPTS_SCALABLE_D); // FTSSEL ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); // TRN1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // TRN1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // TRN2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // TRN2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); // UZP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // UZP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V18, REG_V19, REG_V20, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // UZP2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V21, REG_V22, REG_V23, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // UZP2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V24, REG_V25, REG_V26, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); // ZIP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V27, REG_V28, REG_V29, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // ZIP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V30, REG_V31, REG_V0, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V1, REG_V2, REG_V3, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TRN1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H); // TRN1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_S); // TRN2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // TRN2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_B); // UZP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_H); // UZP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V18, REG_V19, REG_V20, INS_OPTS_SCALABLE_S); // UZP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V21, REG_V22, REG_V23, INS_OPTS_SCALABLE_D); // UZP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V24, REG_V25, REG_V26, INS_OPTS_SCALABLE_B); // ZIP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V27, REG_V28, REG_V29, INS_OPTS_SCALABLE_H); // ZIP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V30, REG_V31, REG_V0, INS_OPTS_SCALABLE_S); // ZIP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V1, REG_V2, REG_V3, INS_OPTS_SCALABLE_D); // ZIP2 ., ., . theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TBXQ ., ., . theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V3, REG_V4, REG_V5, @@ -5479,18 +5458,12 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_S); // BGRP ., ., . theEmitter->emitIns_R_R_R(INS_sve_bgrp, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // BGRP ., ., . - theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // FADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // FMUL ., ., . - theEmitter->emitIns_R_R_R(INS_sve_frecps, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // FRECPS ., ., . - theEmitter->emitIns_R_R_R(INS_sve_frsqrts, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // FRSQRTS ., ., . - theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // FSUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_ftsmul, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // FTSMUL ., ., . + theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H); // FADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_S); // FMUL ., ., . + theEmitter->emitIns_R_R_R(INS_sve_frecps, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D); // FRECPS ., ., . + theEmitter->emitIns_R_R_R(INS_sve_frsqrts, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_H); // FRSQRTS ., ., . + theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_S); // FSUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ftsmul, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_D); // FTSMUL ., ., . // IF_SVE_BA_3A theEmitter->emitIns_R_R_R(INS_sve_index, EA_4BYTE, REG_V24, REG_ZR, REG_R9, @@ -5529,18 +5502,12 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D_UXTW); // ADR .D, [.D, .D, UXTW{}] // IF_SVE_BR_3B - theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_UNPREDICATED); // TRN1 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_UNPREDICATED); // TRN2 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_UNPREDICATED); // UZP1 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_UNPREDICATED); // UZP2 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_UNPREDICATED); // ZIP1 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_UNPREDICATED); // ZIP2 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_Q); // TRN1 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_Q); // TRN2 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_Q); // UZP1 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_Q); // UZP2 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_Q); // ZIP1 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q); // ZIP2 .Q, .Q, .Q // IF_SVE_BS_1A theEmitter->emitIns_R_I(INS_sve_and, EA_SCALABLE, REG_V0, 0x00000000000000AA, @@ -5882,32 +5849,22 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_H); // BFCLAMP .H, .H, .H // IF_SVE_HK_3B - theEmitter->emitIns_R_R_R(INS_sve_bfadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // BFADD .H, .H, .H - theEmitter->emitIns_R_R_R(INS_sve_bfmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // BFMUL .H, .H, .H - theEmitter->emitIns_R_R_R(INS_sve_bfsub, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); // BFSUB .H, .H, .H + theEmitter->emitIns_R_R_R(INS_sve_bfadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H); // BFADD .H, .H, .H + theEmitter->emitIns_R_R_R(INS_sve_bfmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H); // BFMUL .H, .H, .H + theEmitter->emitIns_R_R_R(INS_sve_bfsub, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_H); // BFSUB .H, .H, .H #ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_AT_3B - theEmitter->emitIns_R_R_R(INS_sve_addpt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // ADDPT .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_subpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // SUBPT .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_addpt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D); // ADDPT .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_subpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D); // SUBPT .D, .D, .D #endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_AU_3A - theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // AND .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // BIC .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // EOR .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // MOV .D, .D - theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // ORR .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D); // AND .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D); // BIC .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D); // EOR .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // MOV .D, .D + theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_D); // ORR .D, .D, .D // IF_SVE_AV_3A theEmitter->emitIns_R_R_R(INS_sve_bcax, EA_SCALABLE, REG_V0, REG_V1, REG_V2, @@ -6255,10 +6212,8 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_CW_4A theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, . - theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); // SEL ., , ., . - theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); // SEL ., , ., . + theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D); // SEL ., , ., . + theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S); // SEL ., , ., . // IF_SVE_EQ_3A // Note: Scalable size is the size of the destination , not the source . @@ -8517,29 +8472,19 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_IE_2A // LDR , [{, #, MUL VL}] - theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 0, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 33, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, -173, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, -256, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 255, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 0, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 33, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, -173, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, -256, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_V3, REG_R4, 255, INS_OPTS_NONE); // IF_SVE_JH_2A // STR , [{, #, MUL VL}] - theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 0, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 71, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, -165, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, -256, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 255, INS_OPTS_NONE, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 0, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 71, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, -165, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, -256, INS_OPTS_NONE); + theEmitter->emitIns_R_R_I(INS_sve_str, EA_SCALABLE, REG_V2, REG_R3, 255, INS_OPTS_NONE); #ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_GG_3A @@ -8870,17 +8815,13 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_CG_2A // REV ., . - theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V3, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V3, INS_OPTS_SCALABLE_B); // REV ., . - theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V4, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V4, INS_OPTS_SCALABLE_H); // REV ., . - theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V7, REG_V1, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V7, REG_V1, INS_OPTS_SCALABLE_S); // REV ., . - theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V5, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_V2, REG_V5, INS_OPTS_SCALABLE_D); // IF_SVE_CB_2A // Note: EA_4BYTE used for B and H (source register is W) @@ -8937,106 +8878,57 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_BF_2A // ASR ., ., # - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_asr, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D); // LSL ., ., #emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 31, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 63, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 7, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 15, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 31, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V31, REG_V31, 63, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_lsl, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D); // LSR ., ., #emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); - theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_UNPREDICATED); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 8, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 5, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 4, INS_OPTS_SCALABLE_B); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 16, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 9, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 8, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 32, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 15, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 16, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V0, 1, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V31, REG_V31, 64, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 33, INS_OPTS_SCALABLE_D); + theEmitter->emitIns_R_R_I(INS_sve_lsr, EA_SCALABLE, REG_V0, REG_V31, 32, INS_OPTS_SCALABLE_D); // IF_SVE_FT_2A // SLI ., ., # diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 720945451c139..73f4ff0592f71 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4252,9 +4252,8 @@ void emitter::emitIns_Mov( { if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg)) { - assert(insOptsNone(opt)); + assert(opt == INS_OPTS_SCALABLE_B); - opt = INS_OPTS_SCALABLE_B; attr = EA_SCALABLE; if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index e36e5cdb4d7d2..ba83b7ce91b9a 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2031,17 +2031,16 @@ void emitter::emitInsSve_R_R(instruction ins, break; case INS_sve_rev: - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg1)) { assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); assert(isVectorRegister(reg2)); assert(isScalableVectorSize(size)); fmt = IF_SVE_CG_2A; } else { - assert(insScalableOptsNone(sopt)); assert(insOptsScalableStandard(opt)); assert(isPredicateRegister(reg1)); // DDDD assert(isPredicateRegister(reg2)); // NNNN @@ -2366,17 +2365,15 @@ void emitter::emitInsSve_R_R_I(instruction ins, assert(isValidVectorShiftAmount(imm, optGetSveElemsize(opt), isRightShift)); assert(insOptsScalableStandard(opt)); assert(isScalableVectorSize(size)); - - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg2)) { assert((ins == INS_sve_asr) || (ins == INS_sve_lsl) || (ins == INS_sve_lsr)); assert(isVectorRegister(reg1)); - assert(isVectorRegister(reg2)); fmt = IF_SVE_BF_2A; } else { - assert(insScalableOptsNone(sopt)); assert(isVectorRegister(reg1)); // ddddd assert(isLowPredicateRegister(reg2)); // ggg fmt = IF_SVE_AM_2A; @@ -2674,14 +2671,13 @@ void emitter::emitInsSve_R_R_I(instruction ins, assert(isValidSimm<9>(imm)); // iii // iiiiii - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg1)) { - assert(isVectorRegister(reg1)); fmt = IF_SVE_IE_2A; } else { - assert(insScalableOptsNone(sopt)); assert(isPredicateRegister(reg1)); fmt = IF_SVE_ID_2A; } @@ -2694,14 +2690,13 @@ void emitter::emitInsSve_R_R_I(instruction ins, assert(isValidSimm<9>(imm)); // iii // iiiiii - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) - { - assert(isVectorRegister(reg1)); + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg1)) + { fmt = IF_SVE_JH_2A; } else { - assert(insScalableOptsNone(sopt)); assert(isPredicateRegister(reg1)); fmt = IF_SVE_JG_2A; } @@ -2917,19 +2912,18 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(insOptsScalableStandard(opt)); assert(isVectorRegister(reg1)); // mmmmm assert(isVectorRegister(reg3)); // ddddd + assert(insScalableOptsNone(sopt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + if (isVectorRegister(reg2)) { // The instruction only has a .D variant. However, this doesn't matter as // it operates on bits not lanes. Effectively this means all standard opt // sizes are supported. assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg2)); // nnnnn fmt = IF_SVE_AU_3A; } else { - assert(insScalableOptsNone(sopt)); assert(isLowPredicateRegister(reg2)); // ggg fmt = IF_SVE_AA_3A; } @@ -2941,16 +2935,15 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isVectorRegister(reg1)); assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg2)) { - assert(isVectorRegister(reg2)); assert(ins != INS_sve_subr); fmt = IF_SVE_AT_3A; } else { assert(isLowPredicateRegister(reg2)); - assert(insScalableOptsNone(sopt)); fmt = IF_SVE_AA_3A; } break; @@ -2961,15 +2954,14 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(opt == INS_OPTS_SCALABLE_D); assert(isVectorRegister(reg1)); // ddddd assert(isVectorRegister(reg3)); // mmmmm + assert(insScalableOptsNone(sopt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + if (isVectorRegister(reg2)) { - assert(isVectorRegister(reg2)); // nnnnn fmt = IF_SVE_AT_3B; } else { - assert(insScalableOptsNone(sopt)); assert(isLowPredicateRegister(reg2)); // ggg fmt = IF_SVE_AB_3B; } @@ -3007,14 +2999,13 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isVectorRegister(reg1)); assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg2)) { - assert(isVectorRegister(reg2)); fmt = IF_SVE_AT_3A; } else { - assert(insScalableOptsNone(sopt)); assert(isLowPredicateRegister(reg2)); fmt = IF_SVE_AA_3A; } @@ -3160,10 +3151,10 @@ void emitter::emitInsSve_R_R_R(instruction ins, case INS_sve_trn2: case INS_sve_zip2: assert(insOptsScalable(opt)); + assert(insScalableOptsNone(sopt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + if (isVectorRegister(reg1)) { - assert(isVectorRegister(reg1)); // ddddd assert(isVectorRegister(reg2)); // nnnnn assert(isVectorRegister(reg3)); // mmmmm @@ -3179,7 +3170,6 @@ void emitter::emitInsSve_R_R_R(instruction ins, } else { - assert(insScalableOptsNone(sopt)); assert(isPredicateRegister(reg1)); // DDDD assert(isPredicateRegister(reg2)); // NNNN assert(isPredicateRegister(reg3)); // MMMM @@ -3604,10 +3594,10 @@ void emitter::emitInsSve_R_R_R(instruction ins, break; case INS_sve_not: - if (isPredicateRegister(reg1) && sopt != INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isPredicateRegister(reg1)) { assert(opt == INS_OPTS_SCALABLE_B); - assert(isPredicateRegister(reg1)); // DDDD assert(isPredicateRegister(reg2)); // gggg assert(isPredicateRegister(reg3)); // NNNN fmt = IF_SVE_CZ_4A; @@ -3618,7 +3608,6 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isLowPredicateRegister(reg2)); assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); - assert(insScalableOptsNone(sopt)); fmt = IF_SVE_AP_3A; } break; @@ -3761,58 +3750,55 @@ void emitter::emitInsSve_R_R_R(instruction ins, case INS_sve_cpy: case INS_sve_mov: assert(insOptsScalableStandard(opt)); - // TODO-SVE: Following checks can be simplified to check reg1 as predicate register only after adding - // definitions for predicate registers. Currently, predicate registers P0 to P15 are aliased to simd - // registers V0 to V15. - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + if (isVectorRegister(reg1)) // ddddd { - assert(ins == INS_sve_mov); - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd - assert(isVectorRegister(reg2)); // nnnnn - assert(isVectorRegister(reg3)); // mmmmm - fmt = IF_SVE_AU_3A; - // ORR is an alias for MOV, and is always the preferred disassembly. - ins = INS_sve_orr; + if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE) + { + assert(isPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + fmt = IF_SVE_CW_4A; + } + else if (sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR) + { + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + fmt = IF_SVE_CP_3A; + // MOV is an alias for CPY, and is always the preferred disassembly. + ins = INS_sve_mov; + } + else if (isLowPredicateRegister(reg2)) + { + assert(isGeneralRegisterOrSP(reg3)); + assert(insScalableOptsNone(sopt)); + + fmt = IF_SVE_CQ_3A; + reg3 = encodingSPtoZR(reg3); + // MOV is an alias for CPY, and is always the preferred disassembly. + ins = INS_sve_mov; + } + else + { + assert(insScalableOptsNone(sopt)); + assert(ins == INS_sve_mov); + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + fmt = IF_SVE_AU_3A; + // ORR is an alias for MOV, and is always the preferred disassembly. + ins = INS_sve_orr; + } } - else if (isPredicateRegister(reg3) && - (sopt == INS_SCALABLE_OPTS_NONE || sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE)) + else if (isPredicateRegister(reg3)) // NNNN { assert(opt == INS_OPTS_SCALABLE_B); assert(isPredicateRegister(reg1)); // DDDD assert(isPredicateRegister(reg2)); // gggg - assert(isPredicateRegister(reg3)); // NNNN - fmt = sopt == INS_SCALABLE_OPTS_NONE ? IF_SVE_CZ_4A : IF_SVE_CZ_4A_K; + fmt = sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE ? IF_SVE_CZ_4A_K : IF_SVE_CZ_4A; // MOV is an alias for CPY, and is always the preferred disassembly. ins = INS_sve_mov; } - else if (sopt == INS_SCALABLE_OPTS_PREDICATE_MERGE) - { - assert(isVectorRegister(reg1)); - assert(isPredicateRegister(reg2)); - assert(isVectorRegister(reg3)); - assert(insOptsScalableStandard(opt)); - fmt = IF_SVE_CW_4A; - } else { - assert(isVectorRegister(reg1)); - assert(isLowPredicateRegister(reg2)); - if (isGeneralRegisterOrSP(reg3)) - { - assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_CQ_3A; - reg3 = encodingSPtoZR(reg3); - } - else - { - assert(sopt == INS_SCALABLE_OPTS_WITH_SIMD_SCALAR); - assert(isVectorRegister(reg3)); - fmt = IF_SVE_CP_3A; - } - - // MOV is an alias for CPY, and is always the preferred disassembly. - ins = INS_sve_mov; + unreached(); } break; @@ -3992,14 +3978,14 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isVectorRegister(reg3)); assert(insOptsScalableStandard(opt)); assert(isScalableVectorSize(size)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + + if (isVectorRegister(reg2)) { - assert(isVectorRegister(reg2)); fmt = IF_SVE_AT_3A; } else { - assert(insScalableOptsNone(sopt)); assert(isLowPredicateRegister(reg2)); fmt = IF_SVE_AA_3A; } @@ -4119,15 +4105,14 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isVectorRegister(reg1)); // ddddd assert(isVectorRegister(reg3)); // mmmmm assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + assert(insScalableOptsNone(sopt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + if (isVectorRegister(reg2)) // nnnnn { - assert(isVectorRegister(reg2)); // nnnnn fmt = IF_SVE_AT_3A; } else { - assert(insScalableOptsNone(sopt)); assert(isLowPredicateRegister(reg2)); // ggg fmt = IF_SVE_HL_3A; } @@ -4172,15 +4157,14 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(opt == INS_OPTS_SCALABLE_H); assert(isVectorRegister(reg1)); // ddddd assert(isVectorRegister(reg3)); // mmmmm + assert(insScalableOptsNone(sopt)); - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + if (isVectorRegister(reg2)) // nnnnn { - assert(isVectorRegister(reg2)); // nnnnn fmt = IF_SVE_HK_3B; } else { - assert(insScalableOptsNone(sopt)); assert(isLowPredicateRegister(reg2)); // ggg fmt = IF_SVE_HL_3B; } @@ -5901,7 +5885,8 @@ void emitter::emitInsSve_R_R_R_R(instruction ins, switch (ins) { case INS_sve_sel: - if (sopt == INS_SCALABLE_OPTS_UNPREDICATED) + assert(insScalableOptsNone(sopt)); + if (isVectorRegister(reg1)) { if (reg1 == reg4) { @@ -5909,9 +5894,7 @@ void emitter::emitInsSve_R_R_R_R(instruction ins, return emitInsSve_R_R_R(INS_sve_mov, attr, reg1, reg2, reg3, opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); } - assert(insOptsScalableStandard(opt)); - assert(isVectorRegister(reg1)); // ddddd assert(isPredicateRegister(reg2)); // VVVV assert(isVectorRegister(reg3)); // nnnnn assert(isVectorRegister(reg4)); // mmmmm diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 38a480ae77ef5..0d0bbc6cfc963 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -525,7 +525,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, - falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED); + falseReg, opt); break; } else @@ -569,7 +569,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // If the embedded instruction supports optional mask operation, use the "unpredicated" // version of the instruction, followed by "sel" to select the active lanes. GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg, - embMaskOp2Reg, opt, INS_SCALABLE_OPTS_UNPREDICATED); + embMaskOp2Reg, opt); } else { @@ -588,7 +588,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg, - falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED); + falseReg, opt); break; } else if (targetReg != embMaskOp1Reg) @@ -761,7 +761,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // have the value from embMaskOp1Reg GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, embMaskOp1Reg, - falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED); + falseReg, opt); } } else if (targetReg != embMaskOp1Reg) @@ -816,8 +816,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { // This generates an unpredicated version // Implicitly predicated should be taken care above `intrin.op2->IsEmbMaskOp()` - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt, - INS_SCALABLE_OPTS_UNPREDICATED); + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); } } else if (isRMW) @@ -852,8 +851,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt, - INS_SCALABLE_OPTS_UNPREDICATED); + GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, op3Reg, opt); } break; @@ -1924,7 +1922,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_ReverseElement: // Use non-predicated version explicitly - GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt, INS_SCALABLE_OPTS_UNPREDICATED); + GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt); break; case NI_Sve_StoreNarrowing: @@ -1939,8 +1937,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_ZipHigh: case NI_Sve_ZipLow: // Use non-predicated version explicitly - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt, - INS_SCALABLE_OPTS_UNPREDICATED); + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; case NI_Sve_SaturatingDecrementBy16BitElementCountScalar: diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 421dee30ac754..9a43389eba03c 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -381,7 +381,6 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_IMM_FIRST, // Variants with an immediate and a register, where the immediate comes first // Removable once REG_V0 and REG_P0 are distinct - INS_SCALABLE_OPTS_UNPREDICATED, // Variants without a predicate (eg add) INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr) INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov) INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov) From 04797f104ab3017a52dd6fdc672a266da8afa6e3 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:33:22 -0700 Subject: [PATCH 03/10] Remove INS_SCALABLE_OPTS_UNPREDICATED_WIDE --- src/coreclr/jit/codegenarm64test.cpp | 9 +++------ src/coreclr/jit/emitarm64sve.cpp | 4 ++-- src/coreclr/jit/instr.h | 1 - 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index b8afcdef0a284..33a87400ab9ce 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5476,12 +5476,9 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_B); // PMUL .B, .B, .B // IF_SVE_BG_3A - theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V9, REG_V31, REG_V2, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // ASR ., ., .D - theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_V0, REG_V12, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // LSL ., ., .D - theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V29, REG_V10, REG_V22, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_UNPREDICATED_WIDE); // LSR ., ., .D + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V9, REG_V31, REG_V2, INS_OPTS_SCALABLE_B); // ASR ., ., .D + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_V0, REG_V12, INS_OPTS_SCALABLE_H); // LSL ., ., .D + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V29, REG_V10, REG_V22, INS_OPTS_SCALABLE_S); // LSR ., ., .D // IF_SVE_BH_3A theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V4, REG_V2, REG_V0, 0, INS_OPTS_SCALABLE_D, diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index ba83b7ce91b9a..7a7a07c2f0066 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -3129,9 +3129,9 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(insOptsScalableWide(opt)); fmt = IF_SVE_AO_3A; } - else if (sopt == INS_SCALABLE_OPTS_UNPREDICATED_WIDE) + else if (isVectorRegister(reg2)) { - assert(isVectorRegister(reg2)); + assert(insScalableOptsNone(sopt)); assert(insOptsScalableWide(opt)); fmt = IF_SVE_BG_3A; } diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 9a43389eba03c..76653ad28f857 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -381,7 +381,6 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_IMM_FIRST, // Variants with an immediate and a register, where the immediate comes first // Removable once REG_V0 and REG_P0 are distinct - INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr) INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov) INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov) INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both From 4b949be20443c09fadf0ff10f7f65d118ffaa638 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:37:23 -0700 Subject: [PATCH 04/10] TO_PREDICATE AND TO_VECTOR --- src/coreclr/jit/codegenarm64test.cpp | 48 ++++++++++------------------ src/coreclr/jit/emitarm64sve.cpp | 18 +++-------- src/coreclr/jit/instr.h | 2 -- 3 files changed, 20 insertions(+), 48 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 33a87400ab9ce..8a4b26ec64003 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4792,52 +4792,36 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_WIDE); // LSR ., /M, ., .D // IF_SVE_CE_2A - theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P2, REG_V12, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .B, - theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V2, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .H, [0] + theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P2, REG_V12, INS_OPTS_SCALABLE_B); // PMOV .B, + theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V2, INS_OPTS_SCALABLE_H); // PMOV .H, [0] // IF_SVE_CE_2B - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P15, REG_V7, 7, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .D, [] - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V16, 0, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .D, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P15, REG_V7, 7, INS_OPTS_SCALABLE_D); // PMOV .D, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V16, 0, INS_OPTS_SCALABLE_D); // PMOV .D, [] // IF_SVE_CE_2C - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P0, REG_V31, 1, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .H, [] - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P1, REG_V1, 0, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .H, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P0, REG_V31, 1, INS_OPTS_SCALABLE_H); // PMOV .H, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P1, REG_V1, 0, INS_OPTS_SCALABLE_H); // PMOV .H, [] // IF_SVE_CE_2D - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P3, REG_V9, 3, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .S, [] - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P10, REG_V4, 0, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_TO_PREDICATE); // PMOV .S, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P3, REG_V9, 3, INS_OPTS_SCALABLE_S); // PMOV .S, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P10, REG_V4, 0, INS_OPTS_SCALABLE_S); // PMOV .S, [] // IF_SVE_CF_2A - theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V11, REG_P12, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV , .B - theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V2, REG_P7, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [0], .S + theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V11, REG_P12, INS_OPTS_SCALABLE_B); // PMOV , .B + theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V2, REG_P7, INS_OPTS_SCALABLE_S); // PMOV [0], .S // IF_SVE_CF_2B - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V6, REG_P8, 7, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [], .D - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V9, REG_P7, 0, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [], .D + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V6, REG_P8, 7, INS_OPTS_SCALABLE_D); // PMOV [], .D + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V9, REG_P7, 0, INS_OPTS_SCALABLE_D); // PMOV [], .D // IF_SVE_CF_2C - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V8, REG_P4, 1, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [], .H - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V5, REG_P9, 0, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [], .H + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V8, REG_P4, 1, INS_OPTS_SCALABLE_H); // PMOV [], .H + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V5, REG_P9, 0, INS_OPTS_SCALABLE_H); // PMOV [], .H // IF_SVE_CF_2D - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V14, REG_P2, 3, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [], .S - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V3, REG_P15, 0, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_TO_VECTOR); // PMOV [], .S + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V14, REG_P2, 3, INS_OPTS_SCALABLE_S); // PMOV [], .S + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V3, REG_P15, 0, INS_OPTS_SCALABLE_S); // PMOV [], .S // IF_SVE_CJ_2A theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_P1, REG_P2, diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 7a7a07c2f0066..901d9ff2ce7ec 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -1926,22 +1926,17 @@ void emitter::emitInsSve_R_R(instruction ins, assert(insOptsScalableStandard(opt)); return emitInsSve_R_R_I(INS_sve_pmov, attr, reg1, reg2, 0, opt, sopt); } - if (sopt == INS_SCALABLE_OPTS_TO_PREDICATE) + if (isPredicateRegister(reg1)) { - assert(isPredicateRegister(reg1)); assert(isVectorRegister(reg2)); fmt = IF_SVE_CE_2A; } - else if (sopt == INS_SCALABLE_OPTS_TO_VECTOR) + else { assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); fmt = IF_SVE_CF_2A; } - else - { - assert(!"invalid instruction"); - } break; case INS_sve_movs: @@ -2486,9 +2481,8 @@ void emitter::emitInsSve_R_R_I(instruction ins, break; case INS_sve_pmov: - if (sopt == INS_SCALABLE_OPTS_TO_PREDICATE) + if (isPredicateRegister(reg1)) { - assert(isPredicateRegister(reg1)); assert(isVectorRegister(reg2)); switch (opt) { @@ -2508,7 +2502,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, unreached(); } } - else if (sopt == INS_SCALABLE_OPTS_TO_VECTOR) + else { assert(isVectorRegister(reg1)); assert(isPredicateRegister(reg2)); @@ -2530,10 +2524,6 @@ void emitter::emitInsSve_R_R_I(instruction ins, unreached(); } } - else - { - unreached(); - } break; case INS_sve_sqrshrn: diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 76653ad28f857..125d39a9c15e0 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -381,8 +381,6 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_IMM_FIRST, // Variants with an immediate and a register, where the immediate comes first // Removable once REG_V0 and REG_P0 are distinct - INS_SCALABLE_OPTS_TO_PREDICATE, // Variants moving to a predicate from a vector (e.g. pmov) - INS_SCALABLE_OPTS_TO_VECTOR, // Variants moving to a vector from a predicate (e.g. pmov) INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both }; From f3ebe4d07921ed2a91f5ed9afe1d60d9abba1035 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:39:59 -0700 Subject: [PATCH 05/10] Remove INS_SCALABLE_OPTS_BROADCAST --- src/coreclr/jit/codegenarm64test.cpp | 30 ++++++++++------------------ src/coreclr/jit/emitarm64sve.cpp | 2 +- src/coreclr/jit/instr.h | 3 --- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 8a4b26ec64003..43e6c338d2c79 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5598,26 +5598,16 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_Q); // MOV ., // MOV implementation should produce same output as DUP implementation with same parameters - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V1, REG_V16, 63, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V17, REG_V18, 31, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V11, 15, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V3, 7, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V3, REG_V8, 3, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V13, REG_V9, 0, INS_OPTS_SCALABLE_B, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V12, REG_V6, 0, INS_OPTS_SCALABLE_H, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V7, 0, INS_OPTS_SCALABLE_S, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V10, REG_V20, 0, INS_OPTS_SCALABLE_Q, - INS_SCALABLE_OPTS_BROADCAST); // MOV ., + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V1, REG_V16, 63, INS_OPTS_SCALABLE_B); // MOV ., .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V17, REG_V18, 31, INS_OPTS_SCALABLE_H); // MOV ., .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V11, 15, INS_OPTS_SCALABLE_S); // MOV ., .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V3, 7, INS_OPTS_SCALABLE_D); // MOV ., .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V3, REG_V8, 3, INS_OPTS_SCALABLE_Q); // MOV ., .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V13, REG_V9, 0, INS_OPTS_SCALABLE_B); // MOV ., + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V12, REG_V6, 0, INS_OPTS_SCALABLE_H); // MOV ., + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V7, 0, INS_OPTS_SCALABLE_S); // MOV ., + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D); // MOV ., + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V10, REG_V20, 0, INS_OPTS_SCALABLE_Q); // MOV ., // IF_SVE_BZ_3A theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2, diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 901d9ff2ce7ec..1c06de940a415 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2438,7 +2438,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, break; case INS_sve_mov: - if (sopt == INS_SCALABLE_OPTS_BROADCAST) + if (isVectorRegister(reg2)) { return emitInsSve_R_R_I(INS_sve_dup, attr, reg1, reg2, imm, opt, sopt); } diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 125d39a9c15e0..47db1c8304a8a 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -379,9 +379,6 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_IMM_BITMASK, // Variants with an immediate that is a bitmask INS_SCALABLE_OPTS_IMM_FIRST, // Variants with an immediate and a register, where the immediate comes first - - // Removable once REG_V0 and REG_P0 are distinct - INS_SCALABLE_OPTS_BROADCAST, // Used to distinguish mov from cpy, where mov is an alias for both }; // Maps directly to the pattern used in SVE instructions such as cntb. From 6c0cae066ef39c5507f3041bcdfce7a894a37117 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:46:23 -0700 Subject: [PATCH 06/10] Remove ins_sve_ldr_mask and ins_sve_str_mask --- src/coreclr/jit/emitarm64.cpp | 67 +++----------------------------- src/coreclr/jit/instr.cpp | 4 +- src/coreclr/jit/instrsarm64sve.h | 4 -- 3 files changed, 8 insertions(+), 67 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 73f4ff0592f71..f5df82bc48b7b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -7881,42 +7881,14 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va case INS_sve_ldr: { - assert(isVectorRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_IE_2A; - - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - } - } - break; - - // TODO-SVE: Fold into INS_sve_ldr once REG_V0 and REG_P0 are distinct - case INS_sve_ldr_mask: - { - assert(isPredicateRegister(reg1)); isSimple = false; size = EA_SCALABLE; attr = size; - fmt = IF_SVE_ID_2A; - ins = INS_sve_ldr; + fmt = isVectorRegister(reg1) ? IF_SVE_IE_2A : IF_SVE_ID_2A; // TODO-SVE: Don't assume 128bit vectors // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); + scale = NaturalScale_helper(isVectorRegister(reg1) ? EA_16BYTE : EA_2BYTE); ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) @@ -7929,8 +7901,8 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va regNumber rsvdReg = codeGen->rsGetRsvdReg(); codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); } + break; } - break; default: NYI("emitIns_R_S"); // FP locals? @@ -8160,42 +8132,14 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va case INS_sve_str: { - assert(isVectorRegister(reg1)); isSimple = false; size = EA_SCALABLE; attr = size; - fmt = IF_SVE_JH_2A; - - // TODO-SVE: Don't assume 128bit vectors - scale = NaturalScale_helper(EA_16BYTE); - ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate - - if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) - { - imm >>= scale; // The immediate is scaled by the size of the ld/st - } - else - { - useRegForImm = true; - regNumber rsvdReg = codeGen->rsGetRsvdReg(); - codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); - } - } - break; - - // TODO-SVE: Fold into INS_sve_str once REG_V0 and REG_P0 are distinct - case INS_sve_str_mask: - { - assert(isPredicateRegister(reg1)); - isSimple = false; - size = EA_SCALABLE; - attr = size; - fmt = IF_SVE_JG_2A; - ins = INS_sve_str; + fmt = isVectorRegister(reg1) ? IF_SVE_JH_2A : IF_SVE_JG_2A; // TODO-SVE: Don't assume 128bit vectors // Predicate size is vector length / 8 - scale = NaturalScale_helper(EA_2BYTE); + scale = NaturalScale_helper(isVectorRegister(reg1) ? EA_16BYTE : EA_2BYTE); ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate if (((imm & mask) == 0) && (isValidSimm<9>(imm >> scale))) @@ -8208,6 +8152,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va regNumber rsvdReg = codeGen->rsGetRsvdReg(); codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm); } + break; } break; diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 4799b8b333491..1ea14aaf6a38a 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1873,7 +1873,7 @@ instruction CodeGenInterface::ins_Load(var_types srcType, bool aligned /*=false* #if defined(TARGET_XARCH) return INS_kmovq_msk; #elif defined(TARGET_ARM64) - return INS_sve_ldr_mask; + return INS_sve_ldr; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS @@ -2194,7 +2194,7 @@ instruction CodeGenInterface::ins_Store(var_types dstType, bool aligned /*=false #if defined(TARGET_XARCH) return INS_kmovq_msk; #elif defined(TARGET_ARM64) - return INS_sve_str_mask; + return INS_sve_str; #endif } #endif // FEATURE_MASKED_HW_INTRINSICS diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index e29899d981b6d..345831eed6918 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -2841,10 +2841,6 @@ INST1(st1q, "st1q", 0, IF_SV // ST1Q {.Q }, , [.D{, }] SVE_IY_4A 11100100001mmmmm 001gggnnnnnttttt E420 2000 -// TODO-SVE: Removable once REG_V0 and REG_P0 are distinct -INST1(str_mask, "str_mask", 0, IF_SN_0A, BAD_CODE) -INST1(ldr_mask, "ldr_mask", 0, IF_SN_0A, BAD_CODE) - // clang-format on /*****************************************************************************/ From a395a9d06122b07428af50c00bd5179767172b57 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:51:51 -0700 Subject: [PATCH 07/10] bug fix --- src/coreclr/jit/emitarm64.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f5df82bc48b7b..3117bdbcb89d6 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4252,7 +4252,14 @@ void emitter::emitIns_Mov( { if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg)) { - assert(opt == INS_OPTS_SCALABLE_B); + if (insOptsNone(opt)) + { + opt = INS_OPTS_SCALABLE_B; + } + else + { + assert(opt == INS_OPTS_SCALABLE_B); + } attr = EA_SCALABLE; From 08efeae2d04421a45f2e5fa145c6a6b18ef12484 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 17 Jun 2024 23:56:19 -0700 Subject: [PATCH 08/10] jit format --- src/coreclr/jit/codegenarm64test.cpp | 214 ++++++++++++++++++--------- src/coreclr/jit/emitarm64sve.cpp | 6 +- src/coreclr/jit/instrsarm64sve.h | 1 - 3 files changed, 145 insertions(+), 76 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 43e6c338d2c79..90105004bb4be 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4793,35 +4793,47 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_CE_2A theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P2, REG_V12, INS_OPTS_SCALABLE_B); // PMOV .B, - theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V2, INS_OPTS_SCALABLE_H); // PMOV .H, [0] + theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V2, INS_OPTS_SCALABLE_H); // PMOV .H, [0] // IF_SVE_CE_2B - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P15, REG_V7, 7, INS_OPTS_SCALABLE_D); // PMOV .D, [] - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V16, 0, INS_OPTS_SCALABLE_D); // PMOV .D, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P15, REG_V7, 7, INS_OPTS_SCALABLE_D); // PMOV .D, + // [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P7, REG_V16, 0, INS_OPTS_SCALABLE_D); // PMOV .D, + // [] // IF_SVE_CE_2C - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P0, REG_V31, 1, INS_OPTS_SCALABLE_H); // PMOV .H, [] - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P1, REG_V1, 0, INS_OPTS_SCALABLE_H); // PMOV .H, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P0, REG_V31, 1, INS_OPTS_SCALABLE_H); // PMOV .H, + // [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P1, REG_V1, 0, INS_OPTS_SCALABLE_H); // PMOV .H, + // [] // IF_SVE_CE_2D - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P3, REG_V9, 3, INS_OPTS_SCALABLE_S); // PMOV .S, [] - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P10, REG_V4, 0, INS_OPTS_SCALABLE_S); // PMOV .S, [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P3, REG_V9, 3, INS_OPTS_SCALABLE_S); // PMOV .S, + // [] + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_P10, REG_V4, 0, INS_OPTS_SCALABLE_S); // PMOV .S, + // [] // IF_SVE_CF_2A theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V11, REG_P12, INS_OPTS_SCALABLE_B); // PMOV , .B theEmitter->emitIns_R_R(INS_sve_pmov, EA_SCALABLE, REG_V2, REG_P7, INS_OPTS_SCALABLE_S); // PMOV [0], .S // IF_SVE_CF_2B - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V6, REG_P8, 7, INS_OPTS_SCALABLE_D); // PMOV [], .D - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V9, REG_P7, 0, INS_OPTS_SCALABLE_D); // PMOV [], .D + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V6, REG_P8, 7, INS_OPTS_SCALABLE_D); // PMOV [], + // .D + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V9, REG_P7, 0, INS_OPTS_SCALABLE_D); // PMOV [], + // .D // IF_SVE_CF_2C - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V8, REG_P4, 1, INS_OPTS_SCALABLE_H); // PMOV [], .H - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V5, REG_P9, 0, INS_OPTS_SCALABLE_H); // PMOV [], .H + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V8, REG_P4, 1, INS_OPTS_SCALABLE_H); // PMOV [], + // .H + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V5, REG_P9, 0, INS_OPTS_SCALABLE_H); // PMOV [], + // .H // IF_SVE_CF_2D - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V14, REG_P2, 3, INS_OPTS_SCALABLE_S); // PMOV [], .S - theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V3, REG_P15, 0, INS_OPTS_SCALABLE_S); // PMOV [], .S + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V14, REG_P2, 3, INS_OPTS_SCALABLE_S); // PMOV [], + // .S + theEmitter->emitIns_R_R_I(INS_sve_pmov, EA_SCALABLE, REG_V3, REG_P15, 0, INS_OPTS_SCALABLE_S); // PMOV [], + // .S // IF_SVE_CJ_2A theEmitter->emitIns_R_R(INS_sve_rev, EA_SCALABLE, REG_P1, REG_P2, @@ -5377,33 +5389,54 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D); // MSB ., /M, ., . // IF_SVE_AT_3A - theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V0, REG_V0, REG_V0, INS_OPTS_SCALABLE_B); // ADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V3, REG_V31, REG_V12, INS_OPTS_SCALABLE_H); // SQADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V7, REG_V0, REG_V31, INS_OPTS_SCALABLE_S); // SQSUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V19, REG_V7, REG_V13, INS_OPTS_SCALABLE_D); // SUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V23, REG_V28, REG_V29, INS_OPTS_SCALABLE_B); // UQADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V31, REG_V31, REG_V31, INS_OPTS_SCALABLE_H); // UQSUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_V0, REG_V31, INS_OPTS_SCALABLE_B); // MUL ., ., . - theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V0, REG_V31, REG_V5, INS_OPTS_SCALABLE_H); // SMULH ., ., . - theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V31, REG_V5, REG_V0, INS_OPTS_SCALABLE_D); // UMULH ., ., . + theEmitter->emitIns_R_R_R(INS_sve_add, EA_SCALABLE, REG_V0, REG_V0, REG_V0, + INS_OPTS_SCALABLE_B); // ADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sqadd, EA_SCALABLE, REG_V3, REG_V31, REG_V12, + INS_OPTS_SCALABLE_H); // SQADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sqsub, EA_SCALABLE, REG_V7, REG_V0, REG_V31, + INS_OPTS_SCALABLE_S); // SQSUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sub, EA_SCALABLE, REG_V19, REG_V7, REG_V13, + INS_OPTS_SCALABLE_D); // SUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uqadd, EA_SCALABLE, REG_V23, REG_V28, REG_V29, + INS_OPTS_SCALABLE_B); // UQADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uqsub, EA_SCALABLE, REG_V31, REG_V31, REG_V31, + INS_OPTS_SCALABLE_H); // UQSUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_mul, EA_SCALABLE, REG_V5, REG_V0, REG_V31, + INS_OPTS_SCALABLE_B); // MUL ., ., . + theEmitter->emitIns_R_R_R(INS_sve_smulh, EA_SCALABLE, REG_V0, REG_V31, REG_V5, + INS_OPTS_SCALABLE_H); // SMULH ., ., . + theEmitter->emitIns_R_R_R(INS_sve_umulh, EA_SCALABLE, REG_V31, REG_V5, REG_V0, + INS_OPTS_SCALABLE_D); // UMULH ., ., . theEmitter->emitIns_R_R_R(INS_sve_sqdmulh, EA_SCALABLE, REG_V7, REG_V28, REG_V0, INS_OPTS_SCALABLE_B); // SQDMULH ., ., . theEmitter->emitIns_R_R_R(INS_sve_sqrdmulh, EA_SCALABLE, REG_V23, REG_V3, REG_V31, INS_OPTS_SCALABLE_H); // SQRDMULH ., ., . theEmitter->emitIns_R_R_R(INS_sve_ftssel, EA_SCALABLE, REG_V17, REG_V16, REG_V15, INS_OPTS_SCALABLE_D); // FTSSEL ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TRN1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H); // TRN1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_S); // TRN2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // TRN2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_B); // UZP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_H); // UZP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V18, REG_V19, REG_V20, INS_OPTS_SCALABLE_S); // UZP2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V21, REG_V22, REG_V23, INS_OPTS_SCALABLE_D); // UZP2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V24, REG_V25, REG_V26, INS_OPTS_SCALABLE_B); // ZIP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V27, REG_V28, REG_V29, INS_OPTS_SCALABLE_H); // ZIP1 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V30, REG_V31, REG_V0, INS_OPTS_SCALABLE_S); // ZIP2 ., ., . - theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V1, REG_V2, REG_V3, INS_OPTS_SCALABLE_D); // ZIP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_B); // TRN1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_H); // TRN1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_S); // TRN2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_D); // TRN2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_B); // UZP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V15, REG_V16, REG_V17, + INS_OPTS_SCALABLE_H); // UZP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V18, REG_V19, REG_V20, + INS_OPTS_SCALABLE_S); // UZP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V21, REG_V22, REG_V23, + INS_OPTS_SCALABLE_D); // UZP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V24, REG_V25, REG_V26, + INS_OPTS_SCALABLE_B); // ZIP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V27, REG_V28, REG_V29, + INS_OPTS_SCALABLE_H); // ZIP1 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V30, REG_V31, REG_V0, + INS_OPTS_SCALABLE_S); // ZIP2 ., ., . + theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V1, REG_V2, REG_V3, + INS_OPTS_SCALABLE_D); // ZIP2 ., ., . theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_B); // TBXQ ., ., . theEmitter->emitIns_R_R_R(INS_sve_tbxq, EA_SCALABLE, REG_V3, REG_V4, REG_V5, @@ -5442,12 +5475,18 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_S); // BGRP ., ., . theEmitter->emitIns_R_R_R(INS_sve_bgrp, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // BGRP ., ., . - theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H); // FADD ., ., . - theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_S); // FMUL ., ., . - theEmitter->emitIns_R_R_R(INS_sve_frecps, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D); // FRECPS ., ., . - theEmitter->emitIns_R_R_R(INS_sve_frsqrts, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_H); // FRSQRTS ., ., . - theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_S); // FSUB ., ., . - theEmitter->emitIns_R_R_R(INS_sve_ftsmul, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_D); // FTSMUL ., ., . + theEmitter->emitIns_R_R_R(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // FADD ., ., . + theEmitter->emitIns_R_R_R(INS_sve_fmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_S); // FMUL ., ., . + theEmitter->emitIns_R_R_R(INS_sve_frecps, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_D); // FRECPS ., ., . + theEmitter->emitIns_R_R_R(INS_sve_frsqrts, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_H); // FRSQRTS ., ., . + theEmitter->emitIns_R_R_R(INS_sve_fsub, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_S); // FSUB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ftsmul, EA_SCALABLE, REG_V15, REG_V16, REG_V17, + INS_OPTS_SCALABLE_D); // FTSMUL ., ., . // IF_SVE_BA_3A theEmitter->emitIns_R_R_R(INS_sve_index, EA_4BYTE, REG_V24, REG_ZR, REG_R9, @@ -5460,9 +5499,12 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_B); // PMUL .B, .B, .B // IF_SVE_BG_3A - theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V9, REG_V31, REG_V2, INS_OPTS_SCALABLE_B); // ASR ., ., .D - theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_V0, REG_V12, INS_OPTS_SCALABLE_H); // LSL ., ., .D - theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V29, REG_V10, REG_V22, INS_OPTS_SCALABLE_S); // LSR ., ., .D + theEmitter->emitIns_R_R_R(INS_sve_asr, EA_SCALABLE, REG_V9, REG_V31, REG_V2, + INS_OPTS_SCALABLE_B); // ASR ., ., .D + theEmitter->emitIns_R_R_R(INS_sve_lsl, EA_SCALABLE, REG_V19, REG_V0, REG_V12, + INS_OPTS_SCALABLE_H); // LSL ., ., .D + theEmitter->emitIns_R_R_R(INS_sve_lsr, EA_SCALABLE, REG_V29, REG_V10, REG_V22, + INS_OPTS_SCALABLE_S); // LSR ., ., .D // IF_SVE_BH_3A theEmitter->emitInsSve_R_R_R_I(INS_sve_adr, EA_SCALABLE, REG_V4, REG_V2, REG_V0, 0, INS_OPTS_SCALABLE_D, @@ -5483,12 +5525,18 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_D_UXTW); // ADR .D, [.D, .D, UXTW{}] // IF_SVE_BR_3B - theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_Q); // TRN1 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_Q); // TRN2 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_Q); // UZP1 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_Q); // UZP2 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_Q); // ZIP1 .Q, .Q, .Q - theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_Q); // ZIP2 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_Q); // TRN1 .Q, + // .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_trn2, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_Q); // TRN2 .Q, + // .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_uzp1, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_Q); // UZP1 .Q, + // .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_uzp2, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_Q); // UZP2 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_zip1, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_Q); // ZIP1 .Q, .Q, .Q + theEmitter->emitIns_R_R_R(INS_sve_zip2, EA_SCALABLE, REG_V15, REG_V16, REG_V17, + INS_OPTS_SCALABLE_Q); // ZIP2 .Q, .Q, .Q // IF_SVE_BS_1A theEmitter->emitIns_R_I(INS_sve_and, EA_SCALABLE, REG_V0, 0x00000000000000AA, @@ -5598,16 +5646,26 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_Q); // MOV ., // MOV implementation should produce same output as DUP implementation with same parameters - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V1, REG_V16, 63, INS_OPTS_SCALABLE_B); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V17, REG_V18, 31, INS_OPTS_SCALABLE_H); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V11, 15, INS_OPTS_SCALABLE_S); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V3, 7, INS_OPTS_SCALABLE_D); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V3, REG_V8, 3, INS_OPTS_SCALABLE_Q); // MOV ., .[] - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V13, REG_V9, 0, INS_OPTS_SCALABLE_B); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V12, REG_V6, 0, INS_OPTS_SCALABLE_H); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V7, 0, INS_OPTS_SCALABLE_S); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D); // MOV ., - theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V10, REG_V20, 0, INS_OPTS_SCALABLE_Q); // MOV ., + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V1, REG_V16, 63, INS_OPTS_SCALABLE_B); // MOV ., + // .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V17, REG_V18, 31, INS_OPTS_SCALABLE_H); // MOV ., + // .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V11, 15, INS_OPTS_SCALABLE_S); // MOV ., + // .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V3, 7, INS_OPTS_SCALABLE_D); // MOV ., + // .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V3, REG_V8, 3, INS_OPTS_SCALABLE_Q); // MOV ., + // .[] + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V13, REG_V9, 0, INS_OPTS_SCALABLE_B); // MOV ., + // + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V12, REG_V6, 0, INS_OPTS_SCALABLE_H); // MOV ., + // + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V2, REG_V7, 0, INS_OPTS_SCALABLE_S); // MOV ., + // + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V0, REG_V0, 0, INS_OPTS_SCALABLE_D); // MOV ., + // + theEmitter->emitIns_R_R_I(INS_sve_mov, EA_SCALABLE, REG_V10, REG_V20, 0, INS_OPTS_SCALABLE_Q); // MOV ., + // // IF_SVE_BZ_3A theEmitter->emitIns_R_R_R(INS_sve_tbl, EA_SCALABLE, REG_V0, REG_V1, REG_V2, @@ -5820,22 +5878,32 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_H); // BFCLAMP .H, .H, .H // IF_SVE_HK_3B - theEmitter->emitIns_R_R_R(INS_sve_bfadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_H); // BFADD .H, .H, .H - theEmitter->emitIns_R_R_R(INS_sve_bfmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_H); // BFMUL .H, .H, .H - theEmitter->emitIns_R_R_R(INS_sve_bfsub, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_H); // BFSUB .H, .H, .H + theEmitter->emitIns_R_R_R(INS_sve_bfadd, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // BFADD .H, .H, .H + theEmitter->emitIns_R_R_R(INS_sve_bfmul, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_H); // BFMUL .H, .H, .H + theEmitter->emitIns_R_R_R(INS_sve_bfsub, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_H); // BFSUB .H, .H, .H #ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_AT_3B - theEmitter->emitIns_R_R_R(INS_sve_addpt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D); // ADDPT .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_subpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D); // SUBPT .D, .D, .D -#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED + theEmitter->emitIns_R_R_R(INS_sve_addpt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_D); // ADDPT .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_subpt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_D); // SUBPT .D, .D, .D +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_AU_3A - theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D); // AND .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D); // BIC .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D); // EOR .D, .D, .D - theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // MOV .D, .D - theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_D); // ORR .D, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_and, EA_SCALABLE, REG_V0, REG_V1, REG_V2, INS_OPTS_SCALABLE_D); // AND .D, + // .D, .D + theEmitter->emitIns_R_R_R(INS_sve_bic, EA_SCALABLE, REG_V3, REG_V4, REG_V5, INS_OPTS_SCALABLE_D); // BIC .D, + // .D, .D + theEmitter->emitIns_R_R_R(INS_sve_eor, EA_SCALABLE, REG_V6, REG_V7, REG_V8, INS_OPTS_SCALABLE_D); // EOR .D, + // .D, .D + theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V9, REG_V10, REG_V11, INS_OPTS_SCALABLE_D); // MOV .D, + // .D + theEmitter->emitIns_R_R_R(INS_sve_orr, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_D); // ORR .D, .D, .D // IF_SVE_AV_3A theEmitter->emitIns_R_R_R(INS_sve_bcax, EA_SCALABLE, REG_V0, REG_V1, REG_V2, @@ -6183,8 +6251,10 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_CW_4A theEmitter->emitIns_R_R_R(INS_sve_mov, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_PREDICATE_MERGE); // MOV ., /M, . - theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, INS_OPTS_SCALABLE_D); // SEL ., , ., . - theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, INS_OPTS_SCALABLE_S); // SEL ., , ., . + theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V29, REG_P15, REG_V28, REG_V4, + INS_OPTS_SCALABLE_D); // SEL ., , ., . + theEmitter->emitIns_R_R_R_R(INS_sve_sel, EA_SCALABLE, REG_V5, REG_P13, REG_V27, REG_V5, + INS_OPTS_SCALABLE_S); // SEL ., , ., . // IF_SVE_EQ_3A // Note: Scalable size is the size of the destination , not the source . diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 1c06de940a415..99e86bb38c15c 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -2682,7 +2682,7 @@ void emitter::emitInsSve_R_R_I(instruction ins, assert(insScalableOptsNone(sopt)); if (isVectorRegister(reg1)) - { + { fmt = IF_SVE_JH_2A; } else @@ -3761,7 +3761,7 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isGeneralRegisterOrSP(reg3)); assert(insScalableOptsNone(sopt)); - fmt = IF_SVE_CQ_3A; + fmt = IF_SVE_CQ_3A; reg3 = encodingSPtoZR(reg3); // MOV is an alias for CPY, and is always the preferred disassembly. ins = INS_sve_mov; @@ -4149,7 +4149,7 @@ void emitter::emitInsSve_R_R_R(instruction ins, assert(isVectorRegister(reg3)); // mmmmm assert(insScalableOptsNone(sopt)); - if (isVectorRegister(reg2)) // nnnnn + if (isVectorRegister(reg2)) // nnnnn { fmt = IF_SVE_HK_3B; } diff --git a/src/coreclr/jit/instrsarm64sve.h b/src/coreclr/jit/instrsarm64sve.h index 345831eed6918..52a01668ae5a6 100644 --- a/src/coreclr/jit/instrsarm64sve.h +++ b/src/coreclr/jit/instrsarm64sve.h @@ -2840,7 +2840,6 @@ INST1(ldnt1sw, "ldnt1sw", 0, IF_SV INST1(st1q, "st1q", 0, IF_SVE_IY_4A, 0xE4202000 ) // ST1Q {.Q }, , [.D{, }] SVE_IY_4A 11100100001mmmmm 001gggnnnnnttttt E420 2000 - // clang-format on /*****************************************************************************/ From a1861d23a0ed4061e6504d198ba86773cd090ae7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 18 Jun 2024 07:44:55 -0700 Subject: [PATCH 09/10] proper handling of OperIsMemoryLoad() for Gather APIs --- src/coreclr/jit/gentree.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d4332600df1e7..e57df648f12c0 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27525,12 +27525,6 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend: case NI_Sve_GatherVectorUInt32ZeroExtend: addr = Op(2); - if (!varTypeIsI(addr)) - { - // For some variants, the address is in vector. - // Return false for such cases. - return false; - } break; #endif // TARGET_ARM64 @@ -27610,7 +27604,18 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const if (addr != nullptr) { - assert(varTypeIsI(addr)); +#ifdef TARGET_ARM64 + static_assert_no_msg( + AreContiguous(NI_Sve_GatherVector, NI_Sve_GatherVectorByteZeroExtend, NI_Sve_GatherVectorInt16SignExtend, + NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend, NI_Sve_GatherVectorInt32SignExtend, + NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend, NI_Sve_GatherVectorSByteSignExtend, + NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend, NI_Sve_GatherVectorUInt16ZeroExtend, + NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend, NI_Sve_GatherVectorUInt32ZeroExtend)); + assert(varTypeIsI(addr) || (varTypeIsSIMD(addr) && ((intrinsicId >= NI_Sve_GatherVector) && + (intrinsicId <= NI_Sve_GatherVectorUInt32ZeroExtend)))); +#else + assert(varTypeIsI(addr); +#endif return true; } From 3a300483f72d0c5fb5ab012a1d1e6739d00d4dc1 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 18 Jun 2024 10:09:08 -0700 Subject: [PATCH 10/10] review comments --- src/coreclr/jit/emitarm64.cpp | 11 ++--------- src/coreclr/jit/gentree.cpp | 2 +- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 3117bdbcb89d6..964a386241f95 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -4252,15 +4252,8 @@ void emitter::emitIns_Mov( { if (isPredicateRegister(dstReg) && isPredicateRegister(srcReg)) { - if (insOptsNone(opt)) - { - opt = INS_OPTS_SCALABLE_B; - } - else - { - assert(opt == INS_OPTS_SCALABLE_B); - } - + assert((opt == INS_OPTS_SCALABLE_B) || insOptsNone(opt)); + opt = INS_OPTS_SCALABLE_B; attr = EA_SCALABLE; if (IsRedundantMov(ins, size, dstReg, srcReg, canSkip)) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e57df648f12c0..dfa9af2dcdc94 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -27614,7 +27614,7 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const assert(varTypeIsI(addr) || (varTypeIsSIMD(addr) && ((intrinsicId >= NI_Sve_GatherVector) && (intrinsicId <= NI_Sve_GatherVectorUInt32ZeroExtend)))); #else - assert(varTypeIsI(addr); + assert(varTypeIsI(addr)); #endif return true; }