From 9512aabfa4cfaf9e35b7367781cdaff0e9f06805 Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Thu, 22 Feb 2024 20:05:41 -0500 Subject: [PATCH] JIT ARM64-SVE: Add encodings FL_3A to FX_3A (#98832) Part of #94549. Implements the following encodings: IF_SVE_FL_3A IF_SVE_FM_3A IF_SVE_FN_3A IF_SVE_FN_3B IF_SVE_FO_3A IF_SVE_FP_3A IF_SVE_FQ_3A IF_SVE_FS_3A IF_SVE_FW_3A IF_SVE_FX_3A --- src/coreclr/jit/codegenarm64test.cpp | 124 ++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 211 +++++++++++++++++++++++++++ 2 files changed, 335 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 503203aba3494c..1a7ae0ef7d0322 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5435,6 +5435,130 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_zipq2, EA_SCALABLE, REG_V12, REG_V13, REG_V14, INS_OPTS_SCALABLE_B); // ZIPQ2 ., ., . + // IF_SVE_FL_3A + theEmitter->emitIns_R_R_R(INS_sve_sabdlb, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // SABDLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sabdlt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_S); // SABDLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_saddlb, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_D); // SADDLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_saddlt, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_H); // SADDLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ssublb, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_S); // SSUBLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ssublt, EA_SCALABLE, REG_V15, REG_V16, REG_V17, + INS_OPTS_SCALABLE_D); // SSUBLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uabdlb, EA_SCALABLE, REG_V18, REG_V19, REG_V20, + INS_OPTS_SCALABLE_H); // UABDLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uabdlt, EA_SCALABLE, REG_V21, REG_V22, REG_V24, + INS_OPTS_SCALABLE_S); // UABDLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uaddlb, EA_SCALABLE, REG_V24, REG_V25, REG_V26, + INS_OPTS_SCALABLE_D); // UADDLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uaddlt, EA_SCALABLE, REG_V27, REG_V28, REG_V29, + INS_OPTS_SCALABLE_H); // UADDLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_usublb, EA_SCALABLE, REG_V30, REG_V31, REG_V0, + INS_OPTS_SCALABLE_S); // USUBLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_usublt, EA_SCALABLE, REG_V1, REG_V2, REG_V3, + INS_OPTS_SCALABLE_D); // USUBLT ., ., . + + // IF_SVE_FM_3A + theEmitter->emitIns_R_R_R(INS_sve_saddwb, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // SADDWB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_saddwt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_S); // SADDWT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ssubwb, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_D); // SSUBWB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ssubwt, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_H); // SSUBWT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uaddwb, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_S); // UADDWB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uaddwt, EA_SCALABLE, REG_V15, REG_V16, REG_V17, + INS_OPTS_SCALABLE_D); // UADDWT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_usubwb, EA_SCALABLE, REG_V18, REG_V19, REG_V20, + INS_OPTS_SCALABLE_H); // USUBWB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_usubwt, EA_SCALABLE, REG_V21, REG_V22, REG_V23, + INS_OPTS_SCALABLE_S); // USUBWT ., ., . + + // IF_SVE_FN_3A + theEmitter->emitIns_R_R_R(INS_sve_pmullb, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // PMULLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_pmullt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_D); // PMULLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_smullb, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_H); // SMULLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_smullt, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_D); // SMULLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sqdmullb, EA_SCALABLE, REG_V12, REG_V13, REG_V14, + INS_OPTS_SCALABLE_H); // SQDMULLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sqdmullt, EA_SCALABLE, REG_V15, REG_V16, REG_V17, + INS_OPTS_SCALABLE_D); // SQDMULLT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_umullb, EA_SCALABLE, REG_V18, REG_V19, REG_V20, + INS_OPTS_SCALABLE_H); // UMULLB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_umullt, EA_SCALABLE, REG_V21, REG_V22, REG_V23, + INS_OPTS_SCALABLE_D); // UMULLT ., ., . + + // IF_SVE_FN_3B + theEmitter->emitIns_R_R_R(INS_sve_pmullb, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_Q); // PMULLB .Q, .D, .D + theEmitter->emitIns_R_R_R(INS_sve_pmullt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_Q); // PMULLT .Q, .D, .D + + // IF_SVE_FO_3A + theEmitter->emitIns_R_R_R(INS_sve_smmla, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_S); // SMMLA .S, .B, .B + theEmitter->emitIns_R_R_R(INS_sve_ummla, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_S); // UMMLA .S, .B, .B + theEmitter->emitIns_R_R_R(INS_sve_usmmla, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_S); // USMMLA .S, .B, .B + + // IF_SVE_FP_3A + theEmitter->emitIns_R_R_R(INS_sve_eorbt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_B); // EORBT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_eorbt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_H); // EORBT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_eortb, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_S); // EORTB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_eortb, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_D); // EORTB ., ., . + + // IF_SVE_FQ_3A + theEmitter->emitIns_R_R_R(INS_sve_bdep, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_B); // BDEP ., ., . + theEmitter->emitIns_R_R_R(INS_sve_bext, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_H); // BEXT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_bgrp, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_S); // BGRP ., ., . + theEmitter->emitIns_R_R_R(INS_sve_bgrp, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_D); // BGRP ., ., . + + // IF_SVE_FS_3A + theEmitter->emitIns_R_R_R(INS_sve_saddlbt, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // SADDLBT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ssublbt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_S); // SSUBLBT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_ssubltb, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_D); // SSUBLTB ., ., . + + // IF_SVE_FW_3A + theEmitter->emitIns_R_R_R(INS_sve_saba, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_B); // SABA ., ., . + theEmitter->emitIns_R_R_R(INS_sve_saba, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_H); // SABA ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uaba, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_S); // UABA ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uaba, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_D); // UABA ., ., . + + // IF_SVE_FX_3A + theEmitter->emitIns_R_R_R(INS_sve_sabalb, EA_SCALABLE, REG_V0, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // SABALB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_sabalt, EA_SCALABLE, REG_V3, REG_V4, REG_V5, + INS_OPTS_SCALABLE_S); // SABALT ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uabalb, EA_SCALABLE, REG_V6, REG_V7, REG_V8, + INS_OPTS_SCALABLE_D); // UABALB ., ., . + theEmitter->emitIns_R_R_R(INS_sve_uabalt, EA_SCALABLE, REG_V9, REG_V10, REG_V11, + INS_OPTS_SCALABLE_H); // UABALT ., ., . + // IF_SVE_BL_1A theEmitter->emitIns_R_PATTERN_I(INS_sve_cntb, EA_8BYTE, REG_R0, SVE_PATTERN_POW2, 1); // CNTB {, {, MUL #}} diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4159844020cb32..ec4627adc66d7f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1086,6 +1086,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long elemsize = id->idOpSize(); assert(insOptsScalableStandard(id->idInsOpt())); // xx assert(isVectorRegister(id->idReg1())); // ddddd @@ -1179,6 +1187,8 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) case IF_SVE_EW_3B: // ...........mmmmm ......aaaaaddddd -- SVE2 multiply-add (checked pointer) + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate assert(insOptsScalable(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn/mmmmm @@ -11066,6 +11076,138 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_EX_3A; break; + case INS_sve_saddlb: + case INS_sve_saddlt: + case INS_sve_uaddlb: + case INS_sve_uaddlt: + case INS_sve_ssublb: + case INS_sve_ssublt: + case INS_sve_usublb: + case INS_sve_usublt: + case INS_sve_sabdlb: + case INS_sve_sabdlt: + case INS_sve_uabdlb: + case INS_sve_uabdlt: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FL_3A; + break; + + case INS_sve_saddwb: + case INS_sve_saddwt: + case INS_sve_uaddwb: + case INS_sve_uaddwt: + case INS_sve_ssubwb: + case INS_sve_ssubwt: + case INS_sve_usubwb: + case INS_sve_usubwt: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FM_3A; + break; + + case INS_sve_smullb: + case INS_sve_smullt: + case INS_sve_umullb: + case INS_sve_umullt: + case INS_sve_sqdmullb: + case INS_sve_sqdmullt: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FN_3A; + break; + + case INS_sve_pmullb: + case INS_sve_pmullt: + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + + if (opt == INS_OPTS_SCALABLE_Q) + { + fmt = IF_SVE_FN_3B; + } + else + { + assert((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_D)); + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FN_3A; + } + break; + + case INS_sve_smmla: + case INS_sve_usmmla: + case INS_sve_ummla: + assert(opt == INS_OPTS_SCALABLE_S); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + fmt = IF_SVE_FO_3A; + break; + + case INS_sve_eorbt: + case INS_sve_eortb: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FP_3A; + break; + + case INS_sve_bext: + case INS_sve_bdep: + case INS_sve_bgrp: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FQ_3A; + break; + + case INS_sve_saddlbt: + case INS_sve_ssublbt: + case INS_sve_ssubltb: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FS_3A; + break; + + case INS_sve_saba: + case INS_sve_uaba: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FW_3A; + break; + + case INS_sve_sabalb: + case INS_sve_sabalt: + case INS_sve_uabalb: + case INS_sve_uabalt: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + fmt = IF_SVE_FX_3A; + break; + case INS_sve_clz: case INS_sve_cls: case INS_sve_cnt: @@ -22868,6 +23010,14 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp case IF_SVE_EX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector elements (quadwords) + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -23015,6 +23165,8 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_EW_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 multiply-add (checked pointer) case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -26678,12 +26830,15 @@ void emitter::emitDispInsHelp( case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high // (unpredicated) + case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient case IF_SVE_BR_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE permute vector segments case IF_SVE_CA_3A: // ........xx.mmmmm ......nnnnnddddd -- sve_int_perm_tbxquads case IF_SVE_EV_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer clamp // ., ., . case IF_SVE_EM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add high + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate // .Q, .Q, .Q case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments // .D, .D, .D @@ -27347,6 +27502,8 @@ void emitter::emitDispInsHelp( // ., ., . case IF_SVE_EH_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE integer dot product (unpredicated) + // .S, .B, .B + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate { const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 2); emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -27359,6 +27516,13 @@ void emitter::emitDispInsHelp( case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long + // ., ., . + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long + // .Q, .D, .D + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long { const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1); emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -27367,6 +27531,16 @@ void emitter::emitDispInsHelp( break; } + // ., ., . + case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + { + const insOpts smallSizeSpecifier = (insOpts)(id->idInsOpt() - 1); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), smallSizeSpecifier, false); // mmmmm + break; + } + // CDOT ., ., ., case IF_SVE_EJ_3A: // ........xx.mmmmm ....rrnnnnnddddd -- SVE2 complex integer dot product { @@ -30374,10 +30548,43 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BR_3B: // ...........mmmmm ......nnnnnddddd -- SVE permute vector segments case IF_SVE_BZ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) case IF_SVE_BZ_3A_A: // ........xx.mmmmm ......nnnnnddddd -- SVE table lookup (three sources) + case IF_SVE_FL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract long + case IF_SVE_FM_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract wide + case IF_SVE_FP_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise exclusive-or interleaved + case IF_SVE_FS_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer add/subtract interleaved long result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_FQ_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 bitwise permute + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_6C; + break; + + case IF_SVE_FN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply long + switch (ins) + { + case INS_sve_smullb: + case INS_sve_smullt: + case INS_sve_umullb: + case INS_sve_umullt: + case INS_sve_sqdmullb: + case INS_sve_sqdmullt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case INS_sve_pmullb: + case INS_sve_pmullt: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register // increment) result.insThroughput = PERFSCORE_THROUGHPUT_2X; @@ -30390,11 +30597,13 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_BK_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE floating-point trig select coefficient + case IF_SVE_FO_3A: // ...........mmmmm ......nnnnnddddd -- SVE integer matrix multiply accumulate result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_3C; break; case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) + case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_2C; break; @@ -30463,6 +30672,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_EL_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply-add long case IF_SVE_EN_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add interleaved long case IF_SVE_EO_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 saturating multiply-add long + case IF_SVE_FW_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate + case IF_SVE_FX_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer absolute difference and accumulate long result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break;