From 962d15c8e1773da1f975a0691581d73bfc2b1cc0 Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Mon, 4 Mar 2024 09:38:56 -0500 Subject: [PATCH] JIT ARM64-SVE: Add AW_2A to AZ_2A, BM_1A, BN_1A (#99211) Part of #94549. Adds the following encodings: SVE_AW_2A SVE_AX_1A SVE_AY_2A SVE_AZ_2A SVE_BM_1A SVE_BN_1A --- src/coreclr/jit/codegenarm64test.cpp | 88 ++++++ src/coreclr/jit/emitarm64.cpp | 406 ++++++++++++++++++++++++++- src/coreclr/jit/emitarm64.h | 24 +- src/coreclr/jit/instr.h | 6 +- 4 files changed, 503 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 90aafb0e4b377..72a56b236a455 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5755,6 +5755,62 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_nbsl, EA_SCALABLE, REG_V15, REG_V16, REG_V17, INS_OPTS_SCALABLE_D); // NBSL <Zdn>.D, <Zdn>.D, <Zm>.D, <Zk>.D + // IF_SVE_AW_2A + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V0, REG_V1, 1, + INS_OPTS_SCALABLE_B); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V2, REG_V3, 8, + INS_OPTS_SCALABLE_B); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V4, REG_V5, 2, + INS_OPTS_SCALABLE_H); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V6, REG_V7, 16, + INS_OPTS_SCALABLE_H); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V8, REG_V9, 3, + INS_OPTS_SCALABLE_S); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V10, REG_V11, 32, + INS_OPTS_SCALABLE_S); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V12, REG_V13, 4, + INS_OPTS_SCALABLE_D); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + theEmitter->emitIns_R_R_I(INS_sve_xar, EA_SCALABLE, REG_V14, REG_V15, 64, + INS_OPTS_SCALABLE_D); // XAR <Zdn>.<T>, <Zdn>.<T>, <Zm>.<T>, #<const> + + // IF_SVE_AX_1A + theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V0, -16, 15, + 
INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, #<imm1>, #<imm2> + theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V1, 15, -16, + INS_OPTS_SCALABLE_H); // INDEX <Zd>.<T>, #<imm1>, #<imm2> + theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V2, 0, 0, + INS_OPTS_SCALABLE_S); // INDEX <Zd>.<T>, #<imm1>, #<imm2> + theEmitter->emitIns_R_I_I(INS_sve_index, EA_SCALABLE, REG_V3, -5, 5, + INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, #<imm1>, #<imm2> + + // IF_SVE_AY_2A + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V0, REG_R0, -16, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V1, REG_R1, 0, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V2, REG_R2, 5, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V3, REG_R3, 10, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V4, REG_ZR, -16, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V5, REG_ZR, 15, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_IMM_FIRST); // INDEX <Zd>.<T>, #<imm>, <R><m> + + // IF_SVE_AZ_2A + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V0, REG_R0, -16, + INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, <R><n>, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V1, REG_R1, 0, + INS_OPTS_SCALABLE_H); // INDEX <Zd>.<T>, <R><n>, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V2, REG_R2, 5, + INS_OPTS_SCALABLE_S); // INDEX <Zd>.<T>, <R><n>, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V3, REG_R3, 10, + INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, <R><n>, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V4, REG_ZR, -16, + INS_OPTS_SCALABLE_B); // INDEX <Zd>.<T>, <R><n>, #<imm> + theEmitter->emitIns_R_R_I(INS_sve_index, EA_SCALABLE, REG_V5, REG_ZR, 15, + INS_OPTS_SCALABLE_D); // INDEX <Zd>.<T>, <R><n>, #<imm> + // IF_SVE_BB_2A 
theEmitter->emitIns_R_R_I(INS_sve_addpl, EA_8BYTE, REG_R0, REG_R1, -32); // ADDPL <Xd|SP>, <Xn|SP>, #<imm> theEmitter->emitIns_R_R_I(INS_sve_addpl, EA_8BYTE, REG_R2, REG_SP, 0); // ADDPL <Xd|SP>, <Xn|SP>, #<imm> @@ -5784,6 +5840,38 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_PATTERN_I(INS_sve_cnth, EA_8BYTE, REG_R5, SVE_PATTERN_ALL, 13); // CNTH <Xd>{, <pattern>{, MUL #<imm>}} + // IF_SVE_BM_1A + theEmitter->emitIns_R_PATTERN_I(INS_sve_decb, EA_8BYTE, REG_R0, SVE_PATTERN_POW2, + 1); // DECB <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_decd, EA_8BYTE, REG_R1, SVE_PATTERN_VL16, + 3); // DECD <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_dech, EA_8BYTE, REG_R2, SVE_PATTERN_VL32, + 5); // DECH <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_decw, EA_8BYTE, REG_R3, SVE_PATTERN_VL64, + 7); // DECW <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_incb, EA_8BYTE, REG_R4, SVE_PATTERN_VL128, + 9); // INCB <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_incd, EA_8BYTE, REG_R5, SVE_PATTERN_MUL3, + 10); // INCD <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_inch, EA_8BYTE, REG_R6, SVE_PATTERN_MUL4, + 13); // INCH <Xdn>{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_incw, EA_8BYTE, REG_R7, SVE_PATTERN_ALL, + 16); // INCW <Xdn>{, <pattern>{, MUL #<imm>}} + + // IF_SVE_BN_1A + theEmitter->emitIns_R_PATTERN_I(INS_sve_decd, EA_SCALABLE, REG_V0, SVE_PATTERN_POW2, 1, + INS_OPTS_SCALABLE_D); // DECD <Zdn>.D{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_dech, EA_SCALABLE, REG_V1, SVE_PATTERN_VL2, 2, + INS_OPTS_SCALABLE_H); // DECH <Zdn>.H{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_decw, EA_SCALABLE, REG_V2, SVE_PATTERN_VL3, 4, + INS_OPTS_SCALABLE_S); // DECW <Zdn>.S{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_incd, EA_SCALABLE, REG_V3, SVE_PATTERN_VL4, 8, + INS_OPTS_SCALABLE_D); // INCD <Zdn>.D{, <pattern>{, MUL #<imm>}} + theEmitter->emitIns_R_PATTERN_I(INS_sve_inch, EA_SCALABLE, REG_V4, SVE_PATTERN_VL5, 12, + INS_OPTS_SCALABLE_H); // INCH <Zdn>.H{, <pattern>{, MUL #<imm>}} + 
theEmitter->emitIns_R_PATTERN_I(INS_sve_incw, EA_SCALABLE, REG_V5, SVE_PATTERN_VL6, 16, + INS_OPTS_SCALABLE_S); // INCW <Zdn>.S{, <pattern>{, MUL #<imm>}} + // IF_SVE_CI_3A theEmitter->emitIns_R_R_R(INS_sve_trn1, EA_SCALABLE, REG_P1, REG_P3, REG_P4, INS_OPTS_SCALABLE_B); // TRN1 <Pd>.<T>, <Pn>.<T>, <Pm>.<T> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 4f9cb501a3416..f96bac7fc3f11 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1137,6 +1137,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count elemsize = id->idOpSize(); assert(id->idInsOpt() == INS_OPTS_NONE); assert(isGeneralRegister(id->idReg1())); @@ -1144,6 +1145,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm4From1(emitGetInsSC(id))); break; + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(elemsize == EA_SCALABLE); + assert(isValidUimm4From1(emitGetInsSC(id))); + break; + case IF_SVE_CE_2A: // ................ 
......nnnnn.DDDD -- SVE move predicate from vector assert(isPredicateRegister(id->idReg1())); // DDDD assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1748,6 +1757,64 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidSimm6(emitGetInsSC(id))); // iiiiii break; + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm + { + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx xx + const ssize_t imm = emitGetInsSC(id); + + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm3From1(imm)); // iii + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm4From1(imm)); // xiii + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm5From1(imm)); // xxiii + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidUimm6From1(imm)); // xx xiii + break; + + default: + unreached(); + break; + } + break; + } + + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + { + ssize_t imm1; + ssize_t imm2; + insDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm5(imm1)); // iiiii + assert(isValidSimm5(imm2)); // iiiii + assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + } + + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register + // increment) + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isValidSimm5(emitGetInsSC(id))); // iiiii + assert(isIntegerRegister(id->idReg2())); // mmmmm + 
assert(isValidVectorElemsize(optGetSveElemsize(id->idInsOpt()))); // xx + break; + case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long { assert(insOptsScalableWide(id->idInsOpt())); @@ -9288,6 +9355,17 @@ void emitter::emitIns_R_I_I(instruction ins, } break; + case INS_sve_index: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg)); // ddddd + assert(isValidSimm5(imm1)); // iiiii + assert(isValidSimm5(imm2)); // iiiii + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + immOut = insEncodeTwoSimm5(imm1, imm2); + canEncode = true; + fmt = IF_SVE_AX_1A; + break; + default: unreached(); break; @@ -9300,6 +9378,7 @@ void emitter::emitIns_R_I_I(instruction ins, id->idIns(ins); id->idInsFmt(fmt); + id->idInsOpt(opt); id->idReg1(reg); @@ -9917,6 +9996,56 @@ void emitter::emitIns_R_R_I(instruction ins, } break; + case INS_sve_xar: + assert(insOptsScalableStandard(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx xx + + switch (opt) + { + case INS_OPTS_SCALABLE_B: + assert(isValidUimm3From1(imm)); // iii + break; + + case INS_OPTS_SCALABLE_H: + assert(isValidUimm4From1(imm)); // xiii + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm5From1(imm)); // xxiii + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidUimm6From1(imm)); // x xxiii + break; + + default: + unreached(); + break; + } + + fmt = IF_SVE_AW_2A; + break; + + case INS_sve_index: + assert(insOptsScalable(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isValidSimm5(imm)); // iiiii + assert(isIntegerRegister(reg2)); // mmmmm + assert(isValidVectorElemsize(optGetSveElemsize(opt))); // xx + + if (sopt == INS_SCALABLE_OPTS_IMM_FIRST) + { + fmt = IF_SVE_AY_2A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_AZ_2A; + } + break; + case INS_sve_addvl: case INS_sve_addpl: assert(insOptsNone(opt)); @@ -16296,7 
+16425,7 @@ void emitter::emitIns_R_R_FLAGS_COND( */ void emitter::emitIns_R_I_FLAGS_COND( - instruction ins, emitAttr attr, regNumber reg, int imm, insCflags flags, insCond cond) + instruction ins, emitAttr attr, regNumber reg, ssize_t imm, insCflags flags, insCond cond) { insFormat fmt = IF_NONE; condFlagsImm cfi; @@ -16392,7 +16521,12 @@ void emitter::emitIns_R_PATTERN( * Add an instruction referencing a register, a SVE Pattern and an immediate. */ -void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm) +void emitter::emitIns_R_PATTERN_I(instruction ins, + emitAttr attr, + regNumber reg1, + insSvePattern pattern, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */) { emitAttr size = EA_SIZE(attr); emitAttr elemsize = EA_UNKNOWN; @@ -16405,12 +16539,43 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1 case INS_sve_cntd: case INS_sve_cnth: case INS_sve_cntw: - assert(isGeneralRegister(reg1)); + assert(insOptsNone(opt)); + assert(isGeneralRegister(reg1)); // ddddd + assert(isValidUimm4From1(imm)); // iiii assert(size == EA_8BYTE); - assert(isValidUimm4From1(imm)); fmt = IF_SVE_BL_1A; break; + case INS_sve_incd: + case INS_sve_inch: + case INS_sve_incw: + case INS_sve_decd: + case INS_sve_dech: + case INS_sve_decw: + assert(isValidUimm4From1(imm)); // iiii + + if (insOptsNone(opt)) + { + assert(isGeneralRegister(reg1)); // ddddd + assert(size == EA_8BYTE); + fmt = IF_SVE_BM_1A; + } + else + { + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + fmt = IF_SVE_BN_1A; + } + break; + + case INS_sve_incb: + case INS_sve_decb: + assert(isGeneralRegister(reg1)); // ddddd + assert(isValidUimm4From1(imm)); // iiii + assert(size == EA_8BYTE); + fmt = IF_SVE_BM_1A; + break; + default: unreached(); break; @@ -16422,6 +16587,7 @@ void emitter::emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1 id->idIns(ins); id->idInsFmt(fmt); + 
id->idInsOpt(opt); id->idReg1(reg1); id->idSvePattern(pattern); @@ -19321,10 +19487,10 @@ void emitter::emitIns_Call(EmitCallType callType, /***************************************************************************** * * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction - * This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. + * This specifically encodes the field 'tszh:tszl' at bit locations '23-22:20-19'. */ -/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size) +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_tszh_23_tszl_20_to_19(emitAttr size) { switch (size) { @@ -19337,6 +19503,9 @@ void emitter::emitIns_Call(EmitCallType callType, case EA_4BYTE: return 0x400000; // set the bit at location 22 + case EA_8BYTE: + return 0x800000; // set the bit at location 23 + default: assert(!"Invalid size for vector register"); } @@ -21476,6 +21645,21 @@ void emitter::emitIns_Call(EmitCallType callType, return insEncodeUimm6_21_to_16(imm / 8); } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 5-bits at bit locations '9-5'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSimm5_9_to_5(ssize_t imm) +{ + assert(isValidSimm5(imm)); + if (imm < 0) + { + imm = (imm & 0x1F); + } + return (code_t)imm << 5; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. 
@@ -21738,6 +21922,10 @@ void emitter::emitIns_Call(EmitCallType callType, assert(isValidUimm5From1(imm)); return (32 - imm); + case INS_OPTS_SCALABLE_D: + assert(isValidUimm6From1(imm)); + return (64 - imm); + default: unreached(); break; @@ -21746,6 +21934,69 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the two 5-bit signed immediates encoded in the following format: + * njjj jjmi iiii + * - iiiii: the absolute value of imm1 + * - m: 1 if imm1 is negative, 0 otherwise + * - jjjjj: the absolute value of imm2 + * - n: 1 if imm2 is negative, 0 otherwise + */ +/*static*/ ssize_t emitter::insEncodeTwoSimm5(ssize_t imm1, ssize_t imm2) +{ + assert(isValidSimm5(imm1)); + assert(isValidSimm5(imm2)); + ssize_t immOut = 0; + + if (imm1 < 0) + { + // Set bit location 5 to indicate imm1 is negative + immOut |= 0x20; + imm1 *= -1; + } + + if (imm2 < 0) + { + // Set bit location 11 to indicate imm2 is negative + immOut |= 0x800; + imm2 *= -1; + } + + immOut |= imm1; + immOut |= (imm2 << 6); + return immOut; +} + +/***************************************************************************** + * + * Decodes imm into two 5-bit signed immediates, + * using the encoding format from insEncodeTwoSimm5. 
+ */ +/*static*/ void emitter::insDecodeTwoSimm5(ssize_t imm, /* OUT */ ssize_t* const imm1, /* OUT */ ssize_t* const imm2) +{ + assert(imm1 != nullptr); + assert(imm2 != nullptr); + + *imm1 = (imm & 0x1F); + + if ((imm & 0x20) != 0) + { + *imm1 *= -1; + } + + imm >>= 6; + *imm2 = (imm & 0x1F); + + if ((imm & 0x20) != 0) + { + *imm2 *= -1; + } + + assert(isValidSimm5(*imm1)); + assert(isValidSimm5(*imm2)); +} + /***************************************************************************** * * Returns the encoding to select an insSvePattern @@ -24094,8 +24345,9 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; - // Immediate and patterm to general purpose. + // Immediate and pattern to general purpose. case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count imm = emitGetInsSC(id); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_Rd(id->idReg1()); // ddddd @@ -24104,6 +24356,15 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeSvePattern(id->idSvePattern()); // ppppp + code |= insEncodeUimm4From1_19_to_16(imm); // iiii + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_CE_2A: // ................ 
......nnnnn.DDDD -- SVE move predicate from vector code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD @@ -24330,6 +24591,52 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm + imm = insGetImmDiff(emitGetInsSC(id), id->idInsOpt()); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // mmmmm + code |= insEncodeUimm5_20_to_16(imm & 0b11111); // xxiii + code |= insEncodeImm1_22(imm >> 5); // x + code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + { + ssize_t imm1; + ssize_t imm2; + insDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeSimm5_9_to_5(imm1); // iiiii + code |= insEncodeSimm5_20_to_16(imm2); // iiiii + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + } + + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register + // increment) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeSimm5_9_to_5(emitGetInsSC(id)); // iiiii + code |= insEncodeReg_R_20_to_16(id->idReg2()); // mmmmm + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + 
code |= insEncodeReg_R_9_to_5(id->idReg2()); // mmmmm + code |= insEncodeSimm5_20_to_16(emitGetInsSC(id)); // iiiii + code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_BB_2A: // ...........nnnnn .....iiiiiiddddd -- SVE stack frame adjustment code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_R_4_to_0(id->idReg1()); // ddddd @@ -24626,29 +24933,36 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) case IF_SVE_GD_2A: // .........x.xx... ......nnnnnddddd -- SVE2 saturating extract narrow code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn - code |= insEncodeSveElemsize_tszh_22_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + // Bit 23 should not be set by below call + assert(insOptsScalableWide(id->idInsOpt())); + code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx // x dst += emitOutput_Instr(dst, code); break; case IF_SVE_FR_2A: // .........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift left long code = emitInsCodeSve(ins, fmt); - code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd - code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn - code |= insEncodeUimm5_20_to_16(emitGetInsSC(id)); // iii - code |= insEncodeSveElemsize_tszh_22_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeUimm5_20_to_16(emitGetInsSC(id)); // iii + // Bit 23 should not be set by below call + assert(insOptsScalableWide(id->idInsOpt())); + code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx // x dst += emitOutput_Instr(dst, code); break; case IF_SVE_GB_2A: // 
.........x.xxiii ......nnnnnddddd -- SVE2 bitwise shift right narrow + // Bit 23 should not be set by call to insEncodeSveElemsize_tszh_23_tszl_20_to_19, + // nor should we pass INS_OPTS_SCALABLE_D to insGetImmDiff. + assert(insOptsScalableWide(id->idInsOpt())); code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn code |= insEncodeUimm5_20_to_16(insGetImmDiff(emitGetInsSC(id), id->idInsOpt())); // iii - code |= insEncodeSveElemsize_tszh_22_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx + code |= insEncodeSveElemsize_tszh_23_tszl_20_to_19(optGetSveElemsize(id->idInsOpt())); // xx // x dst += emitOutput_Instr(dst, code); break; @@ -28144,6 +28458,41 @@ void emitter::emitDispInsHelp( emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // kkkkk break; + // ., #, # + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + { + ssize_t imm1; + ssize_t imm2; + insDecodeTwoSimm5(emitGetInsSC(id), &imm1, &imm2); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(imm1, true); // iiiii + emitDispImm(imm2, false); // iiiii + break; + } + + // ., #, + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register + // increment) + { + const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? EA_8BYTE : EA_4BYTE; + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispImm(emitGetInsSC(id), true); // iiiii + emitDispReg(id->idReg2(), intRegSize, false); // mmmmm + break; + } + + // ., , # + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) + { + const emitAttr intRegSize = (id->idInsOpt() == INS_OPTS_SCALABLE_D) ? 
EA_8BYTE : EA_4BYTE; + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispReg(id->idReg2(), intRegSize, true); // mmmmm + emitDispImm(emitGetInsSC(id), false); // iiiii + break; + } + // .H, .B, .B case IF_SVE_GN_3A: // ...........mmmmm ......nnnnnddddd -- SVE2 FP8 multiply-add long case IF_SVE_HA_3A_E: // ...........mmmmm ......nnnnnddddd -- SVE BFloat16 floating-point dot product @@ -28198,7 +28547,10 @@ void emitter::emitDispInsHelp( emitDispReg(id->idReg3(), size, false); // mmmmm break; + // {, {, MUL #}} + // {, {, MUL #}} case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count imm = emitGetInsSC(id); emitDispReg(id->idReg1(), size, true); // ddddd emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp @@ -28209,6 +28561,20 @@ void emitter::emitDispInsHelp( } break; + // .D{, {, MUL #}} + // .H{, {, MUL #}} + // .S{, {, MUL #}} + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSvePattern(id->idSvePattern(), (imm > 1)); // ppppp + if (imm > 1) + { + printf("mul "); + emitDispImm(emitGetInsSC(id), false, false); // iiii + } + break; + // ., ., .D case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -29485,6 +29851,7 @@ void emitter::emitDispInsHelp( // ., ., ., # case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm emitDispSveReg(id->idReg1(), id->idInsOpt(), true); emitDispSveReg(id->idReg1(), id->idInsOpt(), true); emitDispSveReg(id->idReg2(), id->idInsOpt(), true); @@ -32149,6 +32516,12 @@ 
emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BA_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE index generation (register start, register // increment) + case IF_SVE_AX_1A: // ........xx.iiiii ......iiiiiddddd -- SVE index generation (immediate start, immediate + // increment) + case IF_SVE_AY_2A: // ........xx.mmmmm ......iiiiiddddd -- SVE index generation (immediate start, register + // increment) + case IF_SVE_AZ_2A: // ........xx.iiiii ......nnnnnddddd -- SVE index generation (register start, immediate + // increment) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_8C; break; @@ -32161,6 +32534,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_BL_1A: // ............iiii ......pppppddddd -- SVE element count + case IF_SVE_BM_1A: // ............iiii ......pppppddddd -- SVE inc/dec register by element count + case IF_SVE_BN_1A: // ............iiii ......pppppddddd -- SVE inc/dec vector by element count result.insThroughput = PERFSCORE_THROUGHPUT_2C; result.insLatency = PERFSCORE_LATENCY_2C; break; @@ -32174,6 +32549,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BG_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE bitwise shift by wide elements (unpredicated) case IF_SVE_FN_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply long case IF_SVE_BD_3B: // ...........mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) + case IF_SVE_AW_2A: // ........xx.xxiii ......mmmmmddddd -- sve_int_rotate_imm result.insThroughput = PERFSCORE_THROUGHPUT_1C; result.insLatency = PERFSCORE_LATENCY_2C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index e466311a1156e..bb5abee8d2b5f 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -535,8 +535,8 @@ static code_t insEncodeSveElemsize_18_to_17(emitAttr size); 
static code_t insEncodeSveElemsize_sz_21(emitAttr size); // Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 SVE vector instruction -// This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. -static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// This specifically encodes the field 'tszh:tszl' at bit locations '23-22:20-19'. +static code_t insEncodeSveElemsize_tszh_23_tszl_20_to_19(emitAttr size); // Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction at bit location '30' or // '21'. @@ -687,6 +687,9 @@ static code_t insEncodeUimm6_MultipleOf4_21_to_16(ssize_t imm); // Returns the encoding for the immediate value that is a multiple of 8 as 6-bits at bit locations '21-16'. static code_t insEncodeUimm6_MultipleOf8_21_to_16(ssize_t imm); +// Returns the encoding for the immediate value as 5-bits at bit locations '9-5'. +static code_t insEncodeSimm5_9_to_5(ssize_t imm); + // Returns the encoding for the immediate value as 5-bits at bit locations '20-16'. static code_t insEncodeSimm5_20_to_16(ssize_t imm); @@ -749,6 +752,12 @@ static code_t insEncodeSveElemsize_R_22(emitAttr size); // Returns the immediate value for instructions that encode it as a difference from tszh:tszl:imm3. static ssize_t insGetImmDiff(const ssize_t imm, const insOpts opt); +// Returns the two 5-bit signed immediates encoded as one ssize_t. +static ssize_t insEncodeTwoSimm5(ssize_t imm1, ssize_t imm2); + +// Decodes imm into two 5-bit signed immediates, using the encoding format from insEncodeTwoSimm5. 
+static void insDecodeTwoSimm5(ssize_t imm, /* OUT */ ssize_t* const imm1, /* OUT */ ssize_t* const imm2); + // Returns the encoding to select an insSvePattern static code_t insEncodeSvePattern(insSvePattern pattern); @@ -903,6 +912,12 @@ static bool isValidUimm5From1(ssize_t value) return (1 <= value) && (value <= 0x20); }; +// Returns true if 'value' is a legal unsigned immediate 6 bit encoding, starting from 1 (such as for XAR). +static bool isValidUimm6From1(ssize_t value) +{ + return (1 <= value) && (value <= 0x40); +}; + // Returns true if 'value' is a legal unsigned immediate 7 bit encoding (such as for CMPLT, CMPNE). static bool isValidUimm7(ssize_t value) { @@ -1586,12 +1601,13 @@ void emitIns_R_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumbe void emitIns_R_R_FLAGS_COND( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCflags flags, insCond cond); -void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, int imm, insCflags flags, insCond cond); +void emitIns_R_I_FLAGS_COND(instruction ins, emitAttr attr, regNumber reg1, ssize_t imm, insCflags flags, insCond cond); void emitIns_R_PATTERN( instruction ins, emitAttr attr, regNumber reg1, insOpts opt, insSvePattern pattern = SVE_PATTERN_ALL); -void emitIns_R_PATTERN_I(instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, int imm); +void emitIns_R_PATTERN_I( + instruction ins, emitAttr attr, regNumber reg1, insSvePattern pattern, ssize_t imm, insOpts opt = INS_OPTS_NONE); void emitIns_PRFOP_R_R_R(instruction ins, emitAttr attr, diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 1ff07e4301719..6d99d81cd9df6 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -377,6 +377,8 @@ enum insScalableOpts : unsigned INS_SCALABLE_OPTS_WITH_VECTOR_PAIR, // Variants with {., .} sve register pair (eg splice) + INS_SCALABLE_OPTS_IMM_FIRST, // Variants with an immediate and a register, where the immediate comes first + // 
Removable once REG_V0 and REG_P0 are distinct INS_SCALABLE_OPTS_UNPREDICATED, // Variants without a predicate (eg add) INS_SCALABLE_OPTS_UNPREDICATED_WIDE, // Variants without a predicate and wide elements (eg asr) @@ -401,8 +403,8 @@ enum insSvePattern : unsigned SVE_PATTERN_VL64 = 11, // 64 elements. SVE_PATTERN_VL128 = 12, // 128 elements. SVE_PATTERN_VL256 = 13, // 256 elements. - SVE_PATTERN_MUL4 = 29, // The largest multiple of 3. - SVE_PATTERN_MUL3 = 30, // The largest multiple of 4. + SVE_PATTERN_MUL4 = 29, // The largest multiple of 4. + SVE_PATTERN_MUL3 = 30, // The largest multiple of 3. SVE_PATTERN_ALL = 31 // All available (implicitly a multiple of two). };