From c5b31f047f82d924105e1226049fc96fa227c8fc Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Sun, 4 Feb 2024 00:02:50 -0500 Subject: [PATCH] Add IF_SVE_FD_3A, IF_SVE_FD_3B, IF_SVE_FD_3C --- src/coreclr/jit/codegenarm64test.cpp | 30 ++++++++ src/coreclr/jit/emitarm64.cpp | 101 +++++++++++++++++++++++++++ src/coreclr/jit/emitarm64.h | 9 +++ 3 files changed, 140 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 712cbb42feeaa..fef2eb6f1fd13 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5848,6 +5848,36 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_usdot, EA_SCALABLE, REG_V23, REG_V24, REG_V3, 3, INS_OPTS_SCALABLE_B); // USDOT .S, .B, .B[] + // IF_SVE_FD_3A + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // MUL .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // MUL .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // MUL .H, .H, .H[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7, + INS_OPTS_SCALABLE_H); // MUL .H, .H, .H[] + + // IF_SVE_FD_3B + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V8, REG_V9, REG_V1, 0, + INS_OPTS_SCALABLE_S); // MUL .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V10, REG_V11, REG_V3, 1, + INS_OPTS_SCALABLE_S); // MUL .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V12, REG_V13, REG_V5, 2, + INS_OPTS_SCALABLE_S); // MUL .S, .S, .S[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 3, + INS_OPTS_SCALABLE_S); // MUL .S, .S, .S[] + + // IF_SVE_FD_3C + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0, + INS_OPTS_SCALABLE_D); // 
MUL .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1, + INS_OPTS_SCALABLE_D); // MUL .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0, + INS_OPTS_SCALABLE_D); // MUL .D, .D, .D[] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, + INS_OPTS_SCALABLE_D); // MUL .D, .D, .D[] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 9f254756d1e19..a0e567a700911 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1141,6 +1141,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1149,6 +1150,24 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm2(emitGetInsSC(id))); // ii break; + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); + assert(isValidUimm3(emitGetInsSC(id))); // iii + break; + + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + 
assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); + assert(isValidImm1(emitGetInsSC(id))); // i + break; + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations assert(id->idInsOpt() == INS_OPTS_SCALABLE_B); assert(isPredicateRegister(id->idReg1())); // DDDD @@ -10411,6 +10430,38 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_EZ_3A; break; + case INS_sve_mul: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); + + switch (opt) + { + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); // iii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3A; + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3B; + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidImm1(imm)); // i + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + fmt = IF_SVE_FD_3C; + break; + + default: + unreached(); + break; + } + break; + case INS_fmul: // by element, imm[0..3] selects the element of reg3 case INS_fmla: case INS_fmls: @@ -16003,6 +16054,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 19; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 1 bit at bit location '22'. + */ + +/*static*/ emitter::code_t emitter::insEncodeImm1_22(ssize_t imm) +{ + assert(isValidImm1(imm)); + return (code_t)imm << 22; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. 
@@ -18216,6 +18278,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -18224,6 +18287,29 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm + code |= insEncodeUimm2_20_to_19(imm & 0b11); // ii + code |= insEncodeImm1_22((imm & 0b100) >> 2); // i + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm + + // index is encoded at bit location 20; + // left-shift by one bit so we can reuse insEncodeUimm2_20_to_19 without modifying bit location 19 + code |= insEncodeUimm2_20_to_19(emitGetInsSC(id) << 1); // i + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition { @@ -21387,6 
+21473,18 @@ void emitter::emitDispInsHelp( emitDispElementIndex(emitGetInsSC(id)); // ii break; + // .H, .H, .H[] + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + // .S, .S, .S[] + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + // .D, .D, .D[] + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id)); // iii + break; + // .B, /Z, .B, .B case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations { @@ -24187,6 +24285,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high // (unpredicated) + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 82d1d1dd02c20..82b01258be16e 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -559,6 +559,9 @@ static code_t insEncodeUimm2_9_to_8(ssize_t imm); // Returns the encoding for the immediate value as 2-bits at bit locations '20-19'. static code_t insEncodeUimm2_20_to_19(ssize_t imm); +// Returns the encoding for the immediate value as 1 bit at bit location '22'. 
+static code_t insEncodeImm1_22(ssize_t imm); + // Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. static code_t insEncodeUimm7_20_to_14(ssize_t imm); @@ -639,6 +642,12 @@ static bool isValidUimm2(ssize_t value) return (0 <= value) || (value <= 3); }; +// Returns true if 'value' is a legal unsigned immediate 3 bit encoding, i.e. lies in the range [0, 7] (such as for MUL). +static bool isValidUimm3(ssize_t value) +{ + return (0 <= value) && (value <= 7); +}; + // Returns true if 'value' is a legal unsigned immediate 4 bit encoding, starting from 1 (such as for CNTB). static bool isValidUimm4From1(ssize_t value) {