diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 0b326d79943ff..b7da8cfefe68d 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -5852,6 +5852,36 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_I(INS_sve_usdot, EA_SCALABLE, REG_V23, REG_V24, REG_V3, 3, INS_OPTS_SCALABLE_B); // USDOT .S, .B, .B[] + // IF_SVE_FD_3A + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1, + INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3, + INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5, + INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7, + INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>] + + // IF_SVE_FD_3B + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V8, REG_V9, REG_V1, 0, + INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V10, REG_V11, REG_V3, 1, + INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V12, REG_V13, REG_V5, 2, + INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 3, + INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>] + + // IF_SVE_FD_3C + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0, + INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1, + INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0, + INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1, + INS_OPTS_SCALABLE_D); // MUL 
<Zd>.D, <Zn>.D, <Zm>.D[<imm>] + // IF_SVE_ED_1A theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128, INS_OPTS_SCALABLE_B); // SMAX ., ., # diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5e8ef7608e938..9dfd81b557606 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1146,6 +1146,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) assert(insOptsScalableStandard(id->idInsOpt())); assert(isVectorRegister(id->idReg1())); // ddddd assert(isVectorRegister(id->idReg2())); // nnnnn @@ -1154,6 +1155,24 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidUimm2(emitGetInsSC(id))); // ii break; + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7)); // 3-bit mmm field: V0-V7 only + assert(isValidUimm3(emitGetInsSC(id))); // iii + break; + + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + assert(insOptsScalableStandard(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ddddd + assert(isVectorRegister(id->idReg2())); // nnnnn + assert(isVectorRegister(id->idReg3())); // mmmm + assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15)); // 4-bit mmmm field: V0-V15 only + assert(isValidImm1(emitGetInsSC(id))); // i + break; + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations assert(id->idInsOpt() == 
INS_OPTS_SCALABLE_B); assert(isPredicateRegister(id->idReg1())); // DDDD @@ -10445,6 +10464,38 @@ void emitter::emitIns_R_R_R_I(instruction ins, fmt = IF_SVE_EZ_3A; break; + case INS_sve_mul: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); // ddddd + assert(isVectorRegister(reg2)); // nnnnn + assert(isVectorRegister(reg3)); // mmm or mmmm (range checked per element size below) + + switch (opt) + { + case INS_OPTS_SCALABLE_H: + assert(isValidUimm3(imm)); // iii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3A; + break; + + case INS_OPTS_SCALABLE_S: + assert(isValidUimm2(imm)); // ii + assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm + fmt = IF_SVE_FD_3B; + break; + + case INS_OPTS_SCALABLE_D: + assert(isValidImm1(imm)); // i + assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm + fmt = IF_SVE_FD_3C; + break; + + default: + unreached(); + break; + } + break; + case INS_fmul: // by element, imm[0..3] selects the element of reg3 case INS_fmla: case INS_fmls: @@ -16167,6 +16218,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)imm << 19; } +/***************************************************************************** + * + * Returns the encoding for the immediate value as 1 bit at bit location '22'. + */ + +/*static*/ emitter::code_t emitter::insEncodeImm1_22(ssize_t imm) +{ + assert(isValidImm1(imm)); + return (code_t)imm << 22; +} + /***************************************************************************** * * Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. 
@@ -18387,6 +18449,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed) case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed) case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn @@ -18395,6 +18458,29 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + imm = emitGetInsSC(id); // element index, split below into bits 20-19 and bit 22 + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm + code |= insEncodeUimm2_20_to_19(imm & 0b11); // ii (index bits 1:0) + code |= insEncodeImm1_22((imm & 0b100) >> 2); // i (index bit 2) + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn + code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm + + // index is encoded at bit location 20; + // left-shift by one bit so we can reuse insEncodeUimm2_20_to_19 without modifying bit location 19 + code |= insEncodeUimm2_20_to_19(emitGetInsSC(id) << 1); // i + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition { @@ -21606,6 
+21692,18 @@ void emitter::emitDispInsHelp( emitDispElementIndex(emitGetInsSC(id), false); // ii break; + // <Zd>.H, <Zn>.H, <Zm>.H[<imm>] + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + // <Zd>.S, <Zn>.S, <Zm>.S[<imm>] + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + // <Zd>.D, <Zn>.D, <Zm>.D[<imm>] + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm + emitDispElementIndex(emitGetInsSC(id), false); // iii + break; + // .B, /Z, .B, .B case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations { @@ -24460,6 +24558,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated) case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high // (unpredicated) + case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed) + case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed) result.insThroughput = PERFSCORE_THROUGHPUT_2X; result.insLatency = PERFSCORE_LATENCY_5C; break; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 71e9b43efe28b..5c32ebff24e9a 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -562,6 +562,9 @@ static code_t insEncodeUimm2_11_to_10(ssize_t imm); // Returns the encoding for the immediate value as 2-bits at bit locations '20-19'. static code_t insEncodeUimm2_20_to_19(ssize_t imm); +// Returns the encoding for the immediate value as 1 bit at bit location '22'. 
+static code_t insEncodeImm1_22(ssize_t imm); + // Returns the encoding for the immediate value as 7-bits at bit locations '20-14'. static code_t insEncodeUimm7_20_to_14(ssize_t imm);