Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT ARM64-SVE: Implement IF_SVE_FD_3A, IF_SVE_FD_3B, IF_SVE_FD_3C #97934

Merged
merged 2 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5848,6 +5848,36 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R_I(INS_sve_usdot, EA_SCALABLE, REG_V23, REG_V24, REG_V3, 3,
INS_OPTS_SCALABLE_B); // USDOT <Zda>.S, <Zn>.B, <Zm>.B[<imm>]

// IF_SVE_FD_3A
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V0, REG_V1, REG_V1, 1,
INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V2, REG_V3, REG_V3, 3,
INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V4, REG_V5, REG_V5, 5,
INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V6, REG_V7, REG_V7, 7,
INS_OPTS_SCALABLE_H); // MUL <Zd>.H, <Zn>.H, <Zm>.H[<imm>]

// IF_SVE_FD_3B
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V8, REG_V9, REG_V1, 0,
INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V10, REG_V11, REG_V3, 1,
INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V12, REG_V13, REG_V5, 2,
INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V14, REG_V15, REG_V7, 3,
INS_OPTS_SCALABLE_S); // MUL <Zd>.S, <Zn>.S, <Zm>.S[<imm>]

// IF_SVE_FD_3C
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V16, REG_V17, REG_V0, 0,
INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V18, REG_V19, REG_V5, 1,
INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V20, REG_V21, REG_V10, 0,
INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
theEmitter->emitIns_R_R_R_I(INS_sve_mul, EA_SCALABLE, REG_V22, REG_V23, REG_V15, 1,
INS_OPTS_SCALABLE_D); // MUL <Zd>.D, <Zn>.D, <Zm>.D[<imm>]

// IF_SVE_ED_1A
theEmitter->emitIns_R_I(INS_sve_smax, EA_SCALABLE, REG_V0, -128,
INS_OPTS_SCALABLE_B); // SMAX <Zdn>.<T>, <Zdn>.<T>, #<imm>
Expand Down
101 changes: 101 additions & 0 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1141,6 +1141,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
assert(insOptsScalableStandard(id->idInsOpt()));
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isVectorRegister(id->idReg2())); // nnnnn
Expand All @@ -1149,6 +1150,24 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isValidUimm2(emitGetInsSC(id))); // ii
break;

case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
assert(insOptsScalableStandard(id->idInsOpt()));
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isVectorRegister(id->idReg2())); // nnnnn
assert(isVectorRegister(id->idReg3())); // mmm
assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V7));
assert(isValidUimm3(emitGetInsSC(id))); // iii
break;

case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
assert(insOptsScalableStandard(id->idInsOpt()));
assert(isVectorRegister(id->idReg1())); // ddddd
assert(isVectorRegister(id->idReg2())); // nnnnn
assert(isVectorRegister(id->idReg3())); // mmmm
assert((REG_V0 <= id->idReg3()) && (id->idReg3() <= REG_V15));
assert(isValidImm1(emitGetInsSC(id))); // i
break;

case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
assert(isPredicateRegister(id->idReg1())); // DDDD
Expand Down Expand Up @@ -10411,6 +10430,38 @@ void emitter::emitIns_R_R_R_I(instruction ins,
fmt = IF_SVE_EZ_3A;
break;

case INS_sve_mul:
assert(insOptsScalableAtLeastHalf(opt));
assert(isVectorRegister(reg1)); // ddddd
assert(isVectorRegister(reg2)); // nnnnn
assert(isVectorRegister(reg3));

switch (opt)
{
case INS_OPTS_SCALABLE_H:
assert(isValidUimm3(imm)); // iii
assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
fmt = IF_SVE_FD_3A;
break;

case INS_OPTS_SCALABLE_S:
assert(isValidUimm2(imm)); // ii
assert((REG_V0 <= reg3) && (reg3 <= REG_V7)); // mmm
fmt = IF_SVE_FD_3B;
break;

case INS_OPTS_SCALABLE_D:
assert(isValidImm1(imm)); // i
assert((REG_V0 <= reg3) && (reg3 <= REG_V15)); // mmmm
fmt = IF_SVE_FD_3C;
break;

default:
unreached();
break;
}
break;

case INS_fmul: // by element, imm[0..3] selects the element of reg3
case INS_fmla:
case INS_fmls:
Expand Down Expand Up @@ -16003,6 +16054,17 @@ void emitter::emitIns_Call(EmitCallType callType,
return (code_t)imm << 19;
}

/*****************************************************************************
 *
 *  Places a single-bit immediate at bit position '22' of the instruction
 *  encoding and returns the resulting bit pattern.
 *
 *  'imm' is expected to satisfy isValidImm1 (checked by assert).
 */

/*static*/ emitter::code_t emitter::insEncodeImm1_22(ssize_t imm)
{
    assert(isValidImm1(imm));

    // Widen to the instruction-word type first, then move the bit into place.
    const code_t immBit = static_cast<code_t>(imm);
    return immBit << 22;
}

/*****************************************************************************
*
* Returns the encoding for the immediate value as 7-bits at bit locations '20-14'.
Expand Down Expand Up @@ -18216,6 +18278,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
case IF_SVE_EG_3A: // ...........iimmm ......nnnnnddddd -- SVE two-way dot product (indexed)
case IF_SVE_EY_3A: // ...........iimmm ......nnnnnddddd -- SVE integer dot product (indexed)
case IF_SVE_EZ_3A: // ...........iimmm ......nnnnnddddd -- SVE mixed sign dot product (indexed)
case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn
Expand All @@ -18224,6 +18287,29 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
imm = emitGetInsSC(id);
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn
code |= insEncodeReg_V_18_to_16(id->idReg3()); // mmm
code |= insEncodeUimm2_20_to_19(imm & 0b11); // ii
code |= insEncodeImm1_22((imm & 0b100) >> 2); // i
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd
code |= insEncodeReg_V_9_to_5(id->idReg2()); // nnnnn
code |= insEncodeReg_V_19_to_16(id->idReg3()); // mmmm

// index is encoded at bit location 20;
// left-shift by one bit so we can reuse insEncodeUimm2_20_to_19 without modifying bit location 19
code |= insEncodeUimm2_20_to_19(emitGetInsSC(id) << 1); // i
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
case IF_SVE_DA_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE propagate break from previous partition
{
Expand Down Expand Up @@ -21387,6 +21473,18 @@ void emitter::emitDispInsHelp(
emitDispElementIndex(emitGetInsSC(id)); // ii
break;

// <Zd>.H, <Zn>.H, <Zm>.H[<imm>]
case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
// <Zd>.S, <Zn>.S, <Zm>.S[<imm>]
case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
// <Zd>.D, <Zn>.D, <Zm>.D[<imm>]
case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
emitDispSveReg(id->idReg2(), id->idInsOpt(), true); // nnnnn
emitDispSveReg(id->idReg3(), id->idInsOpt(), false); // mmm
emitDispElementIndex(emitGetInsSC(id)); // iii
break;

// <Pd>.B, <Pg>/Z, <Pn>.B, <Pm>.B
case IF_SVE_CZ_4A: // ............MMMM ..gggg.NNNN.DDDD -- SVE predicate logical operations
{
Expand Down Expand Up @@ -24187,6 +24285,9 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case IF_SVE_BD_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 integer multiply vectors (unpredicated)
case IF_SVE_BE_3A: // ........xx.mmmmm ......nnnnnddddd -- SVE2 signed saturating doubling multiply high
// (unpredicated)
case IF_SVE_FD_3A: // .........i.iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
case IF_SVE_FD_3B: // ...........iimmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
case IF_SVE_FD_3C: // ...........immmm ......nnnnnddddd -- SVE2 integer multiply (indexed)
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency = PERFSCORE_LATENCY_5C;
break;
Expand Down
9 changes: 9 additions & 0 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,9 @@ static code_t insEncodeUimm2_9_to_8(ssize_t imm);
// Returns the encoding for the immediate value as 2-bits at bit locations '20-19'.
static code_t insEncodeUimm2_20_to_19(ssize_t imm);

// Returns the encoding for the immediate value as 1 bit at bit location '22'.
static code_t insEncodeImm1_22(ssize_t imm);

// Returns the encoding for the immediate value as 7-bits at bit locations '20-14'.
static code_t insEncodeUimm7_20_to_14(ssize_t imm);

Expand Down Expand Up @@ -639,6 +642,12 @@ static bool isValidUimm2(ssize_t value)
return (0 <= value) || (value <= 3);
};

// Returns true if 'value' is a legal unsigned immediate 3 bit encoding (such as for MUL).
// A 3-bit unsigned field can only hold 0 through 7, so BOTH bounds must hold;
// joining the two comparisons with '||' would accept every possible value.
static bool isValidUimm3(ssize_t value)
{
    return (0 <= value) && (value <= 7);
};

// Returns true if 'value' is a legal unsigned immediate 4 bit encoding, starting from 1 (such as for CNTB).
static bool isValidUimm4From1(ssize_t value)
{
Expand Down
Loading