Skip to content

Commit

Permalink
Add Arm64 encodings for SVE_DD_2A & SVE_DG_2A (#97446)
Browse files Browse the repository at this point in the history
* Add Arm64 encodings for IF_SVE_DD_2A to IF_SVE_DG_2A

* Separate handling of predicate type for SVE_DG_2A
  • Loading branch information
SwapnilGaikwad authored Jan 25, 2024
1 parent de04898 commit c280006
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 3 deletions.
10 changes: 10 additions & 0 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4818,6 +4818,16 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R_I(INS_sve_cmpls, EA_SCALABLE, REG_P0, REG_P3, REG_V9, 127,
INS_OPTS_SCALABLE_D); /* CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, #<imm> */

// IF_SVE_DD_2A
theEmitter->emitIns_R_R(INS_sve_pfirst, EA_SCALABLE, REG_P0, REG_P15,
INS_OPTS_SCALABLE_B); // PFIRST <Pdn>.B, <Pg>, <Pdn>.B

// IF_SVE_DG_2A
theEmitter->emitIns_R_R(INS_sve_rdffr, EA_SCALABLE, REG_P10, REG_P15,
INS_OPTS_SCALABLE_B); // RDFFR <Pd>.B, <Pg>/Z
theEmitter->emitIns_R_R(INS_sve_rdffrs, EA_SCALABLE, REG_P7, REG_P14,
INS_OPTS_SCALABLE_B); // RDFFRS <Pd>.B, <Pg>/Z

// IF_SVE_EP_3A
theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10,
INS_OPTS_SCALABLE_B); // SHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
Expand Down
78 changes: 75 additions & 3 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1130,6 +1130,13 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isValidUimm7(emitGetInsSC(id))); // iiiii
break;

case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
assert(id->idInsOpt() == INS_OPTS_SCALABLE_B);
assert(isPredicateRegister(id->idReg1())); // DDDD
assert(isPredicateRegister(id->idReg2())); // gggg
break;

case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
elemsize = id->idOpSize();
assert(isScalableVectorSize(elemsize));
Expand Down Expand Up @@ -7396,6 +7403,21 @@ void emitter::emitIns_R_R(instruction ins,
}
break;

case INS_sve_pfirst:
assert(opt == INS_OPTS_SCALABLE_B);
assert(isPredicateRegister(reg1)); // DDDD
assert(isPredicateRegister(reg2)); // gggg
fmt = IF_SVE_DD_2A;
break;

case INS_sve_rdffr:
case INS_sve_rdffrs:
assert(opt == INS_OPTS_SCALABLE_B);
assert(isPredicateRegister(reg1)); // DDDD
assert(isPredicateRegister(reg2)); // gggg
fmt = IF_SVE_DG_2A;
break;

case INS_sve_cntp:
assert(insOptsScalableStandard(opt));
assert(insScalableOptsWithVectorLength(sopt)); // l
Expand Down Expand Up @@ -14598,7 +14620,6 @@ void emitter::emitIns_Call(EmitCallType callType,
case IF_SVE_IN_4A:
case IF_SVE_IX_4A:
case IF_SVE_HI_3A:
case IF_SVE_DG_2A:
case IF_SVE_IO_3A:
case IF_SVE_IP_4A:
case IF_SVE_IQ_3A:
Expand Down Expand Up @@ -14758,21 +14779,27 @@ void emitter::emitIns_Call(EmitCallType callType,
case IF_SVE_AJ_3A:
case IF_SVE_AL_3A:
case IF_SVE_CL_3A:
case IF_SVE_DD_2A:
case IF_SVE_DF_2A:
case IF_SVE_GS_3A:
case IF_SVE_HJ_3A:
case IF_SVE_IY_4A:
return PREDICATE_NONE;

case IF_SVE_DD_2A:
assert((regpos >= 1) && (regpos <= 3));
return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED);

case IF_SVE_CX_4A:
case IF_SVE_CX_4A_A:
case IF_SVE_CY_3A:
case IF_SVE_CY_3B:
case IF_SVE_GE_4A:
case IF_SVE_HT_4A:
assert((regpos == 1) || (regpos == 2));
return (regpos == 1 ? PREDICATE_SIZED : PREDICATE_ZERO);
return ((regpos == 1) ? PREDICATE_SIZED : PREDICATE_ZERO);

case IF_SVE_DG_2A:
return ((regpos == 1) ? PREDICATE_SIZED : PREDICATE_ZERO);

default:
break;
Expand Down Expand Up @@ -17199,6 +17226,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
code |= insEncodeReg_P_8_to_5(id->idReg2()); // gggg
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_GA_2A: // ............iiii ......nnnn.ddddd -- SME2 multi-vec shift narrow
imm = emitGetInsSC(id);
assert(id->idInsOpt() == INS_OPTS_SCALABLE_H);
Expand Down Expand Up @@ -20054,6 +20089,18 @@ void emitter::emitDispInsHelp(
emitDispImm(emitGetInsSC(id), false, (fmt == IF_SVE_CY_3B)); // iiiii
break;

// <Pdn>.B, <Pg>, <Pdn>.B
case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); // gggg
emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 3), id->idInsOpt(), false); // DDDD
break;

case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); // DDDD
emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), false); // gggg
break;

// <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
Expand Down Expand Up @@ -22832,6 +22879,31 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case IF_SVE_DD_2A: // ................ .......gggg.DDDD -- SVE predicate first active
result.insLatency = PERFSCORE_LATENCY_3C;
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
break;

case IF_SVE_DG_2A: // ................ .......gggg.DDDD -- SVE predicate read from FFR (predicated)
switch (ins)
{
case INS_sve_rdffr:
result.insLatency = PERFSCORE_LATENCY_3C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case INS_sve_rdffrs:
result.insLatency = PERFSCORE_LATENCY_4C;
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;

default:
// all other instructions
perfScoreUnhandledInstruction(id, &result);
break;
}
break;

case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
result.insLatency = PERFSCORE_LATENCY_2C;
Expand Down

0 comments on commit c280006

Please sign in to comment.