Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Arm64 encodings for SVE IF_SVE_CX_4A_A to IF_SVE_HT_4A group #96214

Merged
merged 9 commits into from
Jan 12, 2024
Merged
52 changes: 52 additions & 0 deletions src/coreclr/jit/codegenarm64test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4772,6 +4772,28 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P3, REG_P1, REG_V15, REG_V20,
INS_OPTS_SCALABLE_H); // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>

// IF_SVE_CX_4A_A
theEmitter->emitIns_R_R_R_R(INS_sve_cmpeq, EA_SCALABLE, REG_P15, REG_P7, REG_V31, REG_V3, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WIDE); /* CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmpge, EA_SCALABLE, REG_P14, REG_P6, REG_V21, REG_V13, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_WIDE); /* CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmpgt, EA_SCALABLE, REG_P13, REG_P5, REG_V11, REG_V23, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_WIDE); /* CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmphi, EA_SCALABLE, REG_P12, REG_P4, REG_V1, REG_V31, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WIDE); /* CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmphs, EA_SCALABLE, REG_P11, REG_P3, REG_V0, REG_V30, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_WIDE); /* CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmple, EA_SCALABLE, REG_P4, REG_P2, REG_V10, REG_V0, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_WIDE); /* CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmplo, EA_SCALABLE, REG_P3, REG_P1, REG_V20, REG_V1, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WIDE); /* CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmpls, EA_SCALABLE, REG_P2, REG_P0, REG_V30, REG_V2, INS_OPTS_SCALABLE_H,
INS_SCALABLE_OPTS_WIDE); /* CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmplt, EA_SCALABLE, REG_P1, REG_P7, REG_V24, REG_V8, INS_OPTS_SCALABLE_S,
INS_SCALABLE_OPTS_WIDE); /* CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */
theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P0, REG_P0, REG_V14, REG_V28, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WIDE); /* CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */

// IF_SVE_EP_3A
theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10,
INS_OPTS_SCALABLE_B); // SHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
Expand Down Expand Up @@ -4846,6 +4868,12 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20,
INS_OPTS_SCALABLE_D); // URSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>

// IF_SVE_GE_4A
theEmitter->emitIns_R_R_R_R(INS_sve_match, EA_SCALABLE, REG_P15, REG_P0, REG_V21, REG_V0,
INS_OPTS_SCALABLE_B); // MATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_nmatch, EA_SCALABLE, REG_P0, REG_P7, REG_V11, REG_V31,
INS_OPTS_SCALABLE_H); // NMATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>

// IF_SVE_GR_3A
theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19,
INS_OPTS_SCALABLE_H); // FADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
Expand Down Expand Up @@ -4900,6 +4928,30 @@ void CodeGen::genArm64EmitterUnitTestsSve()
theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29,
INS_OPTS_SCALABLE_D); // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>

// IF_SVE_HT_4A
theEmitter->emitIns_R_R_R_R(INS_sve_facge, EA_SCALABLE, REG_P0, REG_P0, REG_V10, REG_V31,
INS_OPTS_SCALABLE_H); // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_facgt, EA_SCALABLE, REG_P15, REG_P1, REG_V20, REG_V21,
INS_OPTS_SCALABLE_S); // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_facle, EA_SCALABLE, REG_P1, REG_P2, REG_V0, REG_V11,
INS_OPTS_SCALABLE_D); // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_faclt, EA_SCALABLE, REG_P14, REG_P3, REG_V30, REG_V1,
INS_OPTS_SCALABLE_H); // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmeq, EA_SCALABLE, REG_P2, REG_P4, REG_V28, REG_V8,
INS_OPTS_SCALABLE_S); // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmge, EA_SCALABLE, REG_P13, REG_P5, REG_V8, REG_V18,
INS_OPTS_SCALABLE_D); // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmgt, EA_SCALABLE, REG_P3, REG_P6, REG_V18, REG_V28,
INS_OPTS_SCALABLE_H); // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmle, EA_SCALABLE, REG_P12, REG_P7, REG_V1, REG_V30,
INS_OPTS_SCALABLE_S); // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmlt, EA_SCALABLE, REG_P4, REG_P0, REG_V11, REG_V0,
INS_OPTS_SCALABLE_D); // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmne, EA_SCALABLE, REG_P11, REG_P1, REG_V21, REG_V10,
INS_OPTS_SCALABLE_H); // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
theEmitter->emitIns_R_R_R_R(INS_sve_fcmuo, EA_SCALABLE, REG_P5, REG_P2, REG_V31, REG_V20,
INS_OPTS_SCALABLE_S); // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>

// IF_SVE_AF_3A
theEmitter->emitIns_R_R_R(INS_sve_andv, EA_1BYTE, REG_V0, REG_P0, REG_V0,
INS_OPTS_SCALABLE_B); // ANDV <V><d>, <Pg>, <Zn>.<T>
Expand Down
139 changes: 123 additions & 16 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1058,12 +1058,42 @@ void emitter::emitInsSanityCheck(instrDesc* id)
// Scalable, 4 regs, to predicate register.
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
elemsize = id->idOpSize();
assert(isScalableVectorSize(elemsize));
assert(insOptsScalableStandard(id->idInsOpt())); // xx
assert(isPredicateRegister(id->idReg1())); // DDDD
assert(isLowPredicateRegister(id->idReg2())); // ggg
assert(isVectorRegister(id->idReg3())); // mmmmm
assert(isVectorRegister(id->idReg4())); // nnnnn
assert(isVectorRegister(id->idReg3())); // nnnnn
assert(isVectorRegister(id->idReg4())); // mmmmm
break;

case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
elemsize = id->idOpSize();
assert(isScalableVectorSize(elemsize));
assert(insOptsScalableWide(id->idInsOpt())); // xx
assert(isPredicateRegister(id->idReg1())); // DDDD
assert(isLowPredicateRegister(id->idReg2())); // ggg
assert(isVectorRegister(id->idReg3())); // mmmmm
assert(isVectorRegister(id->idReg4())); // nnnnn
break;

case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
elemsize = id->idOpSize();
assert(isScalableVectorSize(elemsize));
assert(insOptsScalableAtMaxHalf(id->idInsOpt()));
assert(isPredicateRegister(id->idReg1())); // DDDD
assert(isLowPredicateRegister(id->idReg2())); // ggg
assert(isVectorRegister(id->idReg3())); // nnnnn
assert(isVectorRegister(id->idReg4())); // mmmmm
break;

case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
elemsize = id->idOpSize();
assert(isScalableVectorSize(elemsize));
assert(insOptsScalableFloat(id->idInsOpt()));
assert(isPredicateRegister(id->idReg1())); // DDDD
assert(isLowPredicateRegister(id->idReg2())); // ggg
assert(isVectorRegister(id->idReg3())); // nnnnn
assert(isVectorRegister(id->idReg4())); // mmmmm
break;

// Scalable FP.
Expand Down Expand Up @@ -10614,13 +10644,14 @@ void emitter::emitIns_R_R_I_I(
* Add an instruction referencing four registers.
*/

void emitter::emitIns_R_R_R_R(instruction ins,
emitAttr attr,
regNumber reg1,
regNumber reg2,
regNumber reg3,
regNumber reg4,
insOpts opt /* = INS_OPT_NONE*/)
void emitter::emitIns_R_R_R_R(instruction ins,
emitAttr attr,
regNumber reg1,
regNumber reg2,
regNumber reg3,
regNumber reg4,
insOpts opt /* = INS_OPT_NONE*/,
insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
{
emitAttr size = EA_SIZE(attr);
insFormat fmt = IF_NONE;
Expand Down Expand Up @@ -10669,13 +10700,53 @@ void emitter::emitIns_R_R_R_R(instruction ins,
case INS_sve_cmplo:
case INS_sve_cmpls:
case INS_sve_cmplt:
assert(insOptsScalableStandard(opt));
assert(isPredicateRegister(reg1)); // DDDD
assert(isLowPredicateRegister(reg2)); // ggg
assert(isVectorRegister(reg3)); // mmmmm
assert(isVectorRegister(reg4)); // nnnnn
assert(isVectorRegister(reg3)); // nnnnn
assert(isVectorRegister(reg4)); // mmmmm
assert(isScalableVectorSize(attr)); // xx
if (sopt == INS_SCALABLE_OPTS_WIDE)
{
assert(insOptsScalableWide(opt));
fmt = IF_SVE_CX_4A_A;
}
else
{
assert(insScalableOptsNone(sopt));
assert(insOptsScalableStandard(opt));
fmt = IF_SVE_CX_4A;
}
break;

case INS_sve_fcmeq:
case INS_sve_fcmge:
case INS_sve_facge:
case INS_sve_fcmgt:
case INS_sve_facgt:
case INS_sve_fcmlt:
case INS_sve_fcmle:
case INS_sve_fcmne:
case INS_sve_fcmuo:
case INS_sve_facle:
case INS_sve_faclt:
assert(insOptsScalableFloat(opt));
assert(isVectorRegister(reg3)); // nnnnn
assert(isVectorRegister(reg4)); // mmmmm
assert(isPredicateRegister(reg1)); // DDDD
assert(isLowPredicateRegister(reg2)); // ggg
assert(isScalableVectorSize(attr)); // xx
fmt = IF_SVE_CX_4A;
fmt = IF_SVE_HT_4A;
break;

case INS_sve_match:
case INS_sve_nmatch:
assert(insOptsScalableAtMaxHalf(opt));
assert(isPredicateRegister(reg1)); // DDDD
assert(isLowPredicateRegister(reg2)); // ggg
assert(isVectorRegister(reg3)); // nnnnn
assert(isVectorRegister(reg4)); // mmmmm
assert(isScalableVectorSize(attr)); // xx
fmt = IF_SVE_GE_4A;
break;

case INS_sve_mla:
Expand Down Expand Up @@ -10725,6 +10796,22 @@ void emitter::emitIns_R_R_R_R(instruction ins,
std::swap(reg3, reg4);
ins = INS_sve_cmpgt;
break;
case INS_sve_facle:
std::swap(reg3, reg4);
ins = INS_sve_facge;
break;
case INS_sve_faclt:
std::swap(reg3, reg4);
ins = INS_sve_facgt;
break;
case INS_sve_fcmle:
std::swap(reg3, reg4);
ins = INS_sve_fcmge;
break;
case INS_sve_fcmlt:
std::swap(reg3, reg4);
ins = INS_sve_fcmgt;
break;
default:
break;
}
Expand Down Expand Up @@ -16171,7 +16258,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD
code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg
Expand Down Expand Up @@ -18742,10 +18832,20 @@ void emitter::emitDispInsHelp(

// <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD
emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg
emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn
emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm
break;

// <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D
case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD
emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg
emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm
emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // nnnnn
emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false); // nnnnn
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While passing INS_OPTS_SCALABLE_D looks fine for this instruction format, I was checking other formats where we pass the INS_OPTS directly, and it seems that for IF_SVE_EQ_3A we should have a method that is the reverse of optWidenSveElemsizeArrangement(), which would lower H->B, S->H and D->S, instead of manipulating idInsOpt() this way.

case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
emitDispLowPredicateReg(id->idReg2(), PREDICATE_MERGE, id->idInsOpt(), true); // ggg
emitDispSveReg(id->idReg3(), (insOpts)((unsigned)id->idInsOpt() - 1), false); // mmmmm

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, makes sense. I will piggyback this change on my next patch 👍

break;

// <Zda>.<T>, <Pg>/M, <Zn>.<Tb>
Expand Down Expand Up @@ -21276,11 +21376,18 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
result.insLatency = PERFSCORE_LATENCY_4C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match
case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors
result.insLatency = PERFSCORE_LATENCY_2C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
break;

// Extract/insert operation, SIMD and FP scalar form
case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register
result.insLatency = PERFSCORE_LATENCY_3C;
Expand Down
21 changes: 14 additions & 7 deletions src/coreclr/jit/emitarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,12 @@ inline static bool insOptsScalableAtLeastHalf(insOpts opt)
return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D));
}

inline static bool insOptsScalableAtMaxHalf(insOpts opt)
{
// Returns true when `opt` is a scalable arrangement no wider than a half-word,
// i.e. exactly INS_OPTS_SCALABLE_B or INS_OPTS_SCALABLE_H; every other option yields false.
switch (opt)
{
case INS_OPTS_SCALABLE_B:
case INS_OPTS_SCALABLE_H:
return true;

default:
return false;
}
}

inline static bool insOptsScalableFloat(insOpts opt)
{
// `opt` is any of the scalable types that are valid for FP.
Expand Down Expand Up @@ -1100,13 +1106,14 @@ void emitIns_R_R_R_Ext(instruction ins,
void emitIns_R_R_I_I(
instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE);

void emitIns_R_R_R_R(instruction ins,
emitAttr attr,
regNumber reg1,
regNumber reg2,
regNumber reg3,
regNumber reg4,
insOpts opt = INS_OPTS_NONE);
void emitIns_R_R_R_R(instruction ins,
emitAttr attr,
regNumber reg1,
regNumber reg2,
regNumber reg3,
regNumber reg4,
insOpts opt = INS_OPTS_NONE,
insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);

void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond);

Expand Down
Loading