From 7b4937edb254c642b341a90f36f72a4458bfc646 Mon Sep 17 00:00:00 2001 From: SwapnilGaikwad Date: Fri, 12 Jan 2024 17:31:41 +0000 Subject: [PATCH] Add Arm64 encodings for SVE IF_SVE_CX_4A_A to IF_SVE_HT_4A group (#96214) * Add SVE IF_SVE_CQ_4A_A group * Fix format issues * Add Arm64 encodings for IF_SVE_GE_4A group * Fix build issue * Add Arm64 encodings for case IF_SVE_HT_4A group * Fix build and formatting * Remove redundant asserts --- src/coreclr/jit/codegenarm64test.cpp | 52 ++++++++++ src/coreclr/jit/emitarm64.cpp | 139 ++++++++++++++++++++++++--- src/coreclr/jit/emitarm64.h | 21 ++-- 3 files changed, 189 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2d59d3e461541..4cbbaebfa0e3b 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4772,6 +4772,28 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P3, REG_P1, REG_V15, REG_V20, INS_OPTS_SCALABLE_H); // CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T>
+ // IF_SVE_CX_4A_A theEmitter->emitIns_R_R_R_R(INS_sve_cmpeq, EA_SCALABLE, REG_P15, REG_P7, REG_V31, REG_V3, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpge, EA_SCALABLE, REG_P14, REG_P6, REG_V21, REG_V13, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WIDE); /* CMPGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpgt, EA_SCALABLE, REG_P13, REG_P5, REG_V11, REG_V23, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WIDE); /* CMPGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmphi, EA_SCALABLE, REG_P12, REG_P4, REG_V1, REG_V31, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPHI <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmphs, EA_SCALABLE, REG_P11, REG_P3, REG_V0, REG_V30, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WIDE); /* CMPHS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmple, EA_SCALABLE, REG_P4, REG_P2, REG_V10, REG_V0, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WIDE); /* CMPLE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmplo, EA_SCALABLE, REG_P3, REG_P1, REG_V20, REG_V1, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPLO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpls, EA_SCALABLE, REG_P2, REG_P0, REG_V30, REG_V2, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WIDE); /* CMPLS <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmplt, EA_SCALABLE, REG_P1, REG_P7, REG_V24, REG_V8, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WIDE); /* CMPLT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P0, REG_P0, REG_V14, REG_V28, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.D */ + // IF_SVE_EP_3A theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, INS_OPTS_SCALABLE_B); // SHADD <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T>
@@ -4846,6 +4868,12 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, INS_OPTS_SCALABLE_D); // URSHLR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // IF_SVE_GE_4A theEmitter->emitIns_R_R_R_R(INS_sve_match, EA_SCALABLE, REG_P15, REG_P0, REG_V21, REG_V0, + INS_OPTS_SCALABLE_B); // MATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_nmatch, EA_SCALABLE, REG_P0, REG_P7, REG_V11, REG_V31, + INS_OPTS_SCALABLE_H); // NMATCH <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // IF_SVE_GR_3A theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, INS_OPTS_SCALABLE_H); // FADDP <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> @@ -4900,6 +4928,30 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, INS_OPTS_SCALABLE_D); // FSUBR <Zdn>.<T>, <Pg>/M, <Zdn>.<T>, <Zm>.<T> + // IF_SVE_HT_4A theEmitter->emitIns_R_R_R_R(INS_sve_facge, EA_SCALABLE, REG_P0, REG_P0, REG_V10, REG_V31, + INS_OPTS_SCALABLE_H); // FACGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_facgt, EA_SCALABLE, REG_P15, REG_P1, REG_V20, REG_V21, + INS_OPTS_SCALABLE_S); // FACGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_facle, EA_SCALABLE, REG_P1, REG_P2, REG_V0, REG_V11, + INS_OPTS_SCALABLE_D); // FACLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_faclt, EA_SCALABLE, REG_P14, REG_P3, REG_V30, REG_V1, + INS_OPTS_SCALABLE_H); // FACLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_fcmeq, EA_SCALABLE, REG_P2, REG_P4, REG_V28, REG_V8, + INS_OPTS_SCALABLE_S); // FCMEQ <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_fcmge, EA_SCALABLE, REG_P13, REG_P5, REG_V8, REG_V18, + INS_OPTS_SCALABLE_D); // FCMGE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_fcmgt, EA_SCALABLE, REG_P3, REG_P6, REG_V18, REG_V28, + INS_OPTS_SCALABLE_H); // FCMGT <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_fcmle, EA_SCALABLE, REG_P12, REG_P7, REG_V1, REG_V30, + INS_OPTS_SCALABLE_S); // FCMLE <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T>
+ theEmitter->emitIns_R_R_R_R(INS_sve_fcmlt, EA_SCALABLE, REG_P4, REG_P0, REG_V11, REG_V0, + INS_OPTS_SCALABLE_D); // FCMLT <Pd>.<T>, <Pg>/Z, <Zm>.<T>, <Zn>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_fcmne, EA_SCALABLE, REG_P11, REG_P1, REG_V21, REG_V10, + INS_OPTS_SCALABLE_H); // FCMNE <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + theEmitter->emitIns_R_R_R_R(INS_sve_fcmuo, EA_SCALABLE, REG_P5, REG_P2, REG_V31, REG_V20, + INS_OPTS_SCALABLE_S); // FCMUO <Pd>.<T>, <Pg>/Z, <Zn>.<T>, <Zm>.<T> + // IF_SVE_AF_3A theEmitter->emitIns_R_R_R(INS_sve_andv, EA_1BYTE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_B); // ANDV <V><d>, <Pg>, <Zn>.<T> diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f0e6f1d27da34..0de5ebbfdcd73 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1058,12 +1058,42 @@ void emitter::emitInsSanityCheck(instrDesc* id) // Scalable, 4 regs, to predicate register. case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); + assert(isScalableVectorSize(elemsize)); assert(insOptsScalableStandard(id->idInsOpt())); // xx assert(isPredicateRegister(id->idReg1())); // DDDD assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isVectorRegister(id->idReg4())); // nnnnn + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + elemsize = id->idOpSize(); + assert(isScalableVectorSize(elemsize)); + assert(insOptsScalableWide(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + elemsize = id->idOpSize(); + assert(isScalableVectorSize(elemsize)); +
assert(insOptsScalableAtMaxHalf(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + elemsize = id->idOpSize(); assert(isScalableVectorSize(elemsize)); + assert(insOptsScalableFloat(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm break; // Scalable FP. @@ -10614,13 +10644,14 @@ void emitter::emitIns_R_R_I_I( * Add an instruction referencing four registers. */ -void emitter::emitIns_R_R_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - regNumber reg4, - insOpts opt /* = INS_OPT_NONE*/) +void emitter::emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt /* = INS_OPTS_NONE */, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { emitAttr size = EA_SIZE(attr); insFormat fmt = IF_NONE; @@ -10669,13 +10700,53 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_sve_cmplo: case INS_sve_cmpls: case INS_sve_cmplt: - assert(insOptsScalableStandard(opt)); assert(isPredicateRegister(reg1)); // DDDD assert(isLowPredicateRegister(reg2)); // ggg - assert(isVectorRegister(reg3)); // mmmmm - assert(isVectorRegister(reg4)); // nnnnn + assert(isVectorRegister(reg3)); // nnnnn + assert(isVectorRegister(reg4)); // mmmmm + assert(isScalableVectorSize(attr)); // xx + if (sopt == INS_SCALABLE_OPTS_WIDE) + { + assert(insOptsScalableWide(opt)); + fmt = IF_SVE_CX_4A_A; + } + else + { + assert(insScalableOptsNone(sopt)); + assert(insOptsScalableStandard(opt)); + fmt = IF_SVE_CX_4A; + } + break; + + case
INS_sve_fcmeq: + case INS_sve_fcmge: + case INS_sve_facge: + case INS_sve_fcmgt: + case INS_sve_facgt: + case INS_sve_fcmlt: + case INS_sve_fcmle: + case INS_sve_fcmne: + case INS_sve_fcmuo: + case INS_sve_facle: + case INS_sve_faclt: + assert(insOptsScalableFloat(opt)); + assert(isVectorRegister(reg3)); // nnnnn + assert(isVectorRegister(reg4)); // mmmmm + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg assert(isScalableVectorSize(attr)); // xx - fmt = IF_SVE_CX_4A; + fmt = IF_SVE_HT_4A; + break; + + case INS_sve_match: + case INS_sve_nmatch: + assert(insOptsScalableAtMaxHalf(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isVectorRegister(reg3)); // nnnnn + assert(isVectorRegister(reg4)); // mmmmm + assert(isScalableVectorSize(attr)); // xx + fmt = IF_SVE_GE_4A; break; case INS_sve_mla: @@ -10725,6 +10796,22 @@ void emitter::emitIns_R_R_R_R(instruction ins, std::swap(reg3, reg4); ins = INS_sve_cmpgt; break; + case INS_sve_facle: + std::swap(reg3, reg4); + ins = INS_sve_facge; + break; + case INS_sve_faclt: + std::swap(reg3, reg4); + ins = INS_sve_facgt; + break; + case INS_sve_fcmle: + std::swap(reg3, reg4); + ins = INS_sve_fcmge; + break; + case INS_sve_fcmlt: + std::swap(reg3, reg4); + ins = INS_sve_fcmgt; + break; default: break; } @@ -16171,7 +16258,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors code = emitInsCodeSve(ins, fmt); code |= 
insEncodeReg_P_3_to_0(id->idReg1()); // DDDD code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg @@ -18742,10 +18832,20 @@ void emitter::emitDispInsHelp( // ., /Z, ., . case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm + break; + + // ., /Z, ., .D + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm - emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // nnnnn + emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false); // nnnnn break; // ., /M, . 
@@ -21276,11 +21376,18 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + // Extract/insert operation, SIMD and FP scalar form case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register result.insLatency = PERFSCORE_LATENCY_3C; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 12650ee664c07..b69d71eff8342 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -995,6 +995,12 @@ inline static bool insOptsScalableAtLeastHalf(insOpts opt) return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableAtMaxHalf(insOpts opt) +{ + // `opt` is any of the standard half and below scalable types. + return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H)); +} + inline static bool insOptsScalableFloat(insOpts opt) { // `opt` is any of the scalable types that are valid for FP. 
@@ -1100,13 +1106,14 @@ void emitIns_R_R_R_Ext(instruction ins, void emitIns_R_R_I_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); -void emitIns_R_R_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - regNumber reg4, - insOpts opt = INS_OPTS_NONE); +void emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt = INS_OPTS_NONE, + insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond);