From 3ef3af4eb2890371c5a39c2dd7b1b259969144fc Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Mon, 18 Dec 2023 14:31:06 +0000 Subject: [PATCH 1/7] Add SVE IF_SVE_CX_4A_A group --- src/coreclr/jit/codegenarm64test.cpp | 22 ++++++++++++++++ src/coreclr/jit/emitarm64.cpp | 39 +++++++++++++++++++++++----- 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2497b196edec7..7a707fb34bd3b 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4775,6 +4775,28 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P3, REG_P1, REG_V15, REG_V20, INS_OPTS_SCALABLE_H); // CMPNE ., /Z, ., . + // IF_SVE_CX_4A_A + theEmitter->emitIns_R_R_R_R(INS_sve_cmpeq, EA_SCALABLE, REG_P15, REG_P7, REG_V31, REG_V3, + INS_OPTS_SCALABLE_WIDE_B); /* CMPEQ ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpge, EA_SCALABLE, REG_P14, REG_P6, REG_V21, REG_V13, + INS_OPTS_SCALABLE_WIDE_H); /* CMPGE ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpgt, EA_SCALABLE, REG_P13, REG_P5, REG_V11, REG_V23, + INS_OPTS_SCALABLE_WIDE_S); /* CMPGT ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmphi, EA_SCALABLE, REG_P12, REG_P4, REG_V1, REG_V31, + INS_OPTS_SCALABLE_WIDE_B); /* CMPHI ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmphs, EA_SCALABLE, REG_P11, REG_P3, REG_V0, REG_V30, + INS_OPTS_SCALABLE_WIDE_H); /* CMPHS ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmple, EA_SCALABLE, REG_P4, REG_P2, REG_V10, REG_V0, + INS_OPTS_SCALABLE_WIDE_S); /* CMPLE ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmplo, EA_SCALABLE, REG_P3, REG_P1, REG_V20, REG_V1, + INS_OPTS_SCALABLE_WIDE_B); /* CMPLO ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpls, EA_SCALABLE, REG_P2, REG_P0, REG_V30, REG_V2, + INS_OPTS_SCALABLE_WIDE_H); /* CMPLS ., /Z, ., .D */ + 
theEmitter->emitIns_R_R_R_R(INS_sve_cmplt, EA_SCALABLE, REG_P1, REG_P7, REG_V24, REG_V8, + INS_OPTS_SCALABLE_WIDE_S); /* CMPLT ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P0, REG_P0, REG_V14, REG_V28, + INS_OPTS_SCALABLE_WIDE_B); /* CMPNE ., /Z, ., .D */ + // IF_SVE_EP_3A theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, INS_OPTS_SCALABLE_B); // SHADD ., /M, ., . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index d8c99b9a7ba3e..9fc57785af448 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1057,13 +1057,21 @@ void emitter::emitInsSanityCheck(instrDesc* id) // Scalable, 4 regs, to predicate register. case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); - assert(insOptsScalableSimple(id->idInsOpt())); // xx + assert(isScalableVectorSize(elemsize)); assert(isPredicateRegister(id->idReg1())); // DDDD assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm assert(isVectorRegister(id->idReg4())); // nnnnn - assert(isScalableVectorSize(elemsize)); + if (id->idInsFmt() == IF_SVE_CX_4A) + { + assert(insOptsScalableSimple(id->idInsOpt())); // xx + } + else + { + assert(insOptsScalableWide(id->idInsOpt())); // xx + } break; // Scalable FP. 
@@ -9988,13 +9996,20 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_sve_cmplo: case INS_sve_cmpls: case INS_sve_cmplt: - assert(insOptsScalableSimple(opt)); assert(isPredicateRegister(reg1)); // DDDD assert(isLowPredicateRegister(reg2)); // ggg assert(isVectorRegister(reg3)); // mmmmm assert(isVectorRegister(reg4)); // nnnnn assert(isScalableVectorSize(attr)); // xx - fmt = IF_SVE_CX_4A; + if (insOptsScalableSimple(opt)) + { + fmt = IF_SVE_CX_4A; + } + else + { + assert(insOptsScalableWide(opt)); + fmt = IF_SVE_CX_4A_A; + } break; case INS_sve_mla: @@ -14945,7 +14960,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg @@ -17413,13 +17429,21 @@ void emitter::emitDispInsHelp( break; // ., /Z, ., . 
- case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // nnnnn break; + // ., /Z, ., .D + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD + emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm + emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false); // nnnnn + break; + // ., /M, . case IF_SVE_EQ_3A: // ........xx...... ...gggnnnnnddddd -- SVE2 integer pairwise add and accumulate long emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd @@ -19838,7 +19862,8 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors result.insLatency = PERFSCORE_LATENCY_4C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; From ccbfdb85b29510e7b92f5d8cfe1a056a95cb1383 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 20 Dec 2023 13:55:09 +0000 Subject: [PATCH 2/7] Fix format issues --- src/coreclr/jit/emitarm64.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 9fc57785af448..bf288c21b0e78 100644 --- 
a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1056,14 +1056,14 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; // Scalable, 4 regs, to predicate register. - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); assert(isScalableVectorSize(elemsize)); - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isVectorRegister(id->idReg4())); // nnnnn + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // mmmmm + assert(isVectorRegister(id->idReg4())); // nnnnn if (id->idInsFmt() == IF_SVE_CX_4A) { assert(insOptsScalableSimple(id->idInsOpt())); // xx @@ -17429,7 +17429,7 @@ void emitter::emitDispInsHelp( break; // ., /Z, ., . - case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm @@ -17441,7 +17441,7 @@ void emitter::emitDispInsHelp( emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm - emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false); // nnnnn + emitDispSveReg(id->idReg4(), INS_OPTS_SCALABLE_D, false); // nnnnn break; // ., /M, . 
From 0548526a90def37b7281f23693e6905703add20e Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Wed, 20 Dec 2023 16:24:32 +0000 Subject: [PATCH 3/7] Add Arm64 encodings for IF_SVE_GE_4A group --- src/coreclr/jit/codegenarm64test.cpp | 6 +++ src/coreclr/jit/emitarm64.cpp | 59 +++++++++++++++++++++------- src/coreclr/jit/emitarm64.h | 6 +++ 3 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 7a707fb34bd3b..b2007c093416e 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4871,6 +4871,12 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_urshlr, EA_SCALABLE, REG_V15, REG_P2, REG_V20, INS_OPTS_SCALABLE_D); // URSHLR ., /M, ., . + // IF_SVE_GE_4A + theEmitter->emitIns_R_R_R_R(INS_sve_match, EA_SCALABLE, REG_P15, REG_P0, REG_V21, REG_V0, + INS_OPTS_SCALABLE_B); // MATCH ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_nmatch, EA_SCALABLE, REG_P0, REG_P7, REG_V11, REG_V31, + INS_OPTS_SCALABLE_H); // NMATCH ., /Z, ., . + // IF_SVE_GR_3A theEmitter->emitIns_R_R_R(INS_sve_faddp, EA_SCALABLE, REG_V16, REG_P3, REG_V19, INS_OPTS_SCALABLE_H); // FADDP ., /M, ., . diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index bf288c21b0e78..5597ec2275b26 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1056,22 +1056,33 @@ void emitter::emitInsSanityCheck(instrDesc* id) break; // Scalable, 4 regs, to predicate register. 
- case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + elemsize = id->idOpSize(); + assert(isScalableVectorSize(elemsize)); + assert(insOptsScalableSimple(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); assert(isScalableVectorSize(elemsize)); + assert(insOptsScalableWide(id->idInsOpt())); // xx assert(isPredicateRegister(id->idReg1())); // DDDD assert(isLowPredicateRegister(id->idReg2())); // ggg - assert(isVectorRegister(id->idReg3())); // mmmmm - assert(isVectorRegister(id->idReg4())); // nnnnn - if (id->idInsFmt() == IF_SVE_CX_4A) - { - assert(insOptsScalableSimple(id->idInsOpt())); // xx - } - else - { - assert(insOptsScalableWide(id->idInsOpt())); // xx - } + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + elemsize = id->idOpSize(); + assert(isScalableVectorSize(elemsize)); + assert(insOptsScalableAtMaxHalf(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm break; // Scalable FP. 
@@ -9998,8 +10009,8 @@ void emitter::emitIns_R_R_R_R(instruction ins, case INS_sve_cmplt: assert(isPredicateRegister(reg1)); // DDDD assert(isLowPredicateRegister(reg2)); // ggg - assert(isVectorRegister(reg3)); // mmmmm - assert(isVectorRegister(reg4)); // nnnnn + assert(isVectorRegister(reg3)); // nnnnn + assert(isVectorRegister(reg4)); // mmmmm assert(isScalableVectorSize(attr)); // xx if (insOptsScalableSimple(opt)) { @@ -10012,6 +10023,17 @@ void emitter::emitIns_R_R_R_R(instruction ins, } break; + case INS_sve_match: + case INS_sve_nmatch: + assert(insOptsScalableAtMaxHalf(opt)); + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isVectorRegister(reg3)); // nnnnn + assert(isVectorRegister(reg4)); // mmmmm + assert(isScalableVectorSize(attr)); // xx + fmt = IF_SVE_GE_4A; + break; + case INS_sve_mla: case INS_sve_mls: assert(insOptsScalableSimple(opt)); @@ -14962,6 +14984,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg @@ -17430,10 +17453,11 @@ void emitter::emitDispInsHelp( // ., /Z, ., . 
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg - emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // mmmmm - emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // nnnnn + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); // mmmmm break; // ., /Z, ., .D @@ -19868,6 +19892,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; + case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + result.insLatency = PERFSCORE_LATENCY_2C; + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + break; + // Extract/insert operation, SIMD and FP scalar form case IF_SVE_CR_3A: // ........xx...... ...gggnnnnnddddd -- SVE extract element to SIMD&FP scalar register result.insLatency = PERFSCORE_LATENCY_3C; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index b7a776c6691e4..eef2b60a98323 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -920,6 +920,12 @@ inline static bool insOptsScalableAtLeastHalf(insOpts opt) return ((opt == INS_OPTS_SCALABLE_H) || (opt == INS_OPTS_SCALABLE_S) || (opt == INS_OPTS_SCALABLE_D)); } +inline static bool insOptsScalableAtMaxHalf(insOpts opt) +{ + // `opt` is any of the standard half and below scalable types. + return ((opt == INS_OPTS_SCALABLE_B) || (opt == INS_OPTS_SCALABLE_H)); +} + inline static bool insOptsScalableFloat(insOpts opt) { // `opt` is any of the standard scalable types that are valid for FP. 
From 46624928612bd79beaef61801a6c1eaabe486598 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Thu, 21 Dec 2023 13:18:10 +0000 Subject: [PATCH 4/7] Fix build issue --- src/coreclr/jit/emitarm64.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5597ec2275b26..b512ea9bf169c 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1074,6 +1074,7 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // nnnnn assert(isVectorRegister(id->idReg4())); // mmmmm + break; case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match elemsize = id->idOpSize(); From 1756c12794c21a5c98d2dd187990189abf24d4ad Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 22 Dec 2023 17:54:50 +0000 Subject: [PATCH 5/7] Add Arm64 encodings for case IF_SVE_HT_4A group --- src/coreclr/jit/codegenarm64test.cpp | 24 +++++++++++++ src/coreclr/jit/emitarm64.cpp | 53 ++++++++++++++++++++++++++-- 2 files changed, 75 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 125d66843a124..2bdf281db4e8f 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4928,6 +4928,30 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P4, REG_V29, INS_OPTS_SCALABLE_D); // FSUBR ., /M, ., . + // IF_SVE_HT_4A + theEmitter->emitIns_R_R_R_R(INS_sve_facge, EA_SCALABLE, REG_P0, REG_P0, REG_V10, REG_V31, + INS_OPTS_SCALABLE_H); // FACGE ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_facgt, EA_SCALABLE, REG_P15, REG_P1, REG_V20, REG_V21, + INS_OPTS_SCALABLE_S); // FACGT ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_facle, EA_SCALABLE, REG_P1, REG_P2, REG_V0, REG_V11, + INS_OPTS_SCALABLE_D); // FACLE ., /Z, ., . 
+ theEmitter->emitIns_R_R_R_R(INS_sve_faclt, EA_SCALABLE, REG_P14, REG_P3, REG_V30, REG_V1, + INS_OPTS_SCALABLE_H); // FACLT ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmeq, EA_SCALABLE, REG_P2, REG_P4, REG_V28, REG_V8, + INS_OPTS_SCALABLE_S); // FCMEQ ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmge, EA_SCALABLE, REG_P13, REG_P5, REG_V8, REG_V18, + INS_OPTS_SCALABLE_D); // FCMGE ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmgt, EA_SCALABLE, REG_P3, REG_P6, REG_V18, REG_V28, + INS_OPTS_SCALABLE_H); // FCMGT ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmle, EA_SCALABLE, REG_P12, REG_P7, REG_V1, REG_V30, + INS_OPTS_SCALABLE_S); // FCMLE ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmlt, EA_SCALABLE, REG_P4, REG_P0, REG_V11, REG_V0, + INS_OPTS_SCALABLE_D); // FCMLT ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmne, EA_SCALABLE, REG_P11, REG_P1, REG_V21, REG_V10, + INS_OPTS_SCALABLE_H); // FCMNE ., /Z, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fcmuo, EA_SCALABLE, REG_P5, REG_P2, REG_V31, REG_V20, + INS_OPTS_SCALABLE_S); // FCMUO ., /Z, ., . + // IF_SVE_AF_3A theEmitter->emitIns_R_R_R(INS_sve_andv, EA_1BYTE, REG_V0, REG_P0, REG_V0, INS_OPTS_SCALABLE_B_WITH_SIMD_SCALAR); // ANDV , , . 
diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index b512ea9bf169c..393e1bf9f98c1 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1086,6 +1086,16 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg4())); // mmmmm break; + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors + elemsize = id->idOpSize(); + assert(isScalableVectorSize(elemsize)); + assert(insOptsScalableFloat(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + break; + // Scalable FP. case IF_SVE_GR_3A: // ........xx...... ...gggmmmmmddddd -- SVE2 floating-point pairwise operations case IF_SVE_HL_3A: // ........xx...... ...gggmmmmmddddd -- SVE floating-point arithmetic (predicated) @@ -10024,6 +10034,26 @@ void emitter::emitIns_R_R_R_R(instruction ins, } break; + case INS_sve_fcmeq: + case INS_sve_fcmge: + case INS_sve_facge: + case INS_sve_fcmgt: + case INS_sve_facgt: + case INS_sve_fcmlt: + case INS_sve_fcmle: + case INS_sve_fcmne: + case INS_sve_fcmuo: + case INS_sve_facle: + case INS_sve_faclt: + assert(insOptsScalableFloat(opt)); + assert(isVectorRegister(reg3)); // nnnnn + assert(isVectorRegister(reg4)); // mmmmm + assert(isPredicateRegister(reg1)); // DDDD + assert(isLowPredicateRegister(reg2)); // ggg + assert(isScalableVectorSize(attr)); // xx + fmt = IF_SVE_HT_4A; + break; + case INS_sve_match: case INS_sve_nmatch: assert(insOptsScalableAtMaxHalf(opt)); @@ -10082,6 +10112,22 @@ void emitter::emitIns_R_R_R_R(instruction ins, std::swap(reg3, reg4); ins = INS_sve_cmpgt; break; + case INS_sve_facle: + std::swap(reg3, reg4); + ins = INS_sve_facge; + break; + case INS_sve_faclt: + std::swap(reg3, reg4); + ins = INS_sve_facgt; + break; + case INS_sve_fcmle: + std::swap(reg3, reg4); + ins = 
INS_sve_fcmge; + break; + case INS_sve_fcmlt: + std::swap(reg3, reg4); + ins = INS_sve_fcmgt; + break; default: break; } @@ -14986,11 +15032,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors code = emitInsCodeSve(ins, fmt); code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg - code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm - code |= insEncodeReg_V_20_to_16(id->idReg4()); // nnnnn + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm code |= insEncodeElemsize(optGetSveElemsize(id->idInsOpt())); // xx dst += emitOutput_Instr(dst, code); break; @@ -17455,6 +17502,7 @@ void emitter::emitDispInsHelp( // ., /Z, ., . 
case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors emitDispPredicateReg(id->idReg1(), PREDICATE_SIZED, id->idInsOpt(), true); // DDDD emitDispPredicateReg(id->idReg2(), PREDICATE_ZERO, id->idInsOpt(), true); // ggg emitDispSveReg(id->idReg3(), id->idInsOpt(), true); // nnnnn @@ -19894,6 +19942,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins break; case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match + case IF_SVE_HT_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE floating-point compare vectors result.insLatency = PERFSCORE_LATENCY_2C; result.insThroughput = PERFSCORE_THROUGHPUT_1C; break; From a3827cd72b60c43e54278b9f17021a2435248e6b Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 12 Jan 2024 13:25:08 +0000 Subject: [PATCH 6/7] Fix build and formatting --- src/coreclr/jit/codegenarm64test.cpp | 40 ++++++++++++++-------------- src/coreclr/jit/emitarm64.cpp | 25 ++++++++--------- src/coreclr/jit/emitarm64.h | 15 ++++++----- 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 196bf947dca97..4cbbaebfa0e3b 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -4773,26 +4773,26 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_H); // CMPNE ., /Z, ., . 
// IF_SVE_CX_4A_A - theEmitter->emitIns_R_R_R_R(INS_sve_cmpeq, EA_SCALABLE, REG_P15, REG_P7, REG_V31, REG_V3, - INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WIDE); /* CMPEQ ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmpge, EA_SCALABLE, REG_P14, REG_P6, REG_V21, REG_V13, - INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_WIDE); /* CMPGE ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmpgt, EA_SCALABLE, REG_P13, REG_P5, REG_V11, REG_V23, - INS_OPTS_SCALABLE_S, INS_SCALABLE_OPTS_WIDE); /* CMPGT ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmphi, EA_SCALABLE, REG_P12, REG_P4, REG_V1, REG_V31, - INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WIDE); /* CMPHI ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmphs, EA_SCALABLE, REG_P11, REG_P3, REG_V0, REG_V30, - INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_WIDE); /* CMPHS ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmple, EA_SCALABLE, REG_P4, REG_P2, REG_V10, REG_V0, - INS_OPTS_SCALABLE_S, INS_SCALABLE_OPTS_WIDE); /* CMPLE ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmplo, EA_SCALABLE, REG_P3, REG_P1, REG_V20, REG_V1, - INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WIDE); /* CMPLO ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmpls, EA_SCALABLE, REG_P2, REG_P0, REG_V30, REG_V2, - INS_OPTS_SCALABLE_H, INS_SCALABLE_OPTS_WIDE); /* CMPLS ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmplt, EA_SCALABLE, REG_P1, REG_P7, REG_V24, REG_V8, - INS_OPTS_SCALABLE_S, INS_SCALABLE_OPTS_WIDE); /* CMPLT ., /Z, ., .D */ - theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P0, REG_P0, REG_V14, REG_V28, - INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WIDE); /* CMPNE ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpeq, EA_SCALABLE, REG_P15, REG_P7, REG_V31, REG_V3, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPEQ ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpge, EA_SCALABLE, REG_P14, REG_P6, REG_V21, REG_V13, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WIDE); /* CMPGE ., /Z, ., .D */ + 
theEmitter->emitIns_R_R_R_R(INS_sve_cmpgt, EA_SCALABLE, REG_P13, REG_P5, REG_V11, REG_V23, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WIDE); /* CMPGT ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmphi, EA_SCALABLE, REG_P12, REG_P4, REG_V1, REG_V31, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPHI ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmphs, EA_SCALABLE, REG_P11, REG_P3, REG_V0, REG_V30, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WIDE); /* CMPHS ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmple, EA_SCALABLE, REG_P4, REG_P2, REG_V10, REG_V0, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WIDE); /* CMPLE ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmplo, EA_SCALABLE, REG_P3, REG_P1, REG_V20, REG_V1, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPLO ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpls, EA_SCALABLE, REG_P2, REG_P0, REG_V30, REG_V2, INS_OPTS_SCALABLE_H, + INS_SCALABLE_OPTS_WIDE); /* CMPLS ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmplt, EA_SCALABLE, REG_P1, REG_P7, REG_V24, REG_V8, INS_OPTS_SCALABLE_S, + INS_SCALABLE_OPTS_WIDE); /* CMPLT ., /Z, ., .D */ + theEmitter->emitIns_R_R_R_R(INS_sve_cmpne, EA_SCALABLE, REG_P0, REG_P0, REG_V14, REG_V28, INS_OPTS_SCALABLE_B, + INS_SCALABLE_OPTS_WIDE); /* CMPNE ., /Z, ., .D */ // IF_SVE_EP_3A theEmitter->emitIns_R_R_R(INS_sve_shadd, EA_SCALABLE, REG_V15, REG_P0, REG_V10, diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 467f425507a54..bbc5a450c020f 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1059,11 +1059,11 @@ void emitter::emitInsSanityCheck(instrDesc* id) case IF_SVE_CX_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors elemsize = id->idOpSize(); assert(isScalableVectorSize(elemsize)); - assert(insOptsScalableSimple(id->idInsOpt())); // xx - assert(isPredicateRegister(id->idReg1())); // DDDD - assert(isLowPredicateRegister(id->idReg2())); // ggg - 
assert(isVectorRegister(id->idReg3())); // nnnnn - assert(isVectorRegister(id->idReg4())); // mmmmm + assert(insOptsScalableStandard(id->idInsOpt())); // xx + assert(isPredicateRegister(id->idReg1())); // DDDD + assert(isLowPredicateRegister(id->idReg2())); // ggg + assert(isVectorRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm break; case IF_SVE_CX_4A_A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors @@ -10652,13 +10652,14 @@ void emitter::emitIns_R_R_I_I( * Add an instruction referencing four registers. */ -void emitter::emitIns_R_R_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - regNumber reg4, - insOpts opt /* = INS_OPT_NONE*/) +void emitter::emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt /* = INS_OPT_NONE*/, + insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */) { emitAttr size = EA_SIZE(attr); insFormat fmt = IF_NONE; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 67b8a8f87b672..b69d71eff8342 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -1106,13 +1106,14 @@ void emitIns_R_R_R_Ext(instruction ins, void emitIns_R_R_I_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int imm1, int imm2, insOpts opt = INS_OPTS_NONE); -void emitIns_R_R_R_R(instruction ins, - emitAttr attr, - regNumber reg1, - regNumber reg2, - regNumber reg3, - regNumber reg4, - insOpts opt = INS_OPTS_NONE); +void emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts opt = INS_OPTS_NONE, + insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond); From b42718768b4d0fc2a7f65a5f1dd35be125b999e6 Mon Sep 17 00:00:00 2001 From: Swapnil Gaikwad Date: Fri, 12 Jan 2024 13:36:33 +0000 
Subject: [PATCH 7/7] Remove redundant asserts --- src/coreclr/jit/emitarm64.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index bbc5a450c020f..0de5ebbfdcd73 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1074,14 +1074,6 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isLowPredicateRegister(id->idReg2())); // ggg assert(isVectorRegister(id->idReg3())); // mmmmm assert(isVectorRegister(id->idReg4())); // nnnnn - if (id->idInsFmt() == IF_SVE_CX_4A) - { - assert(insOptsScalableStandard(id->idInsOpt())); // xx - } - else - { - assert(insOptsScalableWide(id->idInsOpt())); // xx - } break; case IF_SVE_GE_4A: // ........xx.mmmmm ...gggnnnnn.DDDD -- SVE2 character match