diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 3d64f803b72c3..619047a20102d 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -7203,6 +7203,101 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_R_R_R(INS_sve_st1b, EA_SCALABLE, REG_V6, REG_P3, REG_R0, REG_V4, INS_OPTS_SCALABLE_D); // ST1B {.D }, , [, .D] + // IF_SVE_GP_3A + theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 90, + INS_OPTS_SCALABLE_H); // FCADD ., /M, ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270, + INS_OPTS_SCALABLE_H); // FCADD ., /M, ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270, + INS_OPTS_SCALABLE_S); // FCADD ., /M, ., ., + theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270, + INS_OPTS_SCALABLE_D); // FCADD ., /M, ., ., + + // IF_SVE_GT_4A + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P1, REG_V3, REG_V4, 0, + INS_OPTS_SCALABLE_H); // FCMLA ., /M, ., ., + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_P2, REG_V1, REG_V5, 90, + INS_OPTS_SCALABLE_S); // FCMLA ., /M, ., ., + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 180, + INS_OPTS_SCALABLE_D); // FCMLA ., /M, ., ., + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 270, + INS_OPTS_SCALABLE_D); // FCMLA ., /M, ., ., + + // IF_SVE_HI_3A + theEmitter->emitIns_R_R_R(INS_sve_fcmeq, EA_SCALABLE, REG_P2, REG_P3, REG_V4, + INS_OPTS_SCALABLE_H); // FCMEQ ., /Z, ., #0.0 + theEmitter->emitIns_R_R_R(INS_sve_fcmge, EA_SCALABLE, REG_P1, REG_P2, REG_V3, + INS_OPTS_SCALABLE_S); // FCMGE ., /Z, ., #0.0 + theEmitter->emitIns_R_R_R(INS_sve_fcmgt, EA_SCALABLE, REG_P11, REG_P5, REG_V2, + INS_OPTS_SCALABLE_D); // FCMGT ., /Z, ., #0.0 + 
theEmitter->emitIns_R_R_R(INS_sve_fcmle, EA_SCALABLE, REG_P2, REG_P7, REG_V1, + INS_OPTS_SCALABLE_H); // FCMLE ., /Z, ., #0.0 + theEmitter->emitIns_R_R_R(INS_sve_fcmlt, EA_SCALABLE, REG_P3, REG_P1, REG_V0, + INS_OPTS_SCALABLE_S); // FCMLT ., /Z, ., #0.0 + theEmitter->emitIns_R_R_R(INS_sve_fcmne, EA_SCALABLE, REG_P1, REG_P0, REG_V5, + INS_OPTS_SCALABLE_D); // FCMNE ., /Z, ., #0.0 + + // IF_SVE_HM_2A + theEmitter->emitIns_R_R_F(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_P0, 0.5, + INS_OPTS_SCALABLE_H); // FADD ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fadd, EA_SCALABLE, REG_V0, REG_P1, 1.0, + INS_OPTS_SCALABLE_H); // FADD ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmax, EA_SCALABLE, REG_V1, REG_P0, 0.0, + INS_OPTS_SCALABLE_S); // FMAX ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmax, EA_SCALABLE, REG_V1, REG_P0, 1.0, + INS_OPTS_SCALABLE_S); // FMAX ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmaxnm, EA_SCALABLE, REG_V3, REG_P4, 0.0, + INS_OPTS_SCALABLE_D); // FMAXNM ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmaxnm, EA_SCALABLE, REG_V3, REG_P4, 1.0, + INS_OPTS_SCALABLE_D); // FMAXNM ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmin, EA_SCALABLE, REG_V6, REG_P5, 0.0, + INS_OPTS_SCALABLE_H); // FMIN ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmin, EA_SCALABLE, REG_V6, REG_P5, 1.0, + INS_OPTS_SCALABLE_H); // FMIN ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fminnm, EA_SCALABLE, REG_V2, REG_P4, 0.0, + INS_OPTS_SCALABLE_S); // FMINNM ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fminnm, EA_SCALABLE, REG_V2, REG_P4, 1.0, + INS_OPTS_SCALABLE_S); // FMINNM ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmul, EA_SCALABLE, REG_V5, REG_P1, 0.5, + INS_OPTS_SCALABLE_D); // FMUL ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fmul, EA_SCALABLE, REG_V5, REG_P1, 2.0, + INS_OPTS_SCALABLE_D); // FMUL ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fsub, EA_SCALABLE, REG_V7, REG_P2, 0.5, + INS_OPTS_SCALABLE_H); // FSUB ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fsub, 
EA_SCALABLE, REG_V7, REG_P2, 1.0, + INS_OPTS_SCALABLE_H); // FSUB ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P3, 0.5, + INS_OPTS_SCALABLE_S); // FSUBR ., /M, ., + theEmitter->emitIns_R_R_F(INS_sve_fsubr, EA_SCALABLE, REG_V6, REG_P3, 1.0, + INS_OPTS_SCALABLE_S); // FSUBR ., /M, ., + + // IF_SVE_HN_2A + // FTMAD ., ., ., # + theEmitter->emitIns_R_R_I(INS_sve_ftmad, EA_SCALABLE, REG_V0, REG_V2, 0, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_I(INS_sve_ftmad, EA_SCALABLE, REG_V3, REG_V5, 1, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_I(INS_sve_ftmad, EA_SCALABLE, REG_V4, REG_V2, 7, INS_OPTS_SCALABLE_D); + + // IF_SVE_HP_3A + // FLOGB ., /M, . + theEmitter->emitIns_R_R_R(INS_sve_flogb, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_H); + theEmitter->emitIns_R_R_R(INS_sve_flogb, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_S); + theEmitter->emitIns_R_R_R(INS_sve_flogb, EA_SCALABLE, REG_V31, REG_P7, REG_V31, INS_OPTS_SCALABLE_D); + + // IF_SVE_HU_4B + theEmitter->emitIns_R_R_R_R(INS_sve_bfmla, EA_SCALABLE, REG_V1, REG_P5, REG_V4, REG_V3, + INS_OPTS_SCALABLE_H); // BFMLA .H, /M, .H, .H + theEmitter->emitIns_R_R_R_R(INS_sve_bfmls, EA_SCALABLE, REG_V0, REG_P6, REG_V1, REG_V2, + INS_OPTS_SCALABLE_H); // BFMLS .H, /M, .H, .H + + // IF_SVE_HV_4A + theEmitter->emitIns_R_R_R_R(INS_sve_fmad, EA_SCALABLE, REG_V3, REG_P4, REG_V5, REG_V6, + INS_OPTS_SCALABLE_H); // FMAD ., /M, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fmsb, EA_SCALABLE, REG_V1, REG_P2, REG_V3, REG_V7, + INS_OPTS_SCALABLE_S); // FMSB ., /M, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fnmad, EA_SCALABLE, REG_V0, REG_P5, REG_V4, REG_V3, + INS_OPTS_SCALABLE_D); // FNMAD ., /M, ., . + theEmitter->emitIns_R_R_R_R(INS_sve_fnmsb, EA_SCALABLE, REG_V5, REG_P3, REG_V1, REG_V2, + INS_OPTS_SCALABLE_S); // FNMSB ., /M, ., . 
// IF_SVE_ID_2A // LDR , [{, #, MUL VL}] theEmitter->emitIns_R_R_I(INS_sve_ldr, EA_SCALABLE, REG_P1, REG_R5, 0); diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index f52a99c238dd8..296623036d8df 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -2075,6 +2075,89 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isScalableVectorSize(elemsize)); break; + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(emitIsValidEncodedRotationImm90_or_270(imm)); + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(emitIsValidEncodedRotationImm0_to_270(imm)); + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isPredicateRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_HM_2A: // ........xx...... 
...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isScalableVectorSize(elemsize)); + assert(emitIsValidEncodedSmallFloatImm(imm)); + break; + + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + imm = emitGetInsSC(id); + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isVectorRegister(id->idReg2())); + assert(isValidUimm3(imm)); + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_H); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + elemsize = id->idOpSize(); + assert(insOptsScalableAtLeastHalf(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); + assert(isLowPredicateRegister(id->idReg2())); + assert(isVectorRegister(id->idReg3())); + assert(isVectorRegister(id->idReg4())); + assert(isScalableVectorSize(elemsize)); + break; + case 
IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register elemsize = id->idOpSize(); @@ -5965,6 +6048,236 @@ emitter::code_t emitter::emitInsCodeSve(instruction ins, insFormat fmt) return result; } +/************************************************************************ + * + * Convert a rotation value that is 90 or 270 into a smaller encoding that matches one-to-one with the 'rot' field. + */ + +/*static*/ ssize_t emitter::emitEncodeRotationImm90_or_270(ssize_t imm) +{ + switch (imm) + { + case 90: + return 0; + + case 270: + return 1; + + default: + break; + } + + assert(!"Invalid rotation value"); + return 0; +} + +/************************************************************************ + * + * Convert an encoded rotation value (0 or 1) back to the rotation it represents (90 or 270). + */ + +/*static*/ ssize_t emitter::emitDecodeRotationImm90_or_270(ssize_t imm) +{ + assert(emitIsValidEncodedRotationImm90_or_270(imm)); + switch (imm) + { + case 0: + return 90; + + case 1: + return 270; + + default: + break; + } + + return 0; +} + +/************************************************************************ + * + * Check if the immediate value is a valid encoded rotation value for 90 or 270 (i.e. 0 or 1). + */ + +/*static*/ bool emitter::emitIsValidEncodedRotationImm90_or_270(ssize_t imm) +{ + return (imm == 0) || (imm == 1); +} + +/************************************************************************ + * + * Convert a rotation value that is 0, 90, 180 or 270 into a smaller encoding that matches one-to-one with the 'rot' + * field.
+ */ + +/*static*/ ssize_t emitter::emitEncodeRotationImm0_to_270(ssize_t imm) +{ + switch (imm) + { + case 0: + return 0; + + case 90: + return 1; + + case 180: + return 2; + + case 270: + return 3; + + default: + break; + } + + assert(!"Invalid rotation value"); + return 0; +} + +/************************************************************************ + * + * Convert an encoded rotation value (0, 1, 2 or 3) back to the rotation it represents (0, 90, 180 or 270). + */ + +/*static*/ ssize_t emitter::emitDecodeRotationImm0_to_270(ssize_t imm) +{ + assert(emitIsValidEncodedRotationImm0_to_270(imm)); + switch (imm) + { + case 0: + return 0; + + case 1: + return 90; + + case 2: + return 180; + + case 3: + return 270; + + default: + break; + } + + return 0; +} + +/************************************************************************ + * + * Check if the immediate value is a valid encoded rotation value for 0, 90, 180 or 270 (i.e. 0 to 3). + */ + +/*static*/ bool emitter::emitIsValidEncodedRotationImm0_to_270(ssize_t imm) +{ + return (imm >= 0) && (imm <= 3); +} + +/************************************************************************ + * + * Convert a small immediate float value to an encoded version that matches one-to-one with the instructions. + * The instruction determines which values are accepted (asserted in DEBUG only); values below 1.0 encode as 0, otherwise 1. + */ + +/*static*/ ssize_t emitter::emitEncodeSmallFloatImm(double immDbl, instruction ins) +{ +#ifdef DEBUG + switch (ins) + { + case INS_sve_fadd: + case INS_sve_fsub: + case INS_sve_fsubr: + assert((immDbl == 0.5) || (immDbl == 1.0)); + break; + + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + assert((immDbl == 0) || (immDbl == 1.0)); + break; + + case INS_sve_fmul: + assert((immDbl == 0.5) || (immDbl == 2.0)); + break; + + default: + assert(!"Invalid instruction"); + break; + } +#endif // DEBUG + if (immDbl < 1.0) + { + return 0; + } + return 1; +} + +/************************************************************************ + * + * Convert an encoded small float immediate value.
The instruction determines which float value the encoded 0 or 1 maps to. + */ + +/*static*/ double emitter::emitDecodeSmallFloatImm(ssize_t imm, instruction ins) +{ + assert(emitIsValidEncodedSmallFloatImm(imm)); + switch (ins) + { + case INS_sve_fadd: + case INS_sve_fsub: + case INS_sve_fsubr: + if (imm == 0) + { + return 0.5; + } + else + { + return 1.0; + } + + case INS_sve_fmax: + case INS_sve_fmaxnm: + case INS_sve_fmin: + case INS_sve_fminnm: + if (imm == 0) + { + return 0.0; + } + else + { + return 1.0; + } + break; + + case INS_sve_fmul: + if (imm == 0) + { + return 0.5; + } + else + { + return 2.0; + } + break; + + default: + break; + } + + assert(!"Invalid instruction"); + return 0.0; +} + +/************************************************************************ + * + * Check if the immediate value is a valid encoded small float (i.e. 0 or 1). + */ + +/*static*/ bool emitter::emitIsValidEncodedSmallFloatImm(size_t imm) +{ + return (imm == 0) || (imm == 1); +} + /***************************************************************************** * * For the given 'ins' returns the reverse instruction @@ -8825,6 +9138,16 @@ void emitter::emitIns_R_R_I(instruction ins, } break; + case INS_sve_ftmad: + assert(insOptsScalableAtLeastHalf(opt)); + assert(insScalableOptsNone(sopt)); + assert(isVectorRegister(reg1)); + assert(isVectorRegister(reg2)); + assert(isValidUimm3(imm)); + assert(isScalableVectorSize(size)); + fmt = IF_SVE_HN_2A; + break; + case INS_sve_ldr: assert(insOptsNone(opt)); assert(isScalableVectorSize(size)); @@ -9011,6 +9334,58 @@ void emitter::emitIns_R_R_I(instruction ins, appendToCurIG(id); } +/***************************************************************************** + * + * Add an instruction referencing two registers and a floating point constant.
+ */ + +void emitter::emitIns_R_R_F( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt /* = INS_OPTS_NONE */) +{ + ssize_t imm = 0; + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction and encode the float immediate */ + switch (ins) + { + case INS_sve_fmul: + case INS_sve_fmaxnm: + case INS_sve_fadd: + case INS_sve_fmax: + case INS_sve_fminnm: + case INS_sve_fsub: + case INS_sve_fmin: + case INS_sve_fsubr: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isScalableVectorSize(size)); + imm = emitEncodeSmallFloatImm(immDbl, ins); + fmt = IF_SVE_HM_2A; + break; + + default: + unreached(); + break; + + } // end switch (ins) + + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrSC(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + + dispIns(id); + appendToCurIG(id); +} + /***************************************************************************** * * Add an instruction referencing two registers and a constant.
@@ -10496,6 +10871,31 @@ void emitter::emitIns_R_R_R(instruction ins, fmt = IF_SVE_CZ_4A; break; + case INS_sve_fcmeq: + case INS_sve_fcmge: + case INS_sve_fcmgt: + case INS_sve_fcmlt: + case INS_sve_fcmle: + case INS_sve_fcmne: + assert(insOptsScalableAtLeastHalf(opt)); + assert(insScalableOptsNone(sopt)); + assert(isPredicateRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isScalableVectorSize(size)); + fmt = IF_SVE_HI_3A; + break; + + case INS_sve_flogb: + assert(insOptsScalableAtLeastHalf(opt)); + assert(insScalableOptsNone(sopt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isScalableVectorSize(size)); + fmt = IF_SVE_HP_3A; + break; + default: unreached(); break; @@ -11850,6 +12250,16 @@ void emitter::emitIns_R_R_R_I(instruction ins, } break; + case INS_sve_fcadd: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isScalableVectorSize(size)); + imm = emitEncodeRotationImm90_or_270(imm); + fmt = IF_SVE_GP_3A; + break; + default: unreached(); break; @@ -13354,6 +13764,30 @@ void emitter::emitInsSve_R_R_R_R(instruction ins, fmt = IF_SVE_JF_4A; break; + case INS_sve_bfmla: + case INS_sve_bfmls: + assert(opt == INS_OPTS_SCALABLE_H); + assert(insScalableOptsNone(sopt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg4)); + fmt = IF_SVE_HU_4B; + break; + + case INS_sve_fmad: + case INS_sve_fmsb: + case INS_sve_fnmad: + case INS_sve_fnmsb: + assert(insOptsScalableAtLeastHalf(opt)); + assert(insScalableOptsNone(sopt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg4)); + fmt = IF_SVE_HV_4A; + break; + default: unreached(); break; @@ -13414,6 +13848,58 @@ void 
emitter::emitInsSve_R_R_R_R(instruction ins, appendToCurIG(id); } +/***************************************************************************** + * + * Add an instruction referencing four registers and a constant (a rotation of 0, 90, 180 or 270 for FCMLA). + */ + +void emitter::emitIns_R_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + ssize_t imm, + insOpts opt /* = INS_OPTS_NONE */) +{ + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_NONE; + + /* Figure out the encoding format of the instruction and encode the rotation immediate */ + switch (ins) + { + case INS_sve_fcmla: + assert(insOptsScalableAtLeastHalf(opt)); + assert(isVectorRegister(reg1)); + assert(isLowPredicateRegister(reg2)); + assert(isVectorRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(isScalableVectorSize(size)); + imm = emitEncodeRotationImm0_to_270(imm); + fmt = IF_SVE_GT_4A; + break; + + default: + unreached(); + break; + } + assert(fmt != IF_NONE); + + instrDesc* id = emitNewInstrCns(attr, imm); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(opt); + + id->idReg1(reg1); + id->idReg2(reg2); + id->idReg3(reg3); + id->idReg4(reg4); + + dispIns(id); + appendToCurIG(id); +} + /***************************************************************************** * * Add an instruction referencing a register and a condition code @@ -16305,6 +16791,34 @@ void emitter::emitIns_Call(EmitCallType callType, return 0; } +/***************************************************************************** + * + * Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction + * This specifically encodes the size at bit locations '18-17'.
+ */ + +/*static*/ emitter::code_t emitter::insEncodeSveElemsize_18_to_17(emitAttr size) +{ + switch (size) + { + case EA_1BYTE: + return 0; + + case EA_2BYTE: + return (1 << 17); // set the bit at location 17 + + case EA_4BYTE: + return (1 << 18); // set the bit at location 18 + + case EA_8BYTE: + return (1 << 18) | (1 << 17); // set the bits at locations 18 and 17 + + default: + assert(!"Invalid insOpt for vector register"); + } + return 0; +} + /***************************************************************************** * * Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction @@ -16390,6 +16904,42 @@ void emitter::emitIns_Call(EmitCallType callType, return encodedSize | imm3High | imm3Low; } +/***************************************************************************** + * + * Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction + * This specifically encodes the field 'rot' at bit location '16'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveImm90_or_270_rot(ssize_t imm) +{ + assert(emitIsValidEncodedRotationImm90_or_270(imm)); + return (code_t)(imm << 16); +} + +/***************************************************************************** + * + * Returns the encoding to select the constant values 0, 90, 180 or 270 for an Arm64 SVE vector instruction + * This specifically encodes the field 'rot' at bit locations '14-13'. + */ + +/*static*/ emitter::code_t emitter::insEncodeSveImm0_to_270_rot(ssize_t imm) +{ + assert(emitIsValidEncodedRotationImm0_to_270(imm)); + return (code_t)(imm << 13); +} + +/***************************************************************************** + * + * Returns the encoding to select the constant float values 0, 0.5, 1.0 or 2.0 for an Arm64 SVE vector instruction + * This specifically encodes the field 'i1' at bit location '5'.
+ */ + +/*static*/ emitter::code_t emitter::insEncodeSveSmallFloatImm(ssize_t imm) +{ + assert(emitIsValidEncodedSmallFloatImm(imm)); + return (code_t)(imm << 5); +} + /***************************************************************************** * * Returns the register list size for the given SVE instruction. @@ -16555,7 +17105,6 @@ void emitter::emitIns_Call(EmitCallType callType, case IF_SVE_IM_3A: case IF_SVE_IN_4A: case IF_SVE_IX_4A: - case IF_SVE_HI_3A: case IF_SVE_IO_3A: case IF_SVE_IP_4A: case IF_SVE_IQ_3A: @@ -16757,6 +17306,10 @@ void emitter::emitIns_Call(EmitCallType callType, assert((regpos == 2) || (regpos == 3)); return ((regpos == 2) ? PREDICATE_NONE : PREDICATE_SIZED); + case IF_SVE_HI_3A: + assert((regpos == 1) || (regpos == 2)); + return ((regpos == 2) ? PREDICATE_ZERO : PREDICATE_SIZED); + case IF_SVE_ID_2A: case IF_SVE_JG_2A: return PREDICATE_NONE; @@ -18085,6 +18638,17 @@ void emitter::emitIns_Call(EmitCallType callType, return (code_t)((imm & 0xFF) << 5); } +/***************************************************************************** + * + * Returns the encoding for the unsigned immediate value as 3 bits at bit locations '18-16'.
+ */ + +/*static*/ emitter::code_t emitter::insEncodeUimm3_18_to_16(ssize_t imm) +{ + assert(isValidUimm3(imm)); + return (code_t)imm << 16; +} + /***************************************************************************** * * Returns the encoding to select the 4/8-byte width specifier @@ -21229,6 +21793,90 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeSveImm90_or_270_rot(imm); // r + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm + code |= insEncodeSveImm0_to_270_rot(imm); // rr + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_P_3_to_0(id->idReg1()); // DDDD + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HM_2A: // ........xx...... 
...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + { + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeSveSmallFloatImm(imm); // i + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + } + break; + + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + imm = emitGetInsSC(id); + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_V_9_to_5(id->idReg2()); // mmmmm + code |= insEncodeUimm3_18_to_16(imm); // iii + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeSveElemsize_18_to_17(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + + case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ddddd + code |= 
insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_V_9_to_5(id->idReg3()); // mmmmm + code |= insEncodeReg_V_20_to_16(id->idReg4()); // aaaaa + code |= insEncodeSveElemsize(optGetSveElemsize(id->idInsOpt())); // xx + dst += emitOutput_Instr(dst, code); + break; + case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register case IF_SVE_JG_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE store predicate register imm = emitGetInsSC(id); @@ -21404,6 +22052,19 @@ void emitter::emitDispFloatImm(ssize_t imm8) printf("%.4f", result); } +/***************************************************************************** + * + * Display an encoded small float constant value, decoded per instruction and prefixed with '#' under strictArmAsm + */ +void emitter::emitDispSmallFloatImm(ssize_t imm, instruction ins) +{ + if (strictArmAsm) + { + printf("#"); + } + printf("%.4f", emitDecodeSmallFloatImm(imm, ins)); +} + /***************************************************************************** * * Display an immediate with an optional left-shift.
@@ -24532,6 +25193,70 @@ void emitter::emitDispInsHelp( emitDispSveImmMulVl(id->idReg2(), imm); break; + // ., /M, ., ., + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispImm(emitDecodeRotationImm90_or_270(imm), false); + break; + + // ., /M, ., ., + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispSveReg(id->idReg4(), id->idInsOpt(), true); + emitDispImm(emitDecodeRotationImm0_to_270(imm), false); + break; + + // ., /Z, ., #0.0 + case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + emitDispPredicateReg(id->idReg1(), insGetPredicateType(fmt, 1), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt, 2), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispFloatZero(); + break; + + // ., /M, ., + case IF_SVE_HM_2A: // ........xx...... 
...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + imm = emitGetInsSC(id); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSmallFloatImm(imm, id->idIns()); + break; + + // ., ., ., # + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispSveReg(id->idReg2(), id->idInsOpt(), true); + emitDispImm(emitGetInsSC(id), false); + break; + + // ., /M, . + case IF_SVE_HP_3A: // .............xx. ...gggnnnnnddddd -- SVE floating-point convert to integer + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), false); + break; + + // .H, /M, .H, .H + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + // ., /M, ., . 
+ case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + emitDispSveReg(id->idReg1(), id->idInsOpt(), true); + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); + emitDispSveReg(id->idReg3(), id->idInsOpt(), true); + emitDispSveReg(id->idReg4(), id->idInsOpt(), false); + break; + default: printf("unexpected format %s", emitIfName(id->idInsFmt())); assert(!"unexpectedFormat"); @@ -28000,6 +28725,73 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_GP_3A: // ........xx.....r ...gggmmmmmddddd -- SVE floating-point complex add (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_GT_4A: // ........xx.mmmmm .rrgggnnnnnddddd -- SVE floating-point complex multiply-add (predicated) + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_5C; + break; + + case IF_SVE_HI_3A: // ........xx...... ...gggnnnnn.DDDD -- SVE floating-point compare with zero + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_SVE_HM_2A: // ........xx...... ...ggg....iddddd -- SVE floating-point arithmetic with immediate + // (predicated) + switch (ins) + { + case INS_sve_fmul: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + default: + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + } + break; + + case IF_SVE_HN_2A: // ........xx...iii ......mmmmmddddd -- SVE floating-point trig multiply-add coefficient + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + + case IF_SVE_HP_3A: // .............xx. 
...gggnnnnnddddd -- SVE floating-point convert to integer + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_3C; + break; + + case IF_SVE_HU_4B: // ...........mmmmm ...gggnnnnnddddd -- SVE floating-point multiply-accumulate writing addend + switch (ins) + { + case INS_sve_bfmla: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + case INS_sve_bfmls: + result.insThroughput = PERFSCORE_THROUGHPUT_1C; // need to fix + result.insLatency = PERFSCORE_LATENCY_1C; // need to fix + break; + + default: + // all other instructions + perfScoreUnhandledInstruction(id, &result); + break; + } + break; + + case IF_SVE_HV_4A: // ........xx.aaaaa ...gggmmmmmddddd -- SVE floating-point multiply-accumulate writing + // multiplicand + result.insThroughput = PERFSCORE_THROUGHPUT_2C; + result.insLatency = PERFSCORE_LATENCY_4C; + break; + case IF_SVE_ID_2A: // ..........iiiiii ...iiinnnnn.TTTT -- SVE load predicate register case IF_SVE_IE_2A: // ..........iiiiii ...iiinnnnnttttt -- SVE load vector register result.insThroughput = PERFSCORE_THROUGHPUT_3C; diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index 6fd90d29549bc..6868f27f5bab9 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -43,6 +43,7 @@ void emitDispImm(ssize_t imm, bool addComma, bool alwaysHex = false, bool isAddr void emitDispElementIndex(const ssize_t imm, const bool addComma); void emitDispFloatZero(); void emitDispFloatImm(ssize_t imm8); +void emitDispSmallFloatImm(ssize_t imm, instruction ins); void emitDispImmOptsLSL(ssize_t imm, bool hasShift, unsigned shiftAmount); void emitDispCond(insCond cond); void emitDispFlags(insCflags flags); @@ -294,6 +295,24 @@ static emitter::floatImm8 emitEncodeFloatImm8(double immDbl); static double emitDecodeFloatImm8(const emitter::floatImm8 fpImm); +static ssize_t emitEncodeRotationImm90_or_270(ssize_t imm); + 
+static ssize_t emitDecodeRotationImm90_or_270(ssize_t imm); + +static bool emitIsValidEncodedRotationImm90_or_270(ssize_t imm); + +static ssize_t emitEncodeRotationImm0_to_270(ssize_t imm); + +static ssize_t emitDecodeRotationImm0_to_270(ssize_t imm); + +static bool emitIsValidEncodedRotationImm0_to_270(ssize_t imm); + +static ssize_t emitEncodeSmallFloatImm(double immDbl, instruction ins); + +static double emitDecodeSmallFloatImm(ssize_t imm, instruction ins); + +static bool emitIsValidEncodedSmallFloatImm(size_t imm); + /************************************************************************ * * This union is used to encode/decode the cond, nzcv and imm5 values for @@ -502,6 +521,10 @@ static code_t insEncodeSveElemsize(emitAttr size); // This specifically encodes the size at bit locations '22-21'. static code_t insEncodeSveElemsize_22_to_21(emitAttr size); +// Returns the encoding to select the 1/2/4/8 byte elemsize for an Arm64 Sve vector instruction +// This specifically encodes the size at bit locations '18-17'. +static code_t insEncodeSveElemsize_18_to_17(emitAttr size); + // Returns the encoding to select the 4/8 byte elemsize for an Arm64 Sve vector instruction // This specifically encodes the field 'sz' at bit location '21'. static code_t insEncodeSveElemsize_sz_21(emitAttr size); @@ -510,6 +533,18 @@ static code_t insEncodeSveElemsize_sz_21(emitAttr size); // This specifically encodes the field 'tszh:tszl' at bit locations '22:20-19'. static code_t insEncodeSveElemsize_tszh_22_tszl_20_to_19(emitAttr size); +// Returns the encoding to select the constant values 90 or 270 for an Arm64 SVE vector instruction +// This specifically encodes the field 'rot' at bit location '16'. +static code_t insEncodeSveImm90_or_270_rot(ssize_t imm); + +// Returns the encoding to select the constant values 0, 90, 180 or 270 for an Arm64 SVE vector instruction +// This specifically encodes the field 'rot' at bit locations '14-13'. 
+static code_t insEncodeSveImm0_to_270_rot(ssize_t imm); + +// Returns the encoding to select the constant float values 0, 0.5, 1.0 or 2.0 for an Arm64 SVE vector instruction +// This specifically encodes the field 'i1' at bit location '5'. +static code_t insEncodeSveSmallFloatImm(ssize_t imm); + // Returns the first register list size for the given SVE instruction. static int insGetSveReg1ListSize(instruction ins); @@ -589,6 +624,9 @@ static code_t insEncodeUimm4From1_19_to_16(ssize_t imm); // Returns the encoding for the immediate value as 8-bits at bit locations '12-5'. static code_t insEncodeImm8_12_to_5(ssize_t imm); +// Returns the encoding for the unsigned immediate value as 3-bits at bit locations '18-16'. +static code_t insEncodeUimm3_18_to_16(ssize_t imm); + // Returns the encoding to select the elemsize for an Arm64 SVE vector instruction plus an immediate. // This specifically encodes the field 'tszh:tszl' at bit locations '23-22:9-8'. static code_t insEncodeSveShift_23_to_22_9_to_0(emitAttr size, bool isRightShift, size_t imm); @@ -1258,6 +1296,9 @@ void emitIns_R_R_I(instruction ins, insOpts opt = INS_OPTS_NONE, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); +void emitIns_R_R_F( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, double immDbl, insOpts opt = INS_OPTS_NONE); + // Checks for a large immediate that needs a second instruction void emitIns_R_R_Imm(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, ssize_t imm); @@ -1316,6 +1357,15 @@ void emitInsSve_R_R_R_R(instruction ins, insOpts opt = INS_OPTS_NONE, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE); +void emitIns_R_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + ssize_t imm, + insOpts opt = INS_OPTS_NONE); + void emitIns_R_COND(instruction ins, emitAttr attr, regNumber reg, insCond cond); void emitIns_R_R_COND(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insCond cond);