diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 330cfc9f435d0..1f030310a717c 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6001,6 +6001,216 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_OPTS_SCALABLE_S); // ST1W {.}, , [{, #, MUL VL}] theEmitter->emitIns_R_R_R_I(INS_sve_st1w, EA_SCALABLE, REG_V2, REG_P1, REG_R3, 1, INS_OPTS_SCALABLE_D); // ST1W {.}, , [{, #, MUL VL}] + + // IF_SVE_HW_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V0, + INS_OPTS_SCALABLE_D_UXTW); // LD1B {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P3, REG_R5, REG_V4, INS_OPTS_SCALABLE_S_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1H {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P0, REG_R1, REG_V5, + INS_OPTS_SCALABLE_D_UXTW); // LD1SB {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V5, REG_P2, REG_R1, REG_V3, INS_OPTS_SCALABLE_S_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1SH {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V4, REG_P5, REG_R6, REG_V1, INS_OPTS_SCALABLE_S_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1W {.S }, /Z, [, .S, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, REG_V4, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1B {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V2, REG_P1, REG_R3, REG_V4, INS_OPTS_SCALABLE_S_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1H {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V2, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1SB {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_V5, INS_OPTS_SCALABLE_S_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1SH {.S }, /Z, [, .S, #1] + 
theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V4, REG_P1, REG_R2, REG_V3, INS_OPTS_SCALABLE_S_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1W {.S }, /Z, [, .S, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V0, + INS_OPTS_SCALABLE_D_SXTW); // LD1B {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P3, REG_R5, REG_V4, INS_OPTS_SCALABLE_S_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1H {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V2, REG_P0, REG_R1, REG_V5, + INS_OPTS_SCALABLE_D_SXTW); // LD1SB {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V5, REG_P2, REG_R1, REG_V3, INS_OPTS_SCALABLE_S_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1SH {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V4, REG_P5, REG_R6, REG_V1, INS_OPTS_SCALABLE_S_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1W {.S }, /Z, [, .S, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, REG_V4, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1B {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V2, REG_P1, REG_R3, REG_V4, INS_OPTS_SCALABLE_S_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1H {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V2, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1SB {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V3, REG_P2, REG_R4, REG_V5, INS_OPTS_SCALABLE_S_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1SH {.S }, /Z, [, .S, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V4, REG_P1, REG_R2, REG_V3, INS_OPTS_SCALABLE_S_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1W {.S }, /Z, [, .S, #2] + + // IF_SVE_HW_4A_A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P4, REG_R3, REG_V2, + INS_OPTS_SCALABLE_S_UXTW); // LD1B {.S }, /Z, [, .S, ] + 
theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R0, REG_V1, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1H {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_V1, + INS_OPTS_SCALABLE_S_UXTW); // LD1SB {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V5, REG_P4, REG_R3, REG_V2, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1SH {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V4, REG_P3, REG_R2, REG_V1, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1W {.D }, /Z, [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, REG_V0, + INS_OPTS_SCALABLE_S_UXTW); // LDFF1B {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P5, REG_R1, REG_V2, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1H {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P1, REG_R4, REG_V3, + INS_OPTS_SCALABLE_S_UXTW); // LDFF1SB {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1SH {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V5, REG_P0, REG_R1, REG_V2, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1W {.D }, /Z, [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V5, REG_P4, REG_R3, REG_V2, + INS_OPTS_SCALABLE_S_SXTW); // LD1B {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V2, REG_P1, REG_R0, REG_V1, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1H {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_V1, + INS_OPTS_SCALABLE_S_SXTW); // LD1SB {.S }, /Z, [, .S, ] + 
theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V5, REG_P4, REG_R3, REG_V2, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1SH {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V4, REG_P3, REG_R2, REG_V1, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1W {.D }, /Z, [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V3, REG_P2, REG_R1, REG_V0, + INS_OPTS_SCALABLE_S_SXTW); // LDFF1B {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P5, REG_R1, REG_V2, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1H {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V2, REG_P1, REG_R4, REG_V3, + INS_OPTS_SCALABLE_S_SXTW); // LDFF1SB {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1SH {.D }, /Z, [, .D, #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V5, REG_P0, REG_R1, REG_V2, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1W {.D }, /Z, [, .D, #2] + + // IF_SVE_HW_4A_B + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P5, REG_R4, REG_V3, + INS_OPTS_SCALABLE_D_UXTW); // LD1H {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V3, REG_P0, REG_R1, REG_V2, + INS_OPTS_SCALABLE_D_UXTW); // LD1SH {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V4, REG_P5, REG_R3, REG_V1, + INS_OPTS_SCALABLE_D_UXTW); // LD1W {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V1, REG_P3, REG_R4, REG_V5, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1H {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V2, REG_P1, REG_R3, REG_V4, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1SH {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, 
EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1W {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P5, REG_R4, REG_V3, + INS_OPTS_SCALABLE_D_SXTW); // LD1H {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V3, REG_P0, REG_R1, REG_V2, + INS_OPTS_SCALABLE_D_SXTW); // LD1SH {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V4, REG_P5, REG_R3, REG_V1, + INS_OPTS_SCALABLE_D_SXTW); // LD1W {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V1, REG_P3, REG_R4, REG_V5, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1H {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V2, REG_P1, REG_R3, REG_V4, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1SH {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V3, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1W {.D }, /Z, [, .D, ] + + // IF_SVE_HW_4A_C + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, + INS_OPTS_SCALABLE_S_UXTW); // LD1H {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R5, REG_V4, + INS_OPTS_SCALABLE_S_UXTW); // LD1SH {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V5, + INS_OPTS_SCALABLE_S_UXTW); // LD1W {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P2, REG_R1, REG_V3, + INS_OPTS_SCALABLE_S_UXTW); // LDFF1H {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V5, REG_P4, REG_R1, REG_V2, + INS_OPTS_SCALABLE_S_UXTW); // LDFF1SH {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V3, REG_P5, REG_R2, REG_V1, + INS_OPTS_SCALABLE_S_UXTW); // LDFF1W {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V0, REG_P1, REG_R2, REG_V3, 
+ INS_OPTS_SCALABLE_S_SXTW); // LD1H {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R5, REG_V4, + INS_OPTS_SCALABLE_S_SXTW); // LD1SH {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V5, + INS_OPTS_SCALABLE_S_SXTW); // LD1W {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V4, REG_P2, REG_R1, REG_V3, + INS_OPTS_SCALABLE_S_SXTW); // LDFF1H {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V5, REG_P4, REG_R1, REG_V2, + INS_OPTS_SCALABLE_S_SXTW); // LDFF1SH {.S }, /Z, [, .S, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V3, REG_P5, REG_R2, REG_V1, + INS_OPTS_SCALABLE_S_SXTW); // LDFF1W {.S }, /Z, [, .S, ] + + // IF_SVE_IU_4A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V4, REG_P2, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1D {.D }, /Z, [, .D, #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V2, REG_P3, REG_R5, REG_V4, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1SW {.D }, /Z, [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V5, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1D {.D }, /Z, [, .D, #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V3, REG_P4, REG_R1, REG_V2, INS_OPTS_SCALABLE_D_UXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1SW {.D }, /Z, [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V4, REG_P2, REG_R1, REG_V3, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1D {.D }, /Z, [, .D, #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V2, REG_P3, REG_R5, REG_V4, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LD1SW {.D }, /Z, [, .D, #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V5, INS_OPTS_SCALABLE_D_SXTW, + 
INS_SCALABLE_OPTS_MOD_N); // LDFF1D {.D }, /Z, [, .D, #3] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V3, REG_P4, REG_R1, REG_V2, INS_OPTS_SCALABLE_D_SXTW, + INS_SCALABLE_OPTS_MOD_N); // LDFF1SW {.D }, /Z, [, .D, #2] + + // IF_SVE_IU_4A_A + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, + INS_OPTS_SCALABLE_D_UXTW); // LD1SW {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V6, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1D {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V0, + INS_OPTS_SCALABLE_D_UXTW); // LDFF1SW {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sw, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, + INS_OPTS_SCALABLE_D_SXTW); // LD1SW {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1d, EA_SCALABLE, REG_V0, REG_P2, REG_R3, REG_V6, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1D {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sw, EA_SCALABLE, REG_V1, REG_P4, REG_R2, REG_V0, + INS_OPTS_SCALABLE_D_SXTW); // LDFF1SW {.D }, /Z, [, .D, ] + + // IF_SVE_IU_4A_C + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P3, REG_R5, REG_V2, + INS_OPTS_SCALABLE_D_UXTW); // LD1D {.D }, /Z, [, .D, ] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1d, EA_SCALABLE, REG_V0, REG_P3, REG_R5, REG_V2, + INS_OPTS_SCALABLE_D_SXTW); // LD1D {.D }, /Z, [, .D, ] + + // IF_SVE_HW_4B + theEmitter->emitIns_R_R_R_R(INS_sve_ld1b, EA_SCALABLE, REG_V4, REG_P5, REG_R6, REG_V1, + INS_OPTS_SCALABLE_D); // LD1B {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V5, REG_P4, REG_R3, REG_V2, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1H {.D }, /Z, [, .D, LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sb, EA_SCALABLE, REG_V6, REG_P3, REG_R1, REG_V4, + INS_OPTS_SCALABLE_D); // LD1SB {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, 
EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1SH {.D }, /Z, [, .D, LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V0, REG_P3, REG_R2, REG_V1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LD1W {.D }, /Z, [, .D, LSL #2] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1b, EA_SCALABLE, REG_V2, REG_P5, REG_R4, REG_V3, + INS_OPTS_SCALABLE_D); // LDFF1B {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V0, REG_P2, REG_R6, REG_V1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1H {.D }, /Z, [, .D, LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sb, EA_SCALABLE, REG_V3, REG_P4, REG_R5, REG_V2, + INS_OPTS_SCALABLE_D); // LDFF1SB {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V5, REG_P4, REG_R3, REG_V2, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1SH {.D }, /Z, [, .D, LSL #1] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V3, REG_P2, REG_R0, REG_V1, INS_OPTS_SCALABLE_D, + INS_SCALABLE_OPTS_LSL_N); // LDFF1W {.D }, /Z, [, .D, LSL #2] + + // IF_SVE_HW_4B_D + theEmitter->emitIns_R_R_R_R(INS_sve_ld1h, EA_SCALABLE, REG_V4, REG_P2, REG_R1, REG_V3, + INS_OPTS_SCALABLE_D); // LD1H {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1sh, EA_SCALABLE, REG_V2, REG_P3, REG_R4, REG_V5, + INS_OPTS_SCALABLE_D); // LD1SH {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ld1w, EA_SCALABLE, REG_V1, REG_P2, REG_R3, REG_V4, + INS_OPTS_SCALABLE_D); // LD1W {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1h, EA_SCALABLE, REG_V2, REG_P3, REG_R1, REG_V5, + INS_OPTS_SCALABLE_D); // LDFF1H {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1sh, EA_SCALABLE, REG_V1, REG_P4, REG_R3, REG_V2, + INS_OPTS_SCALABLE_D); // LDFF1SH {.D }, /Z, [, .D] + theEmitter->emitIns_R_R_R_R(INS_sve_ldff1w, EA_SCALABLE, REG_V4, REG_P3, REG_R2, REG_V1, + INS_OPTS_SCALABLE_D); // LDFF1W {.D }, /Z, [, .D] } #endif 
// defined(TARGET_ARM64) && defined(DEBUG) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 7338328b788ad..55da6504ae142 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -1572,6 +1572,42 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isValidSimm4(imm)); // iiii break; + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + elemsize = id->idOpSize(); + assert(insOptsScalable32bitExtends(id->idInsOpt())); + assert(isVectorRegister(id->idReg1())); // ttttt + assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + + case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + elemsize = id->idOpSize(); + assert(id->idInsOpt() == INS_OPTS_SCALABLE_D); + assert(isVectorRegister(id->idReg1())); // ttttt + 
assert(isPredicateRegister(id->idReg2())); // ggg + assert(isGeneralRegister(id->idReg3())); // nnnnn + assert(isVectorRegister(id->idReg4())); // mmmmm + assert(isScalableVectorSize(elemsize)); + break; + default: printf("unexpected format %s\n", emitIfName(id->idInsFmt())); assert(!"Unexpected format"); @@ -11332,6 +11368,112 @@ void emitter::emitIns_R_R_R_R(instruction ins, } break; + case INS_sve_ld1b: + case INS_sve_ld1sb: + case INS_sve_ldff1b: + case INS_sve_ldff1sb: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(isScalableVectorSize(size)); + assert(insScalableOptsNone(sopt)); + + if (insOptsScalableDoubleWord32bitExtends(opt)) + { + fmt = IF_SVE_HW_4A; + } + else if (insOptsScalableSingleWord32bitExtends(opt)) + { + fmt = IF_SVE_HW_4A_A; + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + fmt = IF_SVE_HW_4B; + } + break; + + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + case INS_sve_ld1w: + case INS_sve_ldff1w: + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(isScalableVectorSize(size)); + + if (insOptsScalableDoubleWord32bitExtends(opt)) + { + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_HW_4A_A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_HW_4A_B; + } + } + else if (insOptsScalableSingleWord32bitExtends(opt)) + { + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_HW_4A; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_HW_4A_C; + } + } + else + { + assert(opt == INS_OPTS_SCALABLE_D); + if (sopt == INS_SCALABLE_OPTS_LSL_N) + { + fmt = IF_SVE_HW_4B; + } + else + { + assert(insScalableOptsNone(sopt)); + fmt = IF_SVE_HW_4B_D; + } + } + break; + + case INS_sve_ldff1sw: + case INS_sve_ldff1d: + case INS_sve_ld1d: + case INS_sve_ld1sw: + 
assert(insOptsScalableDoubleWord32bitExtends(opt)); + assert(isVectorRegister(reg1)); + assert(isPredicateRegister(reg2)); + assert(isGeneralRegister(reg3)); + assert(isVectorRegister(reg4)); + assert(isScalableVectorSize(size)); + + if (sopt == INS_SCALABLE_OPTS_MOD_N) + { + fmt = IF_SVE_IU_4A; + } + else + { + assert(insScalableOptsNone(sopt)); + if (ins == INS_sve_ld1d) + { + fmt = IF_SVE_IU_4A_C; + } + else + { + fmt = IF_SVE_IU_4A_A; + } + } + break; + default: unreached(); break; @@ -14662,6 +14804,351 @@ void emitter::emitIns_Call(EmitCallType callType, return PREDICATE_NONE; } +/***************************************************************************** + * + * Returns true if the SVE instruction has a LSL addr for the given format (st1h in IF_SVE_JD_4A, st1w in IF_SVE_JD_4B, the halfword/word loads in IF_SVE_HW_4B); returns false for every other instruction/format pair. + * This is for formats that have [<Xn|SP>, <Xm>, LSL #N] or [<Xn|SP>, <Zm>.D, LSL #N] + */ +/*static*/ bool emitter::insSveIsLslN(instruction ins, insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_JD_4A: + switch (ins) + { + case INS_sve_st1h: + return true; + + default: + break; + } + break; + + case IF_SVE_JD_4B: + switch (ins) + { + case INS_sve_st1w: + return true; + + default: + break; + } + break; + + case IF_SVE_HW_4B: + switch (ins) + { + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + case INS_sve_ld1w: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + default: + break; + } + + return false; +} + +/***************************************************************************** + * + * Returns true if the SVE instruction has a <mod> (extend modifier) addr for the given format; returns false for every other instruction/format pair. 
+ * This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N] + */ +/*static*/ bool emitter::insSveIsModN(instruction ins, insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_JJ_4A: + case IF_SVE_JJ_4A_B: + switch (ins) + { + case INS_sve_st1d: + case INS_sve_st1h: + case INS_sve_st1w: + return true; + + default: + break; + } + break; + + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + switch (ins) + { + case INS_sve_st1h: + case INS_sve_st1w: + return true; + + default: + break; + } + break; + + case IF_SVE_JK_4A: + case IF_SVE_JK_4A_B: + switch (ins) + { + case INS_sve_st1b: + return true; + + default: + break; + } + break; + + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + switch (ins) + { + case INS_sve_ld1b: + case INS_sve_ld1h: + case INS_sve_ld1sb: + case INS_sve_ld1sh: + case INS_sve_ld1w: + case INS_sve_ldff1b: + case INS_sve_ldff1h: + case INS_sve_ldff1sb: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + case IF_SVE_HW_4A_B: + case IF_SVE_HW_4A_C: + switch (ins) + { + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ld1w: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + case INS_sve_ldff1w: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4A: + switch (ins) + { + case INS_sve_ld1d: + case INS_sve_ld1sw: + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4A_A: + switch (ins) + { + case INS_sve_ld1sw: + case INS_sve_ldff1d: + case INS_sve_ldff1sw: + return true; + + default: + break; + } + break; + + case IF_SVE_IU_4A_C: + switch (ins) + { + case INS_sve_ld1d: + return true; + + default: + break; + } + break; + + default: + break; + } + + return false; +} + +/***************************************************************************** + * + * Returns 0, 1, 2 or 3 depending on the instruction and format: the N of "LSL #N" or "<mod> #N", with 0 meaning the address carries no #N. 
Asserts "Unexpected instruction format" and returns 0 when the switch below falls through without a match. 
+ * This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N], [<Xn|SP>, <Xm>, LSL #N] + */ + +/*static*/ int emitter::insSveGetLslOrModN(instruction ins, insFormat fmt) +{ + switch (fmt) + { + case IF_SVE_JD_4A: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_st1h: + return 1; + + default: + break; + } + break; + + case IF_SVE_JD_4B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_st1w: + return 2; + + default: + break; + } + break; + + case IF_SVE_HW_4B: + assert(insSveIsLslN(ins, fmt)); + assert(!insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + return 1; + + case INS_sve_ld1w: + case INS_sve_ldff1w: + return 2; + + default: + break; + } + break; + + case IF_SVE_JJ_4A: + case IF_SVE_JJ_4A_B: + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + case IF_SVE_JK_4A: + case IF_SVE_JK_4A_B: + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + case IF_SVE_HW_4A_B: + case IF_SVE_HW_4A_C: + case IF_SVE_IU_4A: + case IF_SVE_IU_4A_A: + case IF_SVE_IU_4A_C: + assert(!insSveIsLslN(ins, fmt)); + assert(insSveIsModN(ins, fmt)); + switch (ins) + { + case INS_sve_ld1h: + case INS_sve_ld1sh: + case INS_sve_ldff1h: + case INS_sve_ldff1sh: + switch (fmt) + { + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + return 1; + + default: + break; + } + return 0; /* any other format in this group: <mod> without #N */ + + case INS_sve_ld1w: + case INS_sve_ldff1w: + case INS_sve_ld1sw: + case INS_sve_ldff1sw: + switch (fmt) + { + case IF_SVE_HW_4A: + case IF_SVE_HW_4A_A: + case IF_SVE_IU_4A: + return 2; + + default: + break; + } + return 0; + + case INS_sve_ld1d: + case INS_sve_ldff1d: + switch (fmt) + { + case IF_SVE_IU_4A: + return 3; + + default: + break; + } + return 0; + + case INS_sve_st1h: + switch (fmt) + { + case IF_SVE_JJ_4A_C: + case IF_SVE_JJ_4A_D: + return 0; + + default: + break; + 
} + return 1; + + case INS_sve_st1w: + switch (fmt) + { + case IF_SVE_JJ_4A_C: + case 
IF_SVE_JJ_4A_D: + return 0; + + default: + break; + } + return 2; + + case INS_sve_st1d: + if (fmt == IF_SVE_JJ_4A_B) + { + return 0; + } + return 3; + + default: + break; /* byte-sized loads/stores in this group fall through to 0 */ + } + return 0; + + default: + break; + } + + assert(!"Unexpected instruction format"); + return 0; +} + /***************************************************************************** * * Returns true if the specified instruction can encode the 'dtype' field. @@ -17391,6 +17878,52 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm + + switch (id->idInsOpt()) + { + case INS_OPTS_SCALABLE_S_SXTW: + case INS_OPTS_SCALABLE_D_SXTW: + code |= (1 << 22); // h + break; + + default: + break; + } + + dst += emitOutput_Instr(dst, code); + 
break; + + case IF_SVE_HW_4B: // 
...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + code = emitInsCodeSve(ins, fmt); + code |= insEncodeReg_V_4_to_0(id->idReg1()); // ttttt + code |= insEncodeReg_P_12_to_10(id->idReg2()); // ggg + code |= insEncodeReg_R_9_to_5(id->idReg3()); // nnnnn + code |= insEncodeReg_V_20_to_16(id->idReg4()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + default: assert(!"Unexpected format"); break; @@ -17839,6 +18372,83 @@ void emitter::emitDispSveExtendOpts(insOpts opt) } } +/***************************************************************************** + * + * Prints the encoding for the Extend Type encoding (via emitDispSveExtendOpts) along with the N value: " #1", " #2" or " #3" is appended for n = 1..3, nothing for n = 0. Asserts that n is within 0..3. + */ + +void emitter::emitDispSveExtendOptsModN(insOpts opt, int n) +{ + assert(n >= 0 && n <= 3); + + emitDispSveExtendOpts(opt); + switch (n) + { + case 3: + printf(" #3"); + break; + + case 2: + printf(" #2"); + break; + + case 1: + printf(" #1"); + break; + + default: + break; /* n == 0: extend only, no #N suffix */ + } +} + +/***************************************************************************** + * + * Prints the bracketed address operand "[reg1, reg2{, ...}]": the encoding for the <mod> or LSL encoding along with the N value. The N value comes from insSveGetLslOrModN(ins, fmt). 
+ * This is for formats that have [<Xn|SP>, <Zm>.T, <mod>], [<Xn|SP>, <Zm>.T, <mod> #N], [<Xn|SP>, <Xm>, LSL #N] + */ +void emitter::emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt) +{ + printf("["); + emitDispReg(reg1, EA_8BYTE, true); /* base register; the 'true' presumably requests a trailing comma - confirm against emitDispReg */ + if (isVectorRegister(reg2)) + { + emitDispSveReg(reg2, opt, false); + } + else + { + emitDispReg(reg2, EA_8BYTE, false); + } + + if (insOptsScalable32bitExtends(opt)) + { + emitDispComma(); + emitDispSveExtendOptsModN(opt, insSveGetLslOrModN(ins, fmt)); + } + else if (insSveIsLslN(ins, fmt)) + { + emitDispComma(); + switch (insSveGetLslOrModN(ins, fmt)) + { + case 3: + printf("lsl #3"); + break; + + case 2: + printf("lsl #2"); + break; + + case 1: + printf("lsl #1"); + break; + + 
default: + assert(!"Invalid instruction"); + break; + } + } + printf("]"); +} + /***************************************************************************** * * Prints the encoding for the Extend Type encoding in loads/stores @@ -20149,32 +20759,8 @@ void emitter::emitDispInsHelp( // {.}, , [, ] // {.}, , [, , LSL #1] case IF_SVE_JD_4A: // .........xxmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg - printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn - if (ins == INS_sve_st1h) - { - emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm - printf("lsl #1]"); - } - else - { - emitDispReg(id->idReg4(), EA_8BYTE, false); // mmmmm - printf("]"); - } - break; - // {.}, , [, , LSL #2] case IF_SVE_JD_4B: // ..........xmmmmm ...gggnnnnnttttt -- SVE contiguous store (scalar plus scalar) - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg - printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn - emitDispReg(id->idReg4(), EA_8BYTE, true); // mmmmm - printf("lsl #2]"); - break; - // {.D }, , [, .D, #3] // {.S }, , [, .S, #1] // {.S }, , [, .S, #2] @@ -20197,58 +20783,45 @@ void emitter::emitDispInsHelp( // {.S }, , [, .S, ] case IF_SVE_JK_4A_B: // ...........mmmmm .h.gggnnnnnttttt -- SVE 64-bit scatter store (scalar plus 64-bit // unscaled offsets) - { - emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt - emitDispPredicateReg(id->idReg2(), PREDICATE_NONE, id->idInsOpt(), true); // ggg - printf("["); - emitDispReg(id->idReg3(), EA_8BYTE, true); // nnnnn - emitDispSveReg(id->idReg4(), id->idInsOpt(), true); // mmmmm - emitDispSveExtendOpts(id->idInsOpt()); - 
switch (ins) - { - case INS_sve_st1b: - printf("]"); - break; - - case INS_sve_st1h: - if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) - { - printf("]"); - } - else - { - printf(" #1]"); - } - break; - - case INS_sve_st1w: - if ((fmt == IF_SVE_JJ_4A_C) || (fmt == IF_SVE_JJ_4A_D)) - { - printf("]"); - } - else - { - printf(" #2]"); - } - break; - - case INS_sve_st1d: - if (fmt == IF_SVE_JJ_4A_B) - { - printf("]"); - } - else - { - printf(" #3]"); - } - break; - - default: - assert(!"Invalid instruction"); - break; - } - break; - } + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] + // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #1] + // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod> #2] + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #1] + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {<Zt>.S }, <Pg>/Z, [<Xn|SP>, <Zm>.S, <mod>] + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #2] + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod> #3] + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] + case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, <mod>] + case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D] + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #1] + // {<Zt>.D }, <Pg>/Z, [<Xn|SP>, <Zm>.D, LSL #2] + case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + // {<Zt>.D }, 
<Pg>/Z, [<Xn|SP>, <Zm>.D] + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + emitDispSveConsecutiveRegList(id->idReg1(), insGetSveReg1ListSize(ins), id->idInsOpt(), true); // ttttt + emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // ggg + emitDispSveModAddr(ins, id->idReg3(), id->idReg4(), id->idInsOpt(), fmt); // nnnnn + // mmmmm + break; // {<Zt>.<T>}, <Pg>, [<Xn|SP>{, #<imm>, MUL VL}] case IF_SVE_JN_3A: // .........xx.iiii ...gggnnnnnttttt -- SVE contiguous store (scalar plus immediate) @@ -23165,6 +23738,28 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_SVE_HW_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_B: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_IU_4A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_A: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_IU_4A_C: // .........h.mmmmm ...gggnnnnnttttt -- SVE 64-bit gather load (scalar plus 32-bit unpacked + // scaled offsets) + case IF_SVE_HW_4B: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + case IF_SVE_HW_4B_D: // ...........mmmmm ...gggnnnnnttttt -- SVE 32-bit gather load (scalar plus 32-bit unscaled + // offsets) + result.insThroughput = PERFSCORE_THROUGHPUT_1C; + result.insLatency = PERFSCORE_LATENCY_9C; 
+ break; + default: // all other instructions perfScoreUnhandledInstruction(id, &result); diff --git a/src/coreclr/jit/emitarm64.h b/src/coreclr/jit/emitarm64.h index d3ea66913664a..aba2fe7a2ef68 100644 --- a/src/coreclr/jit/emitarm64.h +++ b/src/coreclr/jit/emitarm64.h @@ -48,6 +48,8 @@ void emitDispBarrier(insBarrier barrier); void emitDispShiftOpts(insOpts opt); void emitDispExtendOpts(insOpts opt); void emitDispSveExtendOpts(insOpts opt); +void emitDispSveExtendOptsModN(insOpts opt, int n); +void emitDispSveModAddr(instruction ins, regNumber reg1, regNumber reg2, insOpts opt, insFormat fmt); void emitDispLSExtendOpts(insOpts opt); void emitDispReg(regNumber reg, emitAttr attr, bool addComma); void emitDispSveReg(regNumber reg, insOpts opt, bool addComma); @@ -505,6 +507,18 @@ static int insGetSveReg1ListSize(instruction ins); // Register position is required for instructions with multiple predicates. static PredicateType insGetPredicateType(insFormat fmt, int regpos = 0); +// Returns true if the SVE instruction has a LSL addr. +// This is for formats that have [, , LSL #N] +static bool insSveIsLslN(instruction ins, insFormat fmt); + +// Returns true if the SVE instruction has a addr. +// This is for formats that have [, .T, ], [, .T, #N] +static bool insSveIsModN(instruction ins, insFormat fmt); + +// Returns 0, 1, 2 or 3 depending on the instruction and format. +// This is for formats that have [, .T, ], [, .T, #N], [, , LSL #N] +static int insSveGetLslOrModN(instruction ins, insFormat fmt); + // Returns true if the specified instruction can encode the 'dtype' field. 
static bool canEncodeSveElemsize_dtype(instruction ins); @@ -1067,8 +1081,17 @@ inline static bool insOptsScalableWide(insOpts opt) inline static bool insOptsScalable32bitExtends(insOpts opt) { - return ((opt == INS_OPTS_SCALABLE_S_UXTW) || (opt == INS_OPTS_SCALABLE_S_SXTW) || - (opt == INS_OPTS_SCALABLE_D_UXTW) || (opt == INS_OPTS_SCALABLE_D_SXTW)); + return insOptsScalableSingleWord32bitExtends(opt) || insOptsScalableDoubleWord32bitExtends(opt); +} + +inline static bool insOptsScalableSingleWord32bitExtends(insOpts opt) +{ + return (opt == INS_OPTS_SCALABLE_S_UXTW) || (opt == INS_OPTS_SCALABLE_S_SXTW); +} + +inline static bool insOptsScalableDoubleWord32bitExtends(insOpts opt) +{ + return (opt == INS_OPTS_SCALABLE_D_UXTW) || (opt == INS_OPTS_SCALABLE_D_SXTW); } inline static bool insScalableOptsNone(insScalableOpts sopt)