forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X64] [tannergooding] Change Vector2/3/4, Quaternion, Plane, Vector<T>, and Vector64/128/256/51 ... #400
Comments
Top method regressions
451 (58.12 % of base) - System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte
; Assembly listing for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
; rbp based frame
-; partially interruptible
+; fully interruptible
; No PGO data
-; 0 inlinees with PGO data; 19 single block inlinees; 23 inlinees without PGO data
+; 0 inlinees with PGO data; 23 single block inlinees; 9 inlinees without PGO data
; Final local variable assignments
;
-; V00 arg0 [V00,T27] ( 2, 2 ) simd32 -> mm0 single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
-; V01 arg1 [V01,T28] ( 2, 2 ) simd32 -> mm1 single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+; V00 arg0 [V00,T32] ( 2, 2 ) simd32 -> [rbp+0x10] single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
+; V01 arg1 [V01,T33] ( 2, 2 ) simd32 -> [rbp+0x30] single-def <System.Runtime.Intrinsics.Vector256`1[ubyte]>
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V03 tmp1 [V03,T12] ( 2, 4 ) int -> rax "impAppendStmt"
-; V04 tmp2 [V04,T23] ( 3, 6 ) simd16 -> [rbp-0x10] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-; V05 tmp3 [V05,T24] ( 3, 6 ) simd16 -> [rbp-0x20] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-; V06 tmp4 [V06,T13] ( 2, 4 ) int -> rax "impAppendStmt"
-; V07 tmp5 [V07,T04] ( 8, 8 ) ubyte -> rax ld-addr-op "Inline ldloca(s) first use temp"
-;* V08 tmp6 [V08,T19] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
-; V09 tmp7 [V09 ] ( 9, 18 ) struct ( 8) [rbp-0x28] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-; V10 tmp8 [V10 ] ( 9, 18 ) struct ( 8) [rbp-0x30] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V11 tmp9 [V11 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-; V12 tmp10 [V12,T00] ( 8, 16 ) int -> registers "impAppendStmt"
-;* V13 tmp11 [V13 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-;* V14 tmp12 [V14 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V15 tmp13 [V15 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V16 tmp14 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V17 tmp15 [V17 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-;* V18 tmp16 [V18 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-; V19 tmp17 [V19,T08] ( 8, 8 ) ubyte -> registers "Inline return value spill temp"
-;* V20 tmp18 [V20 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V21 tmp19 [V21 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V22 tmp20 [V22 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V23 tmp21 [V23 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V24 tmp22 [V24 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V25 tmp23 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-; V26 tmp24 [V26,T05] ( 8, 8 ) ubyte -> rcx ld-addr-op "Inline ldloca(s) first use temp"
-;* V27 tmp25 [V27,T20] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
-; V28 tmp26 [V28 ] ( 9, 18 ) struct ( 8) [rbp-0x38] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-; V29 tmp27 [V29 ] ( 9, 18 ) struct ( 8) [rbp-0x40] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V03 tmp1 [V03,T12] ( 2, 4 ) int -> rbx "impAppendStmt"
+; V04 tmp2 [V04,T28] ( 3, 6 ) simd16 -> [rbp-0x30] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V05 tmp3 [V05,T29] ( 3, 6 ) simd16 -> [rbp-0x40] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V06 tmp4 [V06,T13] ( 2, 4 ) int -> rbx "impAppendStmt"
+;* V07 tmp5 [V07 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V08 tmp6 [V08 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V09 tmp7 [V09 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V10 tmp8 [V10 ] ( 2, 5 ) struct ( 8) [rbp-0x48] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V11 tmp9 [V11,T04] ( 5, 17 ) int -> rsi "Inline stloc first use temp"
+; V12 tmp10 [V12 ] ( 2, 10 ) struct ( 8) [rbp-0x50] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V13 tmp11 [V13 ] ( 2, 10 ) struct ( 8) [rbp-0x58] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V14 tmp12 [V14 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+;* V15 tmp13 [V15 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+;* V16 tmp14 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
+;* V17 tmp15 [V17 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
+;* V18 tmp16 [V18 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
+; V19 tmp17 [V19,T00] ( 16, 16 ) ubyte -> rbx ld-addr-op "Inline ldloca(s) first use temp"
+;* V20 tmp18 [V20,T24] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V21 tmp19 [V21 ] ( 9, 18 ) struct ( 8) [rbp-0x60] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V22 tmp20 [V22 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+;* V23 tmp21 [V23 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V24 tmp22 [V24 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V25 tmp23 [V25 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V26 tmp24 [V26 ] ( 2, 5 ) struct ( 8) [rbp-0x68] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V27 tmp25 [V27,T05] ( 5, 17 ) int -> r15 "Inline stloc first use temp"
+; V28 tmp26 [V28 ] ( 2, 10 ) struct ( 8) [rbp-0x70] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V29 tmp27 [V29 ] ( 2, 10 ) struct ( 8) [rbp-0x78] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
;* V30 tmp28 [V30 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-; V31 tmp29 [V31,T01] ( 8, 16 ) int -> registers "impAppendStmt"
-;* V32 tmp30 [V32 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-;* V33 tmp31 [V33 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V34 tmp32 [V34 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V35 tmp33 [V35 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V36 tmp34 [V36 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-;* V37 tmp35 [V37 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-; V38 tmp36 [V38,T09] ( 8, 8 ) ubyte -> registers "Inline return value spill temp"
-;* V39 tmp37 [V39 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V40 tmp38 [V40 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V41 tmp39 [V41 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V42 tmp40 [V42 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V43 tmp41 [V43 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V44 tmp42 [V44 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V45 tmp43 [V45 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V46 tmp44 [V46 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V47 tmp45 [V47 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-; V48 tmp46 [V48,T25] ( 3, 6 ) simd16 -> [rbp-0x50] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-; V49 tmp47 [V49,T26] ( 3, 6 ) simd16 -> [rbp-0x60] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
-; V50 tmp48 [V50,T14] ( 2, 4 ) int -> rcx "impAppendStmt"
-; V51 tmp49 [V51,T06] ( 8, 8 ) ubyte -> rcx ld-addr-op "Inline ldloca(s) first use temp"
-;* V52 tmp50 [V52,T21] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
-; V53 tmp51 [V53 ] ( 9, 18 ) struct ( 8) [rbp-0x68] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-; V54 tmp52 [V54 ] ( 9, 18 ) struct ( 8) [rbp-0x70] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V55 tmp53 [V55 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-; V56 tmp54 [V56,T02] ( 8, 16 ) int -> registers "impAppendStmt"
-;* V57 tmp55 [V57 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-;* V58 tmp56 [V58 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V59 tmp57 [V59 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V60 tmp58 [V60 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V61 tmp59 [V61 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-;* V62 tmp60 [V62 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-; V63 tmp61 [V63,T10] ( 8, 8 ) ubyte -> registers "Inline return value spill temp"
-;* V64 tmp62 [V64 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V65 tmp63 [V65 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V66 tmp64 [V66 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V67 tmp65 [V67 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V68 tmp66 [V68 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V69 tmp67 [V69 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-; V70 tmp68 [V70,T07] ( 8, 8 ) ubyte -> rdx ld-addr-op "Inline ldloca(s) first use temp"
-;* V71 tmp69 [V71,T22] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
-; V72 tmp70 [V72 ] ( 9, 18 ) struct ( 8) [rbp-0x78] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-; V73 tmp71 [V73 ] ( 9, 18 ) struct ( 8) [rbp-0x80] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
-;* V74 tmp72 [V74 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-; V75 tmp73 [V75,T03] ( 8, 16 ) int -> registers "impAppendStmt"
-;* V76 tmp74 [V76 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
-;* V77 tmp75 [V77 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V78 tmp76 [V78 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V79 tmp77 [V79 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V80 tmp78 [V80 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-;* V81 tmp79 [V81 ] ( 0, 0 ) int -> zero-ref "Inlining Arg"
-; V82 tmp80 [V82,T11] ( 8, 8 ) ubyte -> registers "Inline return value spill temp"
-;* V83 tmp81 [V83 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V84 tmp82 [V84 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V85 tmp83 [V85 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V86 tmp84 [V86 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V87 tmp85 [V87 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V88 tmp86 [V88 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V89 tmp87 [V89 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V90 tmp88 [V90 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V91 tmp89 [V91 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-;* V92 tmp90 [V92 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V93 tmp91 [V93 ] ( 0, 0 ) ubyte -> zero-ref "Inlining Arg"
-; V94 tmp92 [V94 ] ( 9, 17 ) long -> [rbp-0x28] do-not-enreg[X] addr-exposed "field V09._00 (fldOffset=0x0)" P-DEP
-; V95 tmp93 [V95 ] ( 9, 17 ) long -> [rbp-0x30] do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
-; V96 tmp94 [V96 ] ( 9, 17 ) long -> [rbp-0x38] do-not-enreg[X] addr-exposed "field V28._00 (fldOffset=0x0)" P-DEP
-; V97 tmp95 [V97 ] ( 9, 17 ) long -> [rbp-0x40] do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
-; V98 tmp96 [V98 ] ( 9, 17 ) long -> [rbp-0x68] do-not-enreg[X] addr-exposed "field V53._00 (fldOffset=0x0)" P-DEP
-; V99 tmp97 [V99 ] ( 9, 17 ) long -> [rbp-0x70] do-not-enreg[X] addr-exposed "field V54._00 (fldOffset=0x0)" P-DEP
-; V100 tmp98 [V100 ] ( 9, 17 ) long -> [rbp-0x78] do-not-enreg[X] addr-exposed "field V72._00 (fldOffset=0x0)" P-DEP
-; V101 tmp99 [V101 ] ( 9, 17 ) long -> [rbp-0x80] do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
-; V102 cse0 [V102,T15] ( 2, 2 ) int -> rcx "CSE #01: moderate"
-; V103 cse1 [V103,T16] ( 2, 2 ) int -> rdx "CSE #02: moderate"
-; V104 cse2 [V104,T17] ( 2, 2 ) int -> rdx "CSE #03: moderate"
-; V105 cse3 [V105,T18] ( 2, 2 ) int -> rdi "CSE #04: moderate"
+;* V31 tmp29 [V31 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+; V32 tmp30 [V32,T01] ( 16, 16 ) ubyte -> rax ld-addr-op "Inline ldloca(s) first use temp"
+;* V33 tmp31 [V33,T25] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V34 tmp32 [V34 ] ( 9, 18 ) struct ( 8) [rbp-0x80] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V35 tmp33 [V35 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+; V36 tmp34 [V36,T30] ( 3, 6 ) simd16 -> [rbp-0x90] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V37 tmp35 [V37,T31] ( 3, 6 ) simd16 -> [rbp-0xA0] do-not-enreg[SF] "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
+; V38 tmp36 [V38,T14] ( 2, 4 ) int -> r15 "impAppendStmt"
+;* V39 tmp37 [V39 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V40 tmp38 [V40 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V41 tmp39 [V41 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V42 tmp40 [V42 ] ( 2, 5 ) struct ( 8) [rbp-0xA8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V43 tmp41 [V43,T06] ( 5, 17 ) int -> r15 "Inline stloc first use temp"
+; V44 tmp42 [V44 ] ( 2, 10 ) struct ( 8) [rbp-0xB0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V45 tmp43 [V45 ] ( 2, 10 ) struct ( 8) [rbp-0xB8] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V46 tmp44 [V46 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+;* V47 tmp45 [V47 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+; V48 tmp46 [V48,T02] ( 16, 16 ) ubyte -> r15 ld-addr-op "Inline ldloca(s) first use temp"
+;* V49 tmp47 [V49,T26] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V50 tmp48 [V50 ] ( 9, 18 ) struct ( 8) [rbp-0xC0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V51 tmp49 [V51 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+;* V52 tmp50 [V52 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V53 tmp51 [V53 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V54 tmp52 [V54 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V55 tmp53 [V55 ] ( 2, 5 ) struct ( 8) [rbp-0xC8] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V56 tmp54 [V56,T07] ( 5, 17 ) int -> r14 "Inline stloc first use temp"
+; V57 tmp55 [V57 ] ( 2, 10 ) struct ( 8) [rbp-0xD0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+; V58 tmp56 [V58 ] ( 2, 10 ) struct ( 8) [rbp-0xD8] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V59 tmp57 [V59 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+;* V60 tmp58 [V60 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
+; V61 tmp59 [V61,T03] ( 16, 16 ) ubyte -> rax ld-addr-op "Inline ldloca(s) first use temp"
+;* V62 tmp60 [V62,T27] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
+; V63 tmp61 [V63 ] ( 9, 18 ) struct ( 8) [rbp-0xE0] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg" <System.Runtime.Intrinsics.Vector64`1[ubyte]>
+;* V64 tmp62 [V64 ] ( 0, 0 ) int -> zero-ref "impAppendStmt"
+; V65 tmp63 [V65,T16] ( 2, 2 ) long -> rsi "field V07._00 (fldOffset=0x0)" P-INDEP
+; V66 tmp64 [V66,T17] ( 2, 2 ) long -> rdi "field V08._00 (fldOffset=0x0)" P-INDEP
+;* V67 tmp65 [V67 ] ( 0, 0 ) long -> zero-ref "field V09._00 (fldOffset=0x0)" P-INDEP
+; V68 tmp66 [V68 ] ( 2, 5 ) long -> [rbp-0x48] do-not-enreg[X] addr-exposed "field V10._00 (fldOffset=0x0)" P-DEP
+; V69 tmp67 [V69 ] ( 2, 9 ) long -> [rbp-0x50] do-not-enreg[X] addr-exposed "field V12._00 (fldOffset=0x0)" P-DEP
+; V70 tmp68 [V70 ] ( 2, 9 ) long -> [rbp-0x58] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
+; V71 tmp69 [V71 ] ( 9, 17 ) long -> [rbp-0x60] do-not-enreg[X] addr-exposed "field V21._00 (fldOffset=0x0)" P-DEP
+; V72 tmp70 [V72,T18] ( 2, 2 ) long -> rdi "field V23._00 (fldOffset=0x0)" P-INDEP
+; V73 tmp71 [V73,T19] ( 2, 2 ) long -> rsi "field V24._00 (fldOffset=0x0)" P-INDEP
+;* V74 tmp72 [V74 ] ( 0, 0 ) long -> zero-ref "field V25._00 (fldOffset=0x0)" P-INDEP
+; V75 tmp73 [V75 ] ( 2, 5 ) long -> [rbp-0x68] do-not-enreg[X] addr-exposed "field V26._00 (fldOffset=0x0)" P-DEP
+; V76 tmp74 [V76 ] ( 2, 9 ) long -> [rbp-0x70] do-not-enreg[X] addr-exposed "field V28._00 (fldOffset=0x0)" P-DEP
+; V77 tmp75 [V77 ] ( 2, 9 ) long -> [rbp-0x78] do-not-enreg[X] addr-exposed "field V29._00 (fldOffset=0x0)" P-DEP
+; V78 tmp76 [V78 ] ( 9, 17 ) long -> [rbp-0x80] do-not-enreg[X] addr-exposed "field V34._00 (fldOffset=0x0)" P-DEP
+; V79 tmp77 [V79,T20] ( 2, 2 ) long -> rdi "field V39._00 (fldOffset=0x0)" P-INDEP
+; V80 tmp78 [V80,T21] ( 2, 2 ) long -> rsi "field V40._00 (fldOffset=0x0)" P-INDEP
+;* V81 tmp79 [V81 ] ( 0, 0 ) long -> zero-ref "field V41._00 (fldOffset=0x0)" P-INDEP
+; V82 tmp80 [V82 ] ( 2, 5 ) long -> [rbp-0xA8] do-not-enreg[X] addr-exposed "field V42._00 (fldOffset=0x0)" P-DEP
+; V83 tmp81 [V83 ] ( 2, 9 ) long -> [rbp-0xB0] do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
+; V84 tmp82 [V84 ] ( 2, 9 ) long -> [rbp-0xB8] do-not-enreg[X] addr-exposed "field V45._00 (fldOffset=0x0)" P-DEP
+; V85 tmp83 [V85 ] ( 9, 17 ) long -> [rbp-0xC0] do-not-enreg[X] addr-exposed "field V50._00 (fldOffset=0x0)" P-DEP
+; V86 tmp84 [V86,T22] ( 2, 2 ) long -> rdi "field V52._00 (fldOffset=0x0)" P-INDEP
+; V87 tmp85 [V87,T23] ( 2, 2 ) long -> rsi "field V53._00 (fldOffset=0x0)" P-INDEP
+;* V88 tmp86 [V88 ] ( 0, 0 ) long -> zero-ref "field V54._00 (fldOffset=0x0)" P-INDEP
+; V89 tmp87 [V89 ] ( 2, 5 ) long -> [rbp-0xC8] do-not-enreg[X] addr-exposed "field V55._00 (fldOffset=0x0)" P-DEP
+; V90 tmp88 [V90 ] ( 2, 9 ) long -> [rbp-0xD0] do-not-enreg[X] addr-exposed "field V57._00 (fldOffset=0x0)" P-DEP
+; V91 tmp89 [V91 ] ( 2, 9 ) long -> [rbp-0xD8] do-not-enreg[X] addr-exposed "field V58._00 (fldOffset=0x0)" P-DEP
+; V92 tmp90 [V92 ] ( 9, 17 ) long -> [rbp-0xE0] do-not-enreg[X] addr-exposed "field V63._00 (fldOffset=0x0)" P-DEP
+; V93 tmp91 [V93,T15] ( 2, 4 ) int -> rsi "argument with side effect"
+; V94 cse0 [V94,T08] ( 4, 16 ) long -> rax "CSE #01: aggressive"
+; V95 cse1 [V95,T09] ( 4, 16 ) long -> r14 "CSE #02: aggressive"
+; V96 cse2 [V96,T10] ( 4, 16 ) long -> r14 "CSE #03: aggressive"
+; V97 cse3 [V97,T11] ( 4, 16 ) long -> r13 "CSE #04: aggressive"
;
-; Lcl frame size = 128
+; Lcl frame size = 192
G_M42821_IG01:
push rbp
- sub rsp, 128
- lea rbp, [rsp+0x80]
+ push r15
+ push r14
+ push r13
+ push rbx
+ sub rsp, 192
+ lea rbp, [rsp+0xE0]
vmovups ymm0, ymmword ptr [rbp+0x10]
vmovups ymm1, ymmword ptr [rbp+0x30]
- ;; size=26 bbWeight=1 PerfScore 9.75
+ ;; size=33 bbWeight=1 PerfScore 13.75
G_M42821_IG02:
+ vmovups ymmword ptr [rbp+0x10], ymm0
vmovaps ymm2, ymm0
- vmovaps xmmword ptr [rbp-0x10], xmm2
+ vmovaps xmmword ptr [rbp-0x30], xmm2
+ vmovups ymmword ptr [rbp+0x30], ymm1
vmovaps ymm2, ymm1
- vmovaps xmmword ptr [rbp-0x20], xmm2
- mov rax, qword ptr [rbp-0x10]
- mov qword ptr [rbp-0x28], rax
- mov rax, qword ptr [rbp-0x20]
- mov qword ptr [rbp-0x30], rax
- movzx rax, byte ptr [rbp-0x28]
- movzx rcx, byte ptr [rbp-0x30]
- imul eax, ecx
- movzx rax, al
- movzx rcx, byte ptr [rbp-0x27]
- movzx rdx, byte ptr [rbp-0x2F]
- imul ecx, edx
- movzx rcx, cl
- add eax, ecx
- movzx rcx, al
- movzx rax, cl
- movzx rcx, byte ptr [rbp-0x26]
- movzx rdx, byte ptr [rbp-0x2E]
- imul ecx, edx
- movzx rcx, cl
- movzx rdx, byte ptr [rbp-0x25]
- movzx rdi, byte ptr [rbp-0x2D]
- imul edx, edi
- movzx rdx, dl
- add ecx, edx
- add eax, ecx
- movzx rax, al
- movzx rdx, byte ptr [rbp-0x24]
- movzx rcx, byte ptr [rbp-0x2C]
- imul ecx, edx
- movzx rcx, cl
- movzx rdx, byte ptr [rbp-0x23]
- movzx rdi, byte ptr [rbp-0x2B]
- imul edx, edi
- movzx rdx, dl
- add ecx, edx
- add ecx, eax
- movzx rax, cl
- movzx rdx, byte ptr [rbp-0x22]
- movzx rcx, byte ptr [rbp-0x2A]
- imul ecx, edx
- movzx rcx, cl
- movzx rdx, byte ptr [rbp-0x21]
- movzx rdi, byte ptr [rbp-0x29]
- imul edx, edi
- movzx rdx, dl
- add ecx, edx
- add ecx, eax
- movzx rax, cl
- mov rcx, qword ptr [rbp-0x08]
- mov qword ptr [rbp-0x38], rcx
- mov rcx, qword ptr [rbp-0x18]
- mov qword ptr [rbp-0x40], rcx
- movzx rcx, byte ptr [rbp-0x38]
- movzx rdx, byte ptr [rbp-0x40]
- imul ecx, edx
- movzx rcx, cl
- movzx rdx, byte ptr [rbp-0x37]
- movzx rdi, byte ptr [rbp-0x3F]
- imul edx, edi
- movzx rdx, dl
- add ecx, edx
- movzx rdx, cl
- movzx rcx, dl
- movzx rdx, byte ptr [rbp-0x36]
- movzx rdi, byte ptr [rbp-0x3E]
- imul edx, edi
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x35]
- ;; size=251 bbWeight=1 PerfScore 62.00
+ vmovaps xmmword ptr [rbp-0x40], xmm2
+ mov rsi, qword ptr [rbp-0x30]
+ mov rdi, qword ptr [rbp-0x40]
+ mov qword ptr [rbp-0x50], rsi
+ mov qword ptr [rbp-0x58], rdi
+ xor esi, esi
+ align [0 bytes for IG03]
+ ;; size=46 bbWeight=1 PerfScore 8.75
G_M42821_IG03:
- movzx rsi, byte ptr [rbp-0x3D]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- add ecx, edx
- movzx rcx, cl
- movzx rdi, byte ptr [rbp-0x34]
- movzx rdx, byte ptr [rbp-0x3C]
- imul edx, edi
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x33]
- movzx rsi, byte ptr [rbp-0x3B]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- add edx, ecx
- movzx rcx, dl
- movzx rdi, byte ptr [rbp-0x32]
- movzx rdx, byte ptr [rbp-0x3A]
- imul edx, edi
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x31]
- movzx rsi, byte ptr [rbp-0x39]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- add edx, ecx
- movzx rcx, dl
- add eax, ecx
- movzx rax, al
- vextractf128 xmm0, ymm0, 1
- vmovaps xmmword ptr [rbp-0x50], xmm0
- vextractf128 xmm0, ymm1, 1
- vmovaps xmmword ptr [rbp-0x60], xmm0
- mov rcx, qword ptr [rbp-0x50]
- mov qword ptr [rbp-0x68], rcx
- mov rcx, qword ptr [rbp-0x60]
- mov qword ptr [rbp-0x70], rcx
- movzx rcx, byte ptr [rbp-0x68]
- movzx rdx, byte ptr [rbp-0x70]
- imul ecx, edx
- movzx rcx, cl
- movzx rdx, byte ptr [rbp-0x67]
- movzx rdi, byte ptr [rbp-0x6F]
- imul edx, edi
- movzx rdx, dl
- add ecx, edx
- movzx rdx, cl
- movzx rcx, dl
- movzx rdx, byte ptr [rbp-0x66]
- movzx rdi, byte ptr [rbp-0x6E]
- imul edx, edi
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x65]
- movzx rsi, byte ptr [rbp-0x6D]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- add ecx, edx
- movzx rcx, cl
- movzx rdi, byte ptr [rbp-0x64]
- movzx rdx, byte ptr [rbp-0x6C]
- imul edx, edi
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x63]
- movzx rsi, byte ptr [rbp-0x6B]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- add edx, ecx
- movzx rcx, dl
- movzx rdi, byte ptr [rbp-0x62]
- movzx rdx, byte ptr [rbp-0x6A]
- imul edx, edi
- ;; size=267 bbWeight=1 PerfScore 64.75
+ lea rdi, [rbp-0x50]
+ movsxd rax, esi
+ movzx rdi, byte ptr [rdi+rax]
+ lea rcx, [rbp-0x58]
+ movzx rcx, byte ptr [rcx+rax]
+ imul edi, ecx
+ lea rcx, [rbp-0x48]
+ mov byte ptr [rcx+rax], dil
+ inc esi
+ cmp esi, 8
+ jl SHORT G_M42821_IG03
+ ;; size=38 bbWeight=4 PerfScore 41.00
G_M42821_IG04:
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x61]
- movzx rsi, byte ptr [rbp-0x69]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- add edx, ecx
- movzx rcx, dl
- mov rdx, qword ptr [rbp-0x48]
- mov qword ptr [rbp-0x78], rdx
- mov rdx, qword ptr [rbp-0x58]
- mov qword ptr [rbp-0x80], rdx
- movzx rdx, byte ptr [rbp-0x78]
- movzx rdi, byte ptr [rbp-0x80]
- imul edx, edi
- movzx rdx, dl
- movzx rdi, byte ptr [rbp-0x77]
+ mov rsi, qword ptr [rbp-0x48]
+ mov qword ptr [rbp-0x60], rsi
+ movzx rsi, byte ptr [rbp-0x60]
+ xor edi, edi
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x5F]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x5E]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x5D]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x5C]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x5B]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x5A]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ movzx rsi, byte ptr [rbp-0x59]
+ mov edi, ebx
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ mov rdi, qword ptr [rbp-0x28]
+ mov rsi, qword ptr [rbp-0x38]
+ mov qword ptr [rbp-0x70], rdi
+ mov qword ptr [rbp-0x78], rsi
+ xor r15d, r15d
+ ;; size=195 bbWeight=1 PerfScore 44.25
+G_M42821_IG05:
+ lea rdi, [rbp-0x70]
+ movsxd r14, r15d
+ movzx rdi, byte ptr [rdi+r14]
+ lea rsi, [rbp-0x78]
+ movzx rsi, byte ptr [rsi+r14]
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+ lea rsi, [rbp-0x68]
+ mov byte ptr [rsi+r14], al
+ inc r15d
+ cmp r15d, 8
+ jl SHORT G_M42821_IG05
+ ;; size=50 bbWeight=4 PerfScore 46.00
+G_M42821_IG06:
+ mov rsi, qword ptr [rbp-0x68]
+ mov qword ptr [rbp-0x80], rsi
+ movzx rsi, byte ptr [rbp-0x80]
+ xor edi, edi
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
movzx rsi, byte ptr [rbp-0x7F]
- imul edi, esi
- movzx rdi, dil
- add edx, edi
- movzx rdi, dl
- movzx rdx, dil
- movzx rdi, byte ptr [rbp-0x76]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
movzx rsi, byte ptr [rbp-0x7E]
- imul edi, esi
- movzx rdi, dil
- movzx rsi, byte ptr [rbp-0x75]
- movzx r8, byte ptr [rbp-0x7D]
- imul esi, r8d
- movzx rsi, sil
- add edi, esi
- add edx, edi
- movzx rdx, dl
- movzx rsi, byte ptr [rbp-0x74]
- movzx rdi, byte ptr [rbp-0x7C]
- imul edi, esi
- movzx rdi, dil
- movzx rsi, byte ptr [rbp-0x73]
- movzx r8, byte ptr [rbp-0x7B]
- imul esi, r8d
- movzx rsi, sil
- add edi, esi
- add edi, edx
- movzx rdx, dil
- movzx rsi, byte ptr [rbp-0x72]
- movzx rdi, byte ptr [rbp-0x7A]
- imul edi, esi
- movzx rdi, dil
- movzx rsi, byte ptr [rbp-0x71]
- movzx r8, byte ptr [rbp-0x79]
- imul esi, r8d
- movzx rsi, sil
- add edi, esi
- add edi, edx
- movzx rdx, dil
- add ecx, edx
- add eax, ecx
- movzx rax, al
- ;; size=220 bbWeight=1 PerfScore 47.00
-G_M42821_IG05:
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0x7D]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0x7C]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0x7B]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0x7A]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0x79]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rdi, bl
+ mov esi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov ebx, eax
+ vmovups ymm0, ymmword ptr [rbp+0x10]
+ vextractf128 xmm0, ymm0, 1
+ vmovaps xmmword ptr [rbp-0x90], xmm0
+ vmovups ymm1, ymmword ptr [rbp+0x30]
+ vextractf128 xmm0, ymm1, 1
+ vmovaps xmmword ptr [rbp-0xA0], xmm0
+ mov rdi, qword ptr [rbp-0x90]
+ mov rsi, qword ptr [rbp-0xA0]
+ mov qword ptr [rbp-0xB0], rdi
+ mov qword ptr [rbp-0xB8], rsi
+ xor r15d, r15d
+ ;; size=249 bbWeight=1 PerfScore 60.25
+G_M42821_IG07:
+ lea rdi, [rbp-0xB0]
+ movsxd r14, r15d
+ movzx rdi, byte ptr [rdi+r14]
+ lea rsi, [rbp-0xB8]
+ movzx rsi, byte ptr [rsi+r14]
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+ lea rsi, [rbp-0xA8]
+ mov byte ptr [rsi+r14], al
+ inc r15d
+ cmp r15d, 8
+ jl SHORT G_M42821_IG07
+ ;; size=59 bbWeight=4 PerfScore 46.00
+G_M42821_IG08:
+ mov rsi, qword ptr [rbp-0xA8]
+ mov qword ptr [rbp-0xC0], rsi
+ movzx rsi, byte ptr [rbp-0xC0]
+ xor edi, edi
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xBF]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xBE]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xBD]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xBC]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xBB]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xBA]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ movzx rsi, byte ptr [rbp-0xB9]
+ mov edi, r15d
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov r15d, eax
+ mov rdi, qword ptr [rbp-0x88]
+ mov rsi, qword ptr [rbp-0x98]
+ mov qword ptr [rbp-0xD0], rdi
+ mov qword ptr [rbp-0xD8], rsi
+ xor r14d, r14d
+ ;; size=252 bbWeight=1 PerfScore 44.25
+G_M42821_IG09:
+ lea rdi, [rbp-0xD0]
+ movsxd r13, r14d
+ movzx rdi, byte ptr [rdi+r13]
+ lea rsi, [rbp-0xD8]
+ movzx rsi, byte ptr [rsi+r13]
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Multiply(ubyte,ubyte):ubyte
+ lea rsi, [rbp-0xC8]
+ mov byte ptr [rsi+r13], al
+ inc r14d
+ cmp r14d, 8
+ jl SHORT G_M42821_IG09
+ ;; size=59 bbWeight=4 PerfScore 46.00
+G_M42821_IG10:
+ mov rsi, qword ptr [rbp-0xC8]
+ mov qword ptr [rbp-0xE0], rsi
+ movzx rsi, byte ptr [rbp-0xE0]
+ xor edi, edi
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xDF]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xDE]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xDD]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xDC]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xDB]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xDA]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rsi, byte ptr [rbp-0xD9]
+ mov edi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ movzx rdi, r15b
+ mov esi, eax
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ mov esi, eax
+ movzx rdi, bl
+ mov rax, 0xD1FFAB1E ; code for System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ call [rax]System.Runtime.Intrinsics.Scalar`1[ubyte]:Add(ubyte,ubyte):ubyte
+ nop
+ ;; size=227 bbWeight=1 PerfScore 45.75
+G_M42821_IG11:
vzeroupper
- add rsp, 128
+ add rsp, 192
+ pop rbx
+ pop r13
+ pop r14
+ pop r15
pop rbp
ret
- ;; size=12 bbWeight=1 PerfScore 2.75
+ ;; size=19 bbWeight=1 PerfScore 4.75
-; Total bytes of code 776, prolog size 16, PerfScore 186.25, instruction count 214, allocated bytes for code 776 (MethodHash=d63158ba) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte (FullOpts)
+; Total bytes of code 1227, prolog size 33, PerfScore 400.75, instruction count 263, allocated bytes for code 1227 (MethodHash=d63158ba) for method System.Runtime.Intrinsics.Vector256`1[ubyte]:System.Runtime.Intrinsics.ISimdVector<System.Runtime.Intrinsics.Vector256<T>,T>.Dot(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte]):ubyte (FullOpts) Note: some changes were skipped as they were too large to fit into a comment. Larger list of diffs: https://gist.github.com/MihuBot/f6ef2f25cbdf1a6fa108ff971f43d624 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Job completed in 29 minutes.
dotnet/runtime#102301
Diffs
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: